diff options
-rw-r--r-- | src/Main.hs | 99 | ||||
-rw-r--r-- | src/Text/Pandoc/Definition.hs | 34 | ||||
-rw-r--r-- | src/Text/Pandoc/HtmlEntities.hs | 31 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 85 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/LaTeX.hs | 118 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 601 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/RST.hs | 631 | ||||
-rw-r--r-- | src/Text/Pandoc/Shared.hs | 267 | ||||
-rw-r--r-- | src/Text/Pandoc/UTF8.hs | 3 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/HTML.hs | 311 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/LaTeX.hs | 155 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Markdown.hs | 148 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/RST.hs | 175 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/RTF.hs | 155 | ||||
-rw-r--r-- | src/Text/ParserCombinators/Pandoc.hs | 24 |
15 files changed, 1571 insertions, 1266 deletions
diff --git a/src/Main.hs b/src/Main.hs index 0ca36f7ce..986ce9cf1 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -1,5 +1,15 @@ --- | Main Pandoc program. Parses command-line options and calls the --- appropriate readers and writers. +{- | + Module : Main + Copyright : Copyright (C) 2006 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm at berkeley dot edu> + Stability : unstable + Portability : portable + +Parses command-line options and calls the appropriate readers and +writers. +-} module Main where import Text.Pandoc.UTF8 ( decodeUTF8, encodeUTF8 ) import Text.Pandoc.Readers.Markdown ( readMarkdown ) @@ -13,7 +23,8 @@ import Text.Pandoc.Writers.LaTeX ( writeLaTeX ) import Text.Pandoc.Readers.LaTeX ( readLaTeX ) import Text.Pandoc.Writers.RTF ( writeRTF ) import Text.Pandoc.Writers.Markdown ( writeMarkdown ) -import Text.Pandoc.Writers.DefaultHeaders ( defaultHtmlHeader, defaultRTFHeader, defaultS5Header, defaultLaTeXHeader ) +import Text.Pandoc.Writers.DefaultHeaders ( defaultHtmlHeader, + defaultRTFHeader, defaultS5Header, defaultLaTeXHeader ) import Text.Pandoc.Definition import Text.Pandoc.Shared import System ( exitWith, getArgs, getProgName ) @@ -58,23 +69,24 @@ writeDoc options = prettyPandoc -- | Data structure for command line options. data Opt = Opt - { optPreserveTabs :: Bool -- ^ If @False@, convert tabs to spaces - , optTabStop :: Int -- ^ Number of spaces per tab - , optStandalone :: Bool -- ^ If @True@, include header and footer - , optReader :: ParserState -> String -> Pandoc -- ^ Reader to use - , optWriter :: WriterOptions -> Pandoc -> String -- ^ Writer to use - , optParseRaw :: Bool -- ^ If @True@, parse unconvertable HTML and TeX - , optCSS :: String -- ^ CSS file to link to - , optIncludeInHeader :: String -- ^ File to include in header - , optIncludeBeforeBody :: String -- ^ File to include at beginning of body - , optIncludeAfterBody :: String -- ^ File to include at end of body - , optCustomHeader :: String -- ^ Custom header to use, or "DEFAULT" - , optDefaultHeader :: String -- ^ Default header - , optTitlePrefix :: String -- ^ Optional prefix for HTML title - , optNumberSections :: Bool -- ^ If @True@, number sections in LaTeX - , optIncremental :: Bool -- ^ If @True@, show lists incrementally in S5 - , optSmart :: Bool -- ^ If @True@, use smart quotes, dashes, ... - , optASCIIMathML :: Bool -- ^ If @True@, use ASCIIMathML in HTML or S5 + { optPreserveTabs :: Bool -- ^ If @False@, convert tabs to spaces + , optTabStop :: Int -- ^ Number of spaces per tab + , optStandalone :: Bool -- ^ If @True@, include header, footer + , optReader :: ParserState -> String -> Pandoc -- ^ Read format + , optWriter :: WriterOptions -> Pandoc -> String -- ^ Write fmt + , optParseRaw :: Bool -- ^ If @True@, parse unconvertable + -- HTML and TeX + , optCSS :: String -- ^ CSS file to link to + , optIncludeInHeader :: String -- ^ File to include in header + , optIncludeBeforeBody :: String -- ^ File to include at top of body + , optIncludeAfterBody :: String -- ^ File to include at end of body + , optCustomHeader :: String -- ^ Custom header to use, or "DEFAULT" + , optDefaultHeader :: String -- ^ Default header + , optTitlePrefix :: String -- ^ Optional prefix for HTML title + , optNumberSections :: Bool -- ^ If @True@, number sections in LaTeX + , optIncremental :: Bool -- ^ If @True@, incremental lists in S5 + , optSmart :: Bool -- ^ If @True@, use smart typography + , optASCIIMathML :: Bool -- ^ If @True@, use ASCIIMathML in HTML } -- | Defaults for command-line options. @@ -121,18 +133,19 @@ options = , Option "fr" ["from","read"] (ReqArg (\arg opt -> case (lookup (map toLower arg) readers) of - Just reader -> return opt { optReader = reader } - Nothing -> error ("Unknown reader: " ++ arg) ) + Just reader -> return opt { optReader = reader } + Nothing -> error ("Unknown reader: " ++ arg) ) "FORMAT") - ("Source format (" ++ (concatMap (\(name, fn) -> " " ++ name) readers) ++ " )") + ("Source format (" ++ + (concatMap (\(name, fn) -> " " ++ name) readers) ++ " )") , Option "tw" ["to","write"] (ReqArg (\arg opt -> case (lookup (map toLower arg) writers) of - Just (writer, defaultHeader) -> - return opt { optWriter = writer, - optDefaultHeader = defaultHeader } - Nothing -> error ("Unknown writer: " ++ arg) ) + Just (writer, defaultHeader) -> + return opt { optWriter = writer, + optDefaultHeader = defaultHeader } + Nothing -> error ("Unknown writer: " ++ arg) ) "FORMAT") ("Output format (" ++ (concatMap (\(name, fn) -> " " ++ name) writers) ++ " )") @@ -164,7 +177,8 @@ options = , Option "m" ["asciimathml"] (NoArg - (\opt -> return opt { optASCIIMathML = True, optStandalone = True })) + (\opt -> return opt { optASCIIMathML = True, + optStandalone = True })) "Use ASCIIMathML script in html output" , Option "i" ["incremental"] @@ -179,7 +193,8 @@ options = , Option "c" ["css"] (ReqArg - (\arg opt -> return opt { optCSS = arg, optStandalone = True }) + (\arg opt -> return opt { optCSS = arg, + optStandalone = True }) "CSS") "Link to CSS style sheet" @@ -187,7 +202,8 @@ options = (ReqArg (\arg opt -> do text <- readFile arg - return opt { optIncludeInHeader = text, optStandalone = True }) + return opt { optIncludeInHeader = text, + optStandalone = True }) "FILENAME") "File to include at end of header (implies -s)" @@ -211,13 +227,15 @@ options = (ReqArg (\arg opt -> do text <- readFile arg - return opt { optCustomHeader = text, optStandalone = True }) + return opt { optCustomHeader = text, + optStandalone = True }) "FILENAME") "File to use for custom header (implies -s)" , Option "T" ["title-prefix"] (ReqArg - (\arg opt -> return opt { optTitlePrefix = arg, optStandalone = True }) + (\arg opt -> return opt { optTitlePrefix = arg, + optStandalone = True }) "STRING") "String to prefix to HTML window title" @@ -225,8 +243,8 @@ options = (ReqArg (\arg opt -> do let header = case (lookup arg writers) of - Just (writer, head) -> head - Nothing -> error ("Unknown reader: " ++ arg) + Just (writer, head) -> head + Nothing -> error ("Unknown reader: " ++ arg) hPutStrLn stdout header exitWith ExitSuccess) "FORMAT") @@ -267,13 +285,14 @@ main = do let startParserState = defaultParserState { stateParseRaw = parseRaw, stateTabStop = tabStop, stateStandalone = standalone } - let csslink = if (css == "") then - "" - else - "<link rel=\"stylesheet\" href=\"" ++ css ++ - "\" type=\"text/css\" media=\"all\" />\n" + let csslink = if (css == "") + then "" + else "<link rel=\"stylesheet\" href=\"" ++ css ++ + "\" type=\"text/css\" media=\"all\" />\n" let asciiMathMLLink = if asciiMathML then asciiMathMLScript else "" - let header = (if (customHeader == "DEFAULT") then defaultHeader else customHeader) ++ + let header = (if (customHeader == "DEFAULT") + then defaultHeader + else customHeader) ++ csslink ++ asciiMathMLLink ++ includeHeader let writerOptions = WriterOptions { writerStandalone = standalone, writerHeader = header, diff --git a/src/Text/Pandoc/Definition.hs b/src/Text/Pandoc/Definition.hs index 08ff3905e..b2655ffa0 100644 --- a/src/Text/Pandoc/Definition.hs +++ b/src/Text/Pandoc/Definition.hs @@ -1,5 +1,15 @@ --- | Definition of 'Pandoc' data structure for format-neutral representation --- of documents. +{- | + Module : Text.Pandoc.Definition + Copyright : Copyright (C) 2006 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm at berkeley dot edu> + Stability : unstable + Portability : portable + +Definition of 'Pandoc' data structure for format-neutral representation +of documents. +-} module Text.Pandoc.Definition where data Pandoc = Pandoc Meta [Block] deriving (Eq, Read, Show) @@ -17,21 +27,24 @@ data Block | Blank -- ^ A blank line | Null -- ^ Nothing | Para [Inline] -- ^ Paragraph - | Key [Inline] Target -- ^ Reference key: name (list of inlines) and 'Target' + | Key [Inline] Target -- ^ Reference key: name (inlines) and 'Target' | CodeBlock String -- ^ Code block (literal) | RawHtml String -- ^ Raw HTML block (literal) | BlockQuote [Block] -- ^ Block quote (list of blocks) - | OrderedList [[Block]] -- ^ Ordered list (list of items, each a list of blocks) - | BulletList [[Block]] -- ^ Bullet list (list of items, each a list of blocks) - | Header Int [Inline] -- ^ Header - level (integer) and text (list of inlines) + | OrderedList [[Block]] -- ^ Ordered list (list of items, each + -- a list of blocks) + | BulletList [[Block]] -- ^ Bullet list (list of items, each + -- a list of blocks) + | Header Int [Inline] -- ^ Header - level (integer) and text (inlines) | HorizontalRule -- ^ Horizontal rule - | Note String [Block] -- ^ Footnote or endnote - reference (string), text (list of blocks) + | Note String [Block] -- ^ Footnote or endnote - reference (string), + -- text (list of blocks) deriving (Eq, Read, Show) -- | Target for a link: either a URL or an indirect (labeled) reference. data Target = Src String String -- ^ First string is URL, second is title - | Ref [Inline] -- ^ Label (list of inlines) for an indirect reference + | Ref [Inline] -- ^ Label (list of inlines) for an indirect ref deriving (Show, Eq, Read) -- | Inline elements. @@ -42,9 +55,10 @@ data Inline | Code String -- ^ Inline code (literal) | Space -- ^ Inter-word space | LineBreak -- ^ Hard line break - | TeX String -- ^ LaTeX code (literal) + | TeX String -- ^ LaTeX code (literal) | HtmlInline String -- ^ HTML code (literal) | Link [Inline] Target -- ^ Hyperlink: text (list of inlines) and target - | Image [Inline] Target -- ^ Image: alternative text (list of inlines) and target + | Image [Inline] Target -- ^ Image: alternative text (list of inlines) + -- and target | NoteRef String -- ^ Footnote or endnote reference deriving (Show, Eq, Read) diff --git a/src/Text/Pandoc/HtmlEntities.hs b/src/Text/Pandoc/HtmlEntities.hs index bbb438ef5..a03548388 100644 --- a/src/Text/Pandoc/HtmlEntities.hs +++ b/src/Text/Pandoc/HtmlEntities.hs @@ -1,12 +1,22 @@ --- | Functions for encoding unicode characters as HTML entity --- references, and vice versa. +{- | + Module : Text.Pandoc.HtmlEntities + Copyright : Copyright (C) 2006 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm at berkeley dot edu> + Stability : unstable + Portability : portable + +Functions for encoding unicode characters as HTML entity references, +and vice versa. +-} module Text.Pandoc.HtmlEntities ( htmlEntityToChar, charToHtmlEntity, decodeEntities, encodeEntities ) where -import Char ( chr, ord ) +import Data.Char ( chr, ord ) import Text.Regex ( mkRegex, matchRegexAll ) import Maybe ( fromMaybe ) @@ -19,13 +29,15 @@ characterEntity = mkRegex "&#[0-9]+;|&[A-Za-z0-9]+;" decodeEntities :: String -> String decodeEntities str = case (matchRegexAll characterEntity str) of - Nothing -> str - Just (before, match, rest, _) -> before ++ replacement ++ (decodeEntities rest) + Nothing -> str + Just (before, match, rest, _) -> before ++ replacement ++ + (decodeEntities rest) where replacement = case (htmlEntityToChar match) of Just ch -> [ch] Nothing -> match --- | Returns a string with characters replaced with entity references where possible. +-- | Returns a string with characters replaced with entity references where +-- possible. encodeEntities :: String -> String encodeEntities = concatMap (\c -> fromMaybe [c] (charToHtmlEntity c)) @@ -44,10 +56,9 @@ htmlEntityToChar entity = charToHtmlEntity :: Char -> Maybe String charToHtmlEntity char = let matches = filter (\(entity, character) -> (character == char)) htmlEntityTable in - if (length matches) == 0 then - Nothing - else - Just (fst (head matches)) + if (length matches) == 0 + then Nothing + else Just (fst (head matches)) htmlEntityTable :: [(String, Char)] htmlEntityTable = [ diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index f9a738e94..c157f3b0e 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -1,4 +1,14 @@ --- | Converts HTML to 'Pandoc' document. +{- | + Module : Text.Pandoc.Readers.HTML + Copyright : Copyright (C) 2006 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm at berkeley dot edu> + Stability : unstable + Portability : portable + +Conversion of HTML to 'Pandoc' document. +-} module Text.Pandoc.Readers.HTML ( readHtml, rawHtmlInline, @@ -30,10 +40,11 @@ testString = testStringWith parseHtml -- Constants -- -inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big", "br", "cite", - "code", "dfn", "em", "font", "i", "img", "input", "kbd", "label", "q", - "s", "samp", "select", "small", "span", "strike", "strong", "sub", - "sup", "textarea", "tt", "u", "var"] +inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big", + "br", "cite", "code", "dfn", "em", "font", "i", "img", + "input", "kbd", "label", "q", "s", "samp", "select", + "small", "span", "strike", "strong", "sub", "sup", + "textarea", "tt", "u", "var"] -- -- HTML utility functions @@ -50,9 +61,10 @@ inlinesTilEnd tag = try (do return inlines) -- extract type from a tag: e.g. br from <br>, < br >, </br>, etc. -extractTagType tag = case (matchRegex (mkRegex "<[[:space:]]*/?([A-Za-z0-9]+)") tag) of - Just [match] -> (map toLower match) - Nothing -> "" +extractTagType tag = + case (matchRegex (mkRegex "<[[:space:]]*/?([A-Za-z0-9]+)") tag) of + Just [match] -> (map toLower match) + Nothing -> "" anyHtmlTag = try (do char '<' @@ -90,7 +102,8 @@ htmlTag tag = try (do -- parses a quoted html attribute value quoted quoteChar = do - result <- between (char quoteChar) (char quoteChar) (many (noneOf [quoteChar])) + result <- between (char quoteChar) (char quoteChar) + (many (noneOf [quoteChar])) return (result, [quoteChar]) htmlAttributes = do @@ -116,9 +129,11 @@ htmlRegularAttribute = try (do spaces (content, quoteStr) <- choice [ (quoted '\''), (quoted '"'), - (do{ a <- (many (alphaNum <|> (oneOf "-._:"))); - return (a,"")} ) ] - return (name, content, (" " ++ name ++ "=" ++ quoteStr ++ content ++ quoteStr))) + (do + a <- many (alphaNum <|> (oneOf "-._:")) + return (a,"")) ] + return (name, content, + (" " ++ name ++ "=" ++ quoteStr ++ content ++ quoteStr))) htmlEndTag tag = try (do char '<' @@ -135,17 +150,11 @@ isInline tag = (extractTagType tag) `elem` inlineHtmlTags anyHtmlBlockTag = try (do tag <- choice [anyHtmlTag, anyHtmlEndTag] - if isInline tag then - fail "inline tag" - else - return tag) + if isInline tag then fail "inline tag" else return tag) anyHtmlInlineTag = try (do tag <- choice [ anyHtmlTag, anyHtmlEndTag ] - if isInline tag then - return tag - else - fail "not an inline tag") + if isInline tag then return tag else fail "not an inline tag") -- scripts must be treated differently, because they can contain <> etc. htmlScript = try (do @@ -155,13 +164,11 @@ htmlScript = try (do rawHtmlBlock = try (do notFollowedBy' (choice [htmlTag "/body", htmlTag "/html"]) - body <- choice [htmlScript, anyHtmlBlockTag, htmlComment, xmlDec, definition] + body <- choice [htmlScript, anyHtmlBlockTag, htmlComment, xmlDec, + definition] sp <- (many space) state <- getState - if stateParseRaw state then - return (RawHtml (body ++ sp)) - else - return Null) + if stateParseRaw state then return (RawHtml (body ++ sp)) else return Null) htmlComment = try (do string "<!--" @@ -266,10 +273,10 @@ headerLevel n = try (do hrule = try (do (tag, attribs) <- htmlTag "hr" state <- getState - if (not (null attribs)) && (stateParseRaw state) then - unexpected "attributes in hr" -- in this case we want to parse it as raw html - else - return HorizontalRule) + if (not (null attribs)) && (stateParseRaw state) + then -- in this case we want to parse it as raw html + unexpected "attributes in hr" + else return HorizontalRule) -- -- code blocks @@ -352,29 +359,31 @@ inline = choice [ text, special ] <?> "inline" text = choice [ entity, strong, emph, code, str, linebreak, whitespace ] <?> "text" -special = choice [ link, image, rawHtmlInline ] <?> "link, inline html, or image" +special = choice [ link, image, rawHtmlInline ] <?> + "link, inline html, or image" entity = try (do char '&' - body <- choice [(many1 letter), - (try (do{ char '#'; num <- many1 digit; return ("#" ++ num)}))] + body <- choice [(many1 letter), (try (do + char '#' + num <- many1 digit + return ("#" ++ num)))] char ';' return (Str [fromMaybe '?' (htmlEntityToChar ("&" ++ body ++ ";"))])) code = try (do htmlTag "code" result <- manyTill anyChar (htmlEndTag "code") - -- remove internal line breaks, leading and trailing space, and decode entities - let result' = decodeEntities $ removeLeadingTrailingSpace $ joinWithSep " " $ lines result + -- remove internal line breaks, leading and trailing space, + -- and decode entities + let result' = decodeEntities $ removeLeadingTrailingSpace $ + joinWithSep " " $ lines result return (Code result')) rawHtmlInline = do result <- choice [htmlScript, anyHtmlInlineTag] state <- getState - if stateParseRaw state then - return (HtmlInline result) - else - return (Str "") + if stateParseRaw state then return (HtmlInline result) else return (Str "") betweenTags tag = try (do htmlTag tag diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index a62ff7b94..81004b1f1 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1,4 +1,14 @@ --- | Converts LaTeX to 'Pandoc' document. +{- | + Module : Text.Pandoc.Readers.LaTeX + Copyright : Copyright (C) 2006 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm at berkeley dot edu> + Stability : unstable + Portability : portable + +Conversion of LaTeX to 'Pandoc' document. +-} module Text.Pandoc.Readers.LaTeX ( readLaTeX, rawLaTeXInline, @@ -81,7 +91,8 @@ end name = try (do spaces return name) --- | Returns a list of block elements containing the contents of an environment. +-- | Returns a list of block elements containing the contents of an +-- environment. environment name = try (do begin name spaces @@ -104,15 +115,16 @@ anyEnvironment = try (do -- | Process LaTeX preamble, extracting metadata. processLaTeXPreamble = do - manyTill (choice [bibliographic, comment, unknownCommand, nullBlock]) (try (string "\\begin{document}")) + manyTill (choice [bibliographic, comment, unknownCommand, nullBlock]) + (try (string "\\begin{document}")) spaces -- | Parse LaTeX and return 'Pandoc'. parseLaTeX = do - option () processLaTeXPreamble -- preamble might not be present, if a fragment + option () processLaTeXPreamble -- preamble might not be present (fragment) blocks <- parseBlocks spaces - option "" (string "\\end{document}") -- if parsing a fragment, this might not be present + option "" (string "\\end{document}") -- might not be present (in fragment) spaces eof state <- getState @@ -122,7 +134,8 @@ parseLaTeX = do let title' = stateTitle state let authors' = stateAuthors state let date' = stateDate state - return (Pandoc (Meta title' authors' date') (blocks' ++ (reverse noteBlocks) ++ (reverse keyBlocks))) + return (Pandoc (Meta title' authors' date') + (blocks' ++ (reverse noteBlocks) ++ (reverse keyBlocks))) -- -- parsing blocks @@ -133,9 +146,10 @@ parseBlocks = do result <- many block return result -block = choice [ hrule, codeBlock, header, list, blockQuote, mathBlock, comment, - bibliographic, para, specialEnvironment, itemBlock, unknownEnvironment, - unknownCommand ] <?> "block" +block = choice [ hrule, codeBlock, header, list, blockQuote, mathBlock, + comment, bibliographic, para, specialEnvironment, + itemBlock, unknownEnvironment, unknownCommand ] <?> + "block" -- -- header blocks @@ -157,7 +171,8 @@ headerLevel n = try (do -- hrule = try (do - oneOfStrings [ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n", "\\newpage" ] + oneOfStrings [ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n", + "\\newpage" ] spaces return HorizontalRule) @@ -166,8 +181,10 @@ hrule = try (do -- codeBlock = try (do - string "\\begin{verbatim}" -- don't use begin function because it gobbles whitespace - option "" blanklines -- we want to gobble blank lines, but not leading space + string "\\begin{verbatim}" -- don't use begin function because it + -- gobbles whitespace + option "" blanklines -- we want to gobble blank lines, but not + -- leading space contents <- manyTill anyChar (try (string "\\end{verbatim}")) spaces return (CodeBlock (stripTrailingNewlines contents))) @@ -266,7 +283,8 @@ authors = try (do string "\\author{" authors <- manyTill anyChar (char '}') spaces - let authors' = map removeLeadingTrailingSpace $ lines $ gsub "\\\\\\\\" "\n" authors + let authors' = map removeLeadingTrailingSpace $ lines $ + gsub "\\\\\\\\" "\n" authors updateState (\state -> state { stateAuthors = authors' }) return Null) @@ -286,21 +304,19 @@ date = try (do itemBlock = try (do ("item", _, args) <- command state <- getState - if (stateParserContext state == ListItemState) then - fail "item should be handled by list block" - else - if null args then - return Null - else - return (Plain [Str (stripFirstAndLast (head args))])) + if (stateParserContext state == ListItemState) + then fail "item should be handled by list block" + else if null args + then return Null + else return (Plain [Str (stripFirstAndLast (head args))])) -- -- raw LaTeX -- specialEnvironment = do -- these are always parsed as raw - followedBy' (choice (map (\name -> begin name) ["tabular", "figure", "tabbing", "eqnarry", - "picture", "table", "verse", "theorem"])) + followedBy' (choice (map (\name -> begin name) ["tabular", "figure", + "tabbing", "eqnarry", "picture", "table", "verse", "theorem"])) rawLaTeXEnvironment -- | Parse any LaTeX environment and return a Para block containing @@ -316,18 +332,20 @@ rawLaTeXEnvironment = try (do args <- option [] commandArgs let argStr = concat args contents <- manyTill (choice [(many1 (noneOf "\\")), - (do{ (Para [TeX str]) <- rawLaTeXEnvironment; return str }), - string "\\"]) (end name') + (do + (Para [TeX str]) <- rawLaTeXEnvironment + return str), + string "\\" ]) + (end name') spaces return (Para [TeX ("\\begin{" ++ name' ++ "}" ++ argStr ++ - (concat contents) ++ "\\end{" ++ name' ++ "}")])) + (concat contents) ++ "\\end{" ++ name' ++ "}")])) unknownEnvironment = try (do state <- getState - result <- if stateParseRaw state then -- check to see whether we should include raw TeX - rawLaTeXEnvironment -- if so, get the whole raw environment - else - anyEnvironment -- otherwise just the contents + result <- if stateParseRaw state -- check whether we should include raw TeX + then rawLaTeXEnvironment -- if so, get whole raw environment + else anyEnvironment -- otherwise just the contents return result) unknownCommand = try (do @@ -338,14 +356,12 @@ unknownCommand = try (do spaces let argStr = concat args state <- getState - if (name == "item") && ((stateParserContext state) == ListItemState) then - fail "should not be parsed as raw" - else - string "" - if stateParseRaw state then - return (Plain [TeX ("\\" ++ name ++ star ++ argStr)]) - else - return (Plain [Str (joinWithSep " " args)])) + if (name == "item") && ((stateParserContext state) == ListItemState) + then fail "should not be parsed as raw" + else string "" + if stateParseRaw state + then return (Plain [TeX ("\\" ++ name ++ star ++ argStr)]) + else return (Plain [Str (joinWithSep " " args)])) -- latex comment comment = try (do @@ -358,9 +374,9 @@ comment = try (do -- inline -- -inline = choice [ strong, emph, ref, lab, code, linebreak, math, ldots, accentedChar, - specialChar, specialInline, escapedChar, unescapedChar, str, - endline, whitespace ] <?> "inline" +inline = choice [ strong, emph, ref, lab, code, linebreak, math, ldots, + accentedChar, specialChar, specialInline, escapedChar, + unescapedChar, str, endline, whitespace ] <?> "inline" specialInline = choice [ link, image, footnote, rawLaTeXInline ] <?> "link, raw TeX, note, or image" @@ -397,8 +413,8 @@ accentTable = ('o', [('`', 242), ('\'', 243), ('^', 244), ('~', 245), ('"', 246)]), ('u', [('`', 249), ('\'', 250), ('^', 251), ('"', 252)]) ] -specialAccentedChar = choice [ ccedil, aring, iuml, szlig, aelig, oslash, pound, - euro, copyright, sect ] +specialAccentedChar = choice [ ccedil, aring, iuml, szlig, aelig, + oslash, pound, euro, copyright, sect ] ccedil = try (do char '\\' @@ -563,15 +579,14 @@ image = try (do footnote = try (do (name, _, (contents:[])) <- command - if ((name == "footnote") || (name == "thanks")) then - string "" - else - fail "not a footnote or thanks command" + if ((name == "footnote") || (name == "thanks")) + then string "" + else fail "not a footnote or thanks command" let contents' = stripFirstAndLast contents state <- getState let blocks = case runParser parseBlocks state "footnote" contents of - Left err -> error $ "Input:\n" ++ show contents' ++ - "\nError:\n" ++ show err + Left err -> error $ "Input:\n" ++ show contents' ++ + "\nError:\n" ++ show err Right result -> result let notes = stateNoteBlocks state let nextRef = case notes of @@ -586,8 +601,7 @@ rawLaTeXInline = try (do (name, star, args) <- command let argStr = concat args state <- getState - if ((name == "begin") || (name == "end") || (name == "item")) then - fail "not an inline command" - else - string "" + if ((name == "begin") || (name == "end") || (name == "item")) + then fail "not an inline command" + else string "" return (TeX ("\\" ++ name ++ star ++ argStr))) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 034e5d8e4..9ca73dee5 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1,4 +1,14 @@ --- | Convert markdown to Pandoc document. +{- | + Module : Text.Pandoc.Readers.Markdown + Copyright : Copyright (C) 2006 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm at berkeley dot edu> + Stability : unstable + Portability : portable + +Conversion of markdown-formatted plain text to 'Pandoc' document. +-} module Text.Pandoc.Readers.Markdown ( readMarkdown ) where @@ -8,8 +18,8 @@ import Text.ParserCombinators.Pandoc import Text.Pandoc.Definition import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment ) import Text.Pandoc.Shared -import Text.Pandoc.Readers.HTML ( rawHtmlInline, rawHtmlBlock, anyHtmlBlockTag, - anyHtmlInlineTag ) +import Text.Pandoc.Readers.HTML ( rawHtmlInline, rawHtmlBlock, + anyHtmlBlockTag, anyHtmlInlineTag ) import Text.Pandoc.HtmlEntities ( decodeEntities ) import Text.Regex ( matchRegex, mkRegex ) import Text.ParserCombinators.Parsec @@ -57,9 +67,10 @@ blockQuoteChar = '>' hyphenChar = '-' -- treat these as potentially non-text when parsing inline: -specialChars = [escapeChar, labelStart, labelEnd, emphStart, emphEnd, emphStartAlt, - emphEndAlt, codeStart, codeEnd, autoLinkEnd, autoLinkStart, mathStart, - mathEnd, imageStart, noteStart, hyphenChar] +specialChars = [escapeChar, labelStart, labelEnd, emphStart, emphEnd, + emphStartAlt, emphEndAlt, codeStart, codeEnd, autoLinkEnd, + autoLinkStart, mathStart, mathEnd, imageStart, noteStart, + hyphenChar] -- -- auxiliary functions @@ -115,14 +126,16 @@ numberOfNote (Note ref _) = (read ref) numberOfNote _ = 0 parseMarkdown = do - updateState (\state -> state { stateParseRaw = True }) -- need to parse raw HTML + updateState (\state -> state { stateParseRaw = True }) + -- need to parse raw HTML, since markdown allows it (title, author, date) <- option ([],[],"") titleBlock blocks <- parseBlocks let blocks' = filter (/= Null) blocks state <- getState let keys = reverse $ stateKeyBlocks state let notes = reverse $ stateNoteBlocks state - let sortedNotes = sortBy (\x y -> compare (numberOfNote x) (numberOfNote y)) notes + let sortedNotes = sortBy (\x y -> compare (numberOfNote x) + (numberOfNote y)) notes return (Pandoc (Meta title author date) (blocks' ++ sortedNotes ++ keys)) -- @@ -133,8 +146,9 @@ parseBlocks = do result <- manyTill block eof return result -block = choice [ codeBlock, note, referenceKey, header, hrule, list, blockQuote, rawHtmlBlocks, - rawLaTeXEnvironment, para, plain, blankBlock, nullBlock ] <?> "block" +block = choice [ codeBlock, note, referenceKey, header, hrule, list, + blockQuote, rawHtmlBlocks, rawLaTeXEnvironment, para, + plain, blankBlock, nullBlock ] <?> "block" -- -- header blocks @@ -154,33 +168,33 @@ atxClosing = try (do newline option "" blanklines) -setextHeader = choice (map (\x -> setextH x) (enumFromTo 1 (length setextHChars))) +setextHeader = choice $ + map (\x -> setextH x) (enumFromTo 1 (length setextHChars)) setextH n = try (do - txt <- many1 (do {notFollowedBy newline; inline}) - endline - many1 (char (setextHChars !! (n-1))) - skipSpaces - newline - option "" blanklines - return (Header n (normalizeSpaces txt))) + txt <- many1 (do {notFollowedBy newline; inline}) + endline + many1 (char (setextHChars !! (n-1))) + skipSpaces + newline + option "" blanklines + return (Header n (normalizeSpaces txt))) -- -- hrule block -- -hruleWith chr = - try (do - skipSpaces - char chr - skipSpaces - char chr - skipSpaces - char chr - skipMany (oneOf (chr:spaceChars)) - newline - option "" blanklines - return HorizontalRule) +hruleWith chr = try (do + skipSpaces + char chr + skipSpaces + char chr + skipSpaces + char chr + skipMany (oneOf (chr:spaceChars)) + newline + option "" blanklines + return HorizontalRule) hrule = choice (map hruleWith hruleChars) <?> "hrule" @@ -189,9 +203,9 @@ hrule = choice (map hruleWith hruleChars) <?> "hrule" -- indentedLine = try (do - indentSpaces - result <- manyTill anyChar newline - return (result ++ "\n")) + indentSpaces + result <- manyTill anyChar newline + return (result ++ "\n")) -- two or more indented lines, possibly separated by blank lines indentedBlock = try (do @@ -201,62 +215,66 @@ indentedBlock = try (do return (res1 ++ blanks ++ res2)) codeBlock = do - result <- choice [indentedBlock, indentedLine] - option "" blanklines - return (CodeBlock (stripTrailingNewlines result)) + result <- choice [indentedBlock, indentedLine] + option "" blanklines + return (CodeBlock (stripTrailingNewlines result)) -- -- note block -- rawLine = try (do - notFollowedBy' blankline - notFollowedBy' noteMarker - contents <- many1 nonEndline - end <- option "" (do - newline - option "" indentSpaces - return "\n") - return (contents ++ end)) + notFollowedBy' blankline + notFollowedBy' noteMarker + contents <- many1 nonEndline + end <- option "" (do + newline + option "" indentSpaces + return "\n") + return (contents ++ end)) rawLines = do lines <- many1 rawLine return (concat lines) note = try (do - ref <- noteMarker - char ':' - skipSpaces - skipEndline - raw <- sepBy rawLines (try (do {blankline; indentSpaces})) - option "" blanklines - -- parse the extracted text, which may contain various block elements: - state <- getState - let parsed = case runParser parseBlocks (state {stateParserContext = BlockQuoteState}) "block" ((joinWithSep "\n" raw) ++ "\n\n") of - Left err -> error $ "Raw block:\n" ++ show raw ++ "\nError:\n" ++ show err - Right result -> result - let identifiers = stateNoteIdentifiers state - case (findIndex (== ref) identifiers) of - Just n -> updateState (\s -> s {stateNoteBlocks = - (Note (show (n+1)) parsed):(stateNoteBlocks s)}) - Nothing -> updateState id - return Null) + ref <- noteMarker + char ':' + skipSpaces + skipEndline + raw <- sepBy rawLines (try (do {blankline; indentSpaces})) + option "" blanklines + -- parse the extracted text, which may contain various block elements: + state <- getState + let parsed = case runParser parseBlocks + (state {stateParserContext = BlockQuoteState}) "block" + ((joinWithSep "\n" raw) ++ "\n\n") of + Left err -> error $ "Raw block:\n" ++ show raw ++ + "\nError:\n" ++ show err + Right result -> result + let identifiers = stateNoteIdentifiers state + case (findIndex (== ref) identifiers) of + Just n -> updateState (\s -> s {stateNoteBlocks = + (Note (show (n+1)) parsed):(stateNoteBlocks s)}) + Nothing -> updateState id + return Null) -- -- block quotes -- emacsBoxQuote = try (do - string ",----" - manyTill anyChar newline - raw <- manyTill (try (do{ char '|'; - option ' ' (char ' '); - result <- manyTill anyChar newline; - return result})) - (string "`----") - manyTill anyChar newline - option "" blanklines - return raw) + string ",----" + manyTill anyChar newline + raw <- manyTill (try (do + char '|' + option ' ' (char ' ') + result <- manyTill anyChar newline + return result)) + (string "`----") + manyTill anyChar newline + option "" blanklines + return raw) emailBlockQuoteStart = try (do skipNonindentSpaces @@ -265,24 +283,28 @@ emailBlockQuoteStart = try (do return "> ") emailBlockQuote = try (do - emailBlockQuoteStart - raw <- sepBy (many (choice [nonEndline, - (try (do{ endline; - notFollowedBy' emailBlockQuoteStart; - return '\n'}))])) - (try (do {newline; emailBlockQuoteStart})) - newline <|> (do{ eof; return '\n'}) - option "" blanklines - return raw) + emailBlockQuoteStart + raw <- sepBy (many (choice [nonEndline, + (try (do + endline + notFollowedBy' emailBlockQuoteStart + return '\n'))])) + (try (do {newline; emailBlockQuoteStart})) + newline <|> (do{ eof; return '\n' }) + option "" blanklines + return raw) blockQuote = do - raw <- choice [ emailBlockQuote, emacsBoxQuote ] - -- parse the extracted block, which may contain various block elements: - state <- getState - let parsed = case runParser parseBlocks (state {stateParserContext = BlockQuoteState}) "block" ((joinWithSep "\n" raw) ++ "\n\n") of - Left err -> error $ "Raw block:\n" ++ show raw ++ "\nError:\n" ++ show err - Right result -> result - return (BlockQuote parsed) + raw <- choice [ emailBlockQuote, emacsBoxQuote ] + -- parse the extracted block, which may contain various block elements: + state <- getState + let parsed = case runParser parseBlocks + (state {stateParserContext = BlockQuoteState}) "block" + ((joinWithSep "\n" raw) ++ "\n\n") of + Left err -> error $ "Raw block:\n" ++ show raw ++ + "\nError:\n" ++ show err + Right result -> result + return (BlockQuote parsed) -- -- list blocks @@ -290,85 +312,81 @@ blockQuote = do list = choice [ bulletList, orderedList ] <?> "list" -bulletListStart = - try (do - option ' ' newline -- if preceded by a Plain block in a list context - skipNonindentSpaces - notFollowedBy' hrule -- because hrules start out just like lists - oneOf bulletListMarkers - spaceChar - skipSpaces) - -orderedListStart = - try (do - option ' ' newline -- if preceded by a Plain block in a list context - skipNonindentSpaces - many1 digit <|> count 1 letter - oneOf orderedListDelimiters - oneOf spaceChars - skipSpaces) +bulletListStart = try (do + option ' ' newline -- if preceded by a Plain block in a list context + skipNonindentSpaces + notFollowedBy' hrule -- because hrules start out just like lists + oneOf bulletListMarkers + spaceChar + skipSpaces) + +orderedListStart = try (do + option ' ' newline -- if preceded by a Plain block in a list context + skipNonindentSpaces + many1 digit <|> count 1 letter + oneOf orderedListDelimiters + oneOf spaceChars + skipSpaces) -- parse a line of a list item (start = parser for beginning of list item) listLine start = try (do notFollowedBy' start notFollowedBy blankline - notFollowedBy' (do{ indentSpaces; - many (spaceChar); - choice [bulletListStart, orderedListStart]}) + notFollowedBy' (do + indentSpaces + many (spaceChar) + choice [bulletListStart, orderedListStart]) line <- manyTill anyChar newline return (line ++ "\n")) -- parse raw text for one list item, excluding start marker and continuations -rawListItem start = - try (do - start - result <- many1 (listLine start) - blanks <- many blankline - return ((concat result) ++ blanks)) +rawListItem start = try (do + start + result <- many1 (listLine start) + blanks <- many blankline + return ((concat result) ++ blanks)) -- continuation of a list item - indented and separated by blankline -- or (in compact lists) endline. -- note: nested lists are parsed as continuations -listContinuation start = - try (do - followedBy' indentSpaces - result <- many1 (listContinuationLine start) - blanks <- many blankline - return ((concat result) ++ blanks)) +listContinuation start = try (do + followedBy' indentSpaces + result <- many1 (listContinuationLine start) + blanks <- many blankline + return ((concat result) ++ blanks)) listContinuationLine start = try (do - notFollowedBy' blankline - notFollowedBy' start - option "" indentSpaces - result <- manyTill anyChar newline - return (result ++ "\n")) - -listItem start = - try (do - first <- rawListItem start - rest <- many (listContinuation start) - -- parsing with ListItemState forces markers at beginning of lines to - -- count as list item markers, even if not separated by blank space. - -- see definition of "endline" - state <- getState - let parsed = case runParser parseBlocks (state {stateParserContext = ListItemState}) - "block" raw of - Left err -> error $ "Raw block:\n" ++ raw ++ "\nError:\n" ++ show err - Right result -> result - where raw = concat (first:rest) - return parsed) - -orderedList = - try (do - items <- many1 (listItem orderedListStart) - let items' = compactify items - return (OrderedList items')) - -bulletList = - try (do - items <- many1 (listItem bulletListStart) - let items' = compactify items - return (BulletList items')) + notFollowedBy' blankline + notFollowedBy' start + option "" indentSpaces + result <- manyTill anyChar newline + return (result ++ "\n")) + +listItem start = try (do + first <- rawListItem start + rest <- many (listContinuation start) + -- parsing with ListItemState forces markers at beginning of lines to + -- count as list item markers, even if not separated by blank space. + -- see definition of "endline" + state <- getState + let parsed = case runParser parseBlocks + (state {stateParserContext = ListItemState}) + "block" raw of + Left err -> error $ "Raw block:\n" ++ raw ++ + "\nError:\n" ++ show err + Right result -> result + where raw = concat (first:rest) + return parsed) + +orderedList = try (do + items <- many1 (listItem orderedListStart) + let items' = compactify items + return (OrderedList items')) + +bulletList = try (do + items <- many1 (listItem bulletListStart) + let items' = compactify items + return (BulletList items')) -- -- paragraph block @@ -377,7 +395,10 @@ bulletList = para = try (do result <- many1 inline newline - choice [ (do{ followedBy' (oneOfStrings [">", ",----"]); return "" }), blanklines ] + choice [ (do + followedBy' (oneOfStrings [">", ",----"]) + return "" ), + blanklines ] let result' = normalizeSpaces result return (Para result')) @@ -391,30 +412,28 @@ plain = do -- rawHtmlBlocks = try (do - htmlBlocks <- many1 rawHtmlBlock - let combined = concatMap (\(RawHtml str) -> str) htmlBlocks - let combined' = if (last combined == '\n') then - init combined -- strip extra newline - else - combined - return (RawHtml combined')) + htmlBlocks <- many1 rawHtmlBlock + let combined = concatMap (\(RawHtml str) -> str) htmlBlocks + let combined' = if (last combined == '\n') + then init combined -- strip extra newline + else combined + return (RawHtml combined')) -- -- reference key -- -referenceKey = - try (do - skipSpaces - label <- reference - char labelSep - skipSpaces - option ' ' (char autoLinkStart) - src <- many (noneOf (titleOpeners ++ [autoLinkEnd] ++ endLineChars)) - option ' ' (char autoLinkEnd) - tit <- option "" title - blanklines - return (Key label (Src (removeTrailingSpace src) tit))) +referenceKey = try (do + skipSpaces + label <- reference + char labelSep + skipSpaces + option ' ' (char autoLinkStart) + src <- many (noneOf (titleOpeners ++ [autoLinkEnd] ++ endLineChars)) + option ' ' (char autoLinkEnd) + tit <- option "" title + blanklines + return (Key label (Src (removeTrailingSpace src) tit))) -- -- inline @@ -423,10 +442,11 @@ referenceKey = text = choice [ math, strong, emph, code2, code1, str, linebreak, tabchar, whitespace, endline ] <?> "text" -inline = choice [ rawLaTeXInline, escapedChar, special, hyphens, text, ltSign, symbol ] <?> "inline" +inline = choice [ rawLaTeXInline, escapedChar, special, hyphens, text, + ltSign, symbol ] <?> "inline" -special = choice [ noteRef, inlineNote, link, referenceLink, rawHtmlInline, autoLink, - image ] <?> "link, inline html, note, or image" +special = choice [ noteRef, inlineNote, link, referenceLink, rawHtmlInline, + autoLink, image ] <?> "link, inline html, note, or image" escapedChar = escaped anyChar @@ -443,30 +463,33 @@ symbol = do hyphens = try (do result <- many1 (char '-') - if (length result) == 1 then - skipEndline -- don't want to treat endline after hyphen as a space - else - do{ string ""; return Space } + if (length result) == 1 + then skipEndline -- don't want to treat endline after hyphen as a space + else do{ string ""; return Space } return (Str result)) -- parses inline code, between codeStart and codeEnd -code1 = - try (do - char codeStart - result <- many (noneOf [codeEnd]) - char codeEnd - let result' = removeLeadingTrailingSpace $ joinWithSep " " $ lines result -- get rid of any internal newlines - return (Code result')) +code1 = try (do + char codeStart + result <- many (noneOf [codeEnd]) + char codeEnd + -- get rid of any internal newlines + let result' = removeLeadingTrailingSpace $ joinWithSep " " $ lines result + return (Code result')) -- parses inline code, between 2 codeStarts and 2 codeEnds -code2 = - try (do - string [codeStart, codeStart] - result <- manyTill anyChar (try (string [codeEnd, codeEnd])) - let result' = removeLeadingTrailingSpace $ joinWithSep " " $ lines result -- get rid of any internal newlines - return (Code result')) - -mathWord = many1 (choice [(noneOf (" \t\n\\" ++ [mathEnd])), (try (do {c <- char '\\'; notFollowedBy (char mathEnd); return c}))]) +code2 = try (do + string [codeStart, codeStart] + result <- manyTill anyChar (try (string [codeEnd, codeEnd])) + let result' = removeLeadingTrailingSpace $ joinWithSep " " $ lines result + -- get rid of any internal newlines + return (Code result')) + +mathWord = many1 (choice [ (noneOf (" \t\n\\" ++ [mathEnd])), + (try (do + c <- char '\\' + notFollowedBy (char mathEnd) + return c))]) math = try (do char mathStart @@ -477,12 +500,14 @@ math = try (do emph = do result <- choice [ (enclosed (char emphStart) (char emphEnd) inline), - (enclosed (char emphStartAlt) (char emphEndAlt) inline) ] + (enclosed (char emphStartAlt) (char emphEndAlt) inline) ] return (Emph (normalizeSpaces result)) strong = do - result <- choice [ (enclosed (count 2 (char emphStart)) (count 2 (char emphEnd)) inline), - (enclosed (count 2 (char emphStartAlt)) (count 2 (char emphEndAlt)) inline)] + result <- choice [ (enclosed (count 2 (char emphStart)) + (count 2 (char emphEnd)) inline), + (enclosed (count 2 (char emphStartAlt)) + (count 2 (char emphEndAlt)) inline) ] return (Strong (normalizeSpaces result)) whitespace = do @@ -507,23 +532,21 @@ str = do return (Str (decodeEntities result)) -- an endline character that can be treated as a space, not a structural break -endline = - try (do - newline - -- next line would allow block quotes without preceding blank line - -- Markdown.pl does allow this, but there's a chance of a wrapped - -- greater-than sign triggering a block quote by accident... --- notFollowedBy' (choice [emailBlockQuoteStart, string ",----"]) - notFollowedBy blankline - -- parse potential list starts at beginning of line differently if in a list: - st <- getState - if (stateParserContext st) == ListItemState then - do - notFollowedBy' orderedListStart - notFollowedBy' bulletListStart - else - option () pzero - return Space) +endline = try (do + newline + -- next line would allow block quotes without preceding blank line + -- Markdown.pl does allow this, but there's a chance of a wrapped + -- greater-than sign triggering a block quote by accident... + -- notFollowedBy' (choice [emailBlockQuoteStart, string ",----"]) + notFollowedBy blankline + -- parse potential list-starts differently if in a list: + st <- getState + if (stateParserContext st) == ListItemState + then do + notFollowedBy' orderedListStart + notFollowedBy' bulletListStart + else option () pzero + return Space) -- -- links @@ -537,92 +560,92 @@ reference = do return (normalizeSpaces label) -- source for a link, with optional title -source = - try (do - char srcStart - option ' ' (char autoLinkStart) - src <- many (noneOf ([srcEnd, autoLinkEnd] ++ titleOpeners)) - option ' ' (char autoLinkEnd) - tit <- option "" title - skipSpaces - char srcEnd - return (Src (removeTrailingSpace src) tit)) - -titleWith startChar endChar = - try (do - skipSpaces - skipEndline -- a title can be on the next line from the source - skipSpaces - char startChar - tit <- manyTill (choice [ try (do {char '\\'; char endChar}), - (noneOf (endChar:endLineChars)) ]) (char endChar) - let tit' = gsub "\"" """ tit - return tit') - -title = choice [titleWith '(' ')', titleWith '"' '"', titleWith '\'' '\''] <?> "title" +source = try (do + char srcStart + option ' ' (char autoLinkStart) + src <- many (noneOf ([srcEnd, autoLinkEnd] ++ titleOpeners)) + option ' ' (char autoLinkEnd) + tit <- option "" title + skipSpaces + char srcEnd + return (Src (removeTrailingSpace src) tit)) + +titleWith startChar endChar = try (do + skipSpaces + skipEndline -- a title can be on the next line from the source + skipSpaces + char startChar + tit <- manyTill (choice [ try (do {char '\\'; char endChar}), + (noneOf (endChar:endLineChars)) ]) (char endChar) + let tit' = gsub "\"" """ tit + return tit') + +title = choice [ titleWith '(' ')', + titleWith '"' '"', + titleWith '\'' '\''] <?> "title" link = choice [explicitLink, referenceLink] <?> "link" -explicitLink = - try (do - label <- reference - src <- source - return (Link label src)) +explicitLink = try (do + label <- reference + src <- source + return (Link label src)) referenceLink = choice [referenceLinkDouble, referenceLinkSingle] -referenceLinkDouble = -- a link like [this][/url/] - try (do - label <- reference - skipSpaces - skipEndline - skipSpaces - ref <- reference - return (Link label (Ref ref))) - -referenceLinkSingle = -- a link like [this] - try (do - label <- reference - return (Link label (Ref []))) - -autoLink = -- a link <like.this.com> - try (do - notFollowedBy' anyHtmlBlockTag - src <- between (char autoLinkStart) (char autoLinkEnd) - (many (noneOf (spaceChars ++ endLineChars ++ [autoLinkEnd]))) - case (matchRegex emailAddress src) of - Just _ -> return (Link [Str src] (Src ("mailto:" ++ src) "")) - Nothing -> return (Link [Str src] (Src src ""))) - -emailAddress = mkRegex "([^@:/]+)@(([^.]+[.]?)*([^.]+))" -- presupposes no whitespace - -image = - try (do - char imageStart - (Link label src) <- link - return (Image label src)) +-- a link like [this][/url/] +referenceLinkDouble = try (do + label <- reference + skipSpaces + skipEndline + skipSpaces + ref <- reference + return (Link label (Ref ref))) + +-- a link like [this] +referenceLinkSingle = try (do + label <- reference + return (Link label (Ref []))) + +-- a link <like.this.com> +autoLink = try (do + notFollowedBy' anyHtmlBlockTag + src <- between (char autoLinkStart) (char autoLinkEnd) + (many (noneOf (spaceChars ++ endLineChars ++ [autoLinkEnd]))) + case (matchRegex emailAddress src) of + Just _ -> return (Link [Str src] (Src ("mailto:" ++ src) "")) + Nothing -> return (Link [Str src] (Src src ""))) + +emailAddress = + mkRegex "([^@:/]+)@(([^.]+[.]?)*([^.]+))" -- presupposes no whitespace + +image = try (do + char imageStart + (Link label src) <- link + return (Image label src)) noteMarker = try (do - char labelStart - char noteStart - manyTill (noneOf " \t\n") (char labelEnd)) + char labelStart + char noteStart + manyTill (noneOf " \t\n") (char labelEnd)) noteRef = try (do - ref <- noteMarker - state <- getState - let identifiers = (stateNoteIdentifiers state) ++ [ref] - updateState (\st -> st {stateNoteIdentifiers = identifiers}) - return (NoteRef (show (length identifiers)))) + ref <- noteMarker + state <- getState + let identifiers = (stateNoteIdentifiers state) ++ [ref] + updateState (\st -> st {stateNoteIdentifiers = identifiers}) + return (NoteRef (show (length identifiers)))) inlineNote = try (do - char noteStart - char labelStart - contents <- manyTill inline (char labelEnd) - state <- getState - let identifiers = stateNoteIdentifiers state - let ref = show $ (length identifiers) + 1 - let noteBlocks = stateNoteBlocks state - updateState (\st -> st {stateNoteIdentifiers = (identifiers ++ [ref]), - stateNoteBlocks = (Note ref [Para contents]):noteBlocks}) - return (NoteRef ref)) + char noteStart + char labelStart + contents <- manyTill inline (char labelEnd) + state <- getState + let identifiers = stateNoteIdentifiers state + let ref = show $ (length identifiers) + 1 + let noteBlocks = stateNoteBlocks state + updateState (\st -> st {stateNoteIdentifiers = (identifiers ++ [ref]), + stateNoteBlocks = + (Note ref [Para contents]):noteBlocks}) + return (NoteRef ref)) diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 69c7d9baa..1672e06dc 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -1,4 +1,14 @@ --- | Parse reStructuredText and return Pandoc document. +{- | + Module : Text.Pandoc.Readers.RST + Copyright : Copyright (C) 2006 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm at berkeley dot edu> + Stability : unstable + Portability : portable + +Conversion from reStructuredText to 'Pandoc' document. +-} module Text.Pandoc.Readers.RST ( readRST ) where @@ -61,16 +71,14 @@ promoteHeaders num [] = [] -- promote all the other headers. titleTransform :: [Block] -- ^ list of blocks -> ([Block], [Inline]) -- ^ modified list of blocks, title -titleTransform ((Header 1 head1):(Header 2 head2):rest) = -- title and subtitle - if (any isHeader1 rest) || (any isHeader2 rest) then - ((Header 1 head1):(Header 2 head2):rest, []) - else - ((promoteHeaders 2 rest), head1 ++ [Str ":", Space] ++ head2) +titleTransform ((Header 1 head1):(Header 2 head2):rest) = -- title subtitle + if (any isHeader1 rest) || (any isHeader2 rest) + then ((Header 1 head1):(Header 2 head2):rest, []) + else ((promoteHeaders 2 rest), head1 ++ [Str ":", Space] ++ head2) titleTransform ((Header 1 head1):rest) = -- title, no subtitle - if (any isHeader1 rest) then - ((Header 1 head1):rest, []) - else - ((promoteHeaders 1 rest), head1) + if (any isHeader1 rest) + then ((Header 1 head1):rest, []) + else ((promoteHeaders 1 rest), head1) titleTransform blocks = (blocks, []) parseRST = do @@ -78,17 +86,18 @@ parseRST = do input <- getInput blocks <- parseBlocks -- first pass let anonymousKeys = filter isAnonKeyBlock blocks - let blocks' = if (null anonymousKeys) then - blocks - else -- run parser again to fill in anonymous links... - case runParser parseBlocks (state { stateKeyBlocks = anonymousKeys }) + let blocks' = if (null anonymousKeys) + then blocks + else -- run parser again to fill in anonymous links... + case runParser parseBlocks + (state { stateKeyBlocks = anonymousKeys }) "RST source, second pass" input of - Left err -> error $ "\nError:\n" ++ show err - Right result -> (filter isNotAnonKeyBlock result) - let (blocks'', title) = if stateStandalone state then - titleTransform blocks' - else - (blocks', []) + Left err -> error $ "\nError:\n" ++ show err + Right result -> + (filter isNotAnonKeyBlock result) + let (blocks'', title) = if stateStandalone state + then titleTransform blocks' + else (blocks', []) state <- getState let authors = stateAuthors state let date = stateDate state @@ -103,9 +112,10 @@ parseBlocks = do result <- manyTill block eof return result -block = choice [ codeBlock, rawHtmlBlock, rawLaTeXBlock, blockQuote, referenceKey, - imageBlock, unknownDirective, header, hrule, list, fieldList, lineBlock, - para, plain, blankBlock, nullBlock ] <?> "block" +block = choice [ codeBlock, rawHtmlBlock, rawLaTeXBlock, blockQuote, + referenceKey, imageBlock, unknownDirective, header, + hrule, list, fieldList, lineBlock, para, plain, + blankBlock, nullBlock ] <?> "block" -- -- field list @@ -117,28 +127,32 @@ fieldListItem = try (do string ": " skipSpaces first <- manyTill anyChar newline - rest <- many (do{ notFollowedBy (char ':'); - notFollowedBy blankline; - skipSpaces; - manyTill anyChar newline }) + rest <- many (do + notFollowedBy (char ':') + notFollowedBy blankline + skipSpaces + manyTill anyChar newline ) return (name, (joinWithSep " " (first:rest)))) fieldList = try (do items <- many1 fieldListItem blanklines let authors = case (lookup "Authors" items) of - Just auth -> [auth] - Nothing -> map snd (filter (\(x,y) -> x == "Author") items) + Just auth -> [auth] + Nothing -> map snd (filter (\(x,y) -> x == "Author") items) let date = case (lookup "Date" items) of - Just dat -> dat - Nothing -> "" + Just dat -> dat + Nothing -> "" let title = case (lookup "Title" items) of - Just tit -> [Str tit] - Nothing -> [] - let remaining = filter (\(x,y) -> (x /= "Authors") && (x /= "Author") && (x /= "Date") && - (x /= "Title")) items - let result = map (\(x,y) -> Para [Strong [Str x], Str ":", Space, Str y]) remaining - updateState (\st -> st { stateAuthors = authors, stateDate = date, stateTitle = title }) + Just tit -> [Str tit] + Nothing -> [] + let remaining = filter (\(x,y) -> (x /= "Authors") && (x /= "Author") && + (x /= "Date") && (x /= "Title")) items + let result = map (\(x,y) -> + Para [Strong [Str x], Str ":", Space, Str y]) remaining + updateState (\st -> st { stateAuthors = authors, + stateDate = date, + stateTitle = title }) return (BlockQuote result)) -- @@ -164,18 +178,17 @@ lineBlock = try (do para = choice [ paraBeforeCodeBlock, paraNormal ] <?> "paragraph" codeBlockStart = try (do - string "::" - blankline - blankline) + string "::" + blankline + blankline) -- paragraph that ends in a :: starting a code block paraBeforeCodeBlock = try (do result <- many1 (do {notFollowedBy' codeBlockStart; inline}) followedBy' (string "::") - return (Para (if (last result == Space) then - normalizeSpaces result - else - (normalizeSpaces result) ++ [Str ":"]))) + return (Para (if (last result == Space) + then normalizeSpaces result + else (normalizeSpaces result) ++ [Str ":"]))) -- regular paragraph paraNormal = try (do @@ -195,9 +208,9 @@ plain = do -- imageBlock = try (do - string ".. image:: " - src <- manyTill anyChar newline - return (Plain [Image [Str "image"] (Src src "")])) + string ".. image:: " + src <- manyTill anyChar newline + return (Plain [Image [Str "image"] (Src src "")])) -- -- header blocks @@ -207,59 +220,58 @@ header = choice [ doubleHeader, singleHeader ] <?> "header" -- a header with lines on top and bottom doubleHeader = try (do - c <- oneOf underlineChars - rest <- many (char c) -- the top line - let lenTop = length (c:rest) - skipSpaces - newline - txt <- many1 (do {notFollowedBy blankline; inline}) - pos <- getPosition - let len = (sourceColumn pos) - 1 - if (len > lenTop) then fail "title longer than border" else (do {return ()}) - blankline -- spaces and newline - count lenTop (char c) -- the bottom line - blanklines - -- check to see if we've had this kind of header before. - -- if so, get appropriate level. if not, add to list. - state <- getState - let headerTable = stateHeaderTable state - let (headerTable', level) = case findIndex (== DoubleHeader c) headerTable of - Just ind -> (headerTable, ind + 1) - Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1) - setState (state { stateHeaderTable = headerTable' }) - return (Header level (normalizeSpaces txt))) + c <- oneOf underlineChars + rest <- many (char c) -- the top line + let lenTop = length (c:rest) + skipSpaces + newline + txt <- many1 (do {notFollowedBy blankline; inline}) + pos <- getPosition + let len = (sourceColumn pos) - 1 + if (len > lenTop) then fail "title longer than border" else (do {return ()}) + blankline -- spaces and newline + count lenTop (char c) -- the bottom line + blanklines + -- check to see if we've had this kind of header before. + -- if so, get appropriate level. if not, add to list. + state <- getState + let headerTable = stateHeaderTable state + let (headerTable',level) = case findIndex (== DoubleHeader c) headerTable of + Just ind -> (headerTable, ind + 1) + Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1) + setState (state { stateHeaderTable = headerTable' }) + return (Header level (normalizeSpaces txt))) -- a header with line on the bottom only singleHeader = try (do - notFollowedBy' whitespace - txt <- many1 (do {notFollowedBy blankline; inline}) - pos <- getPosition - let len = (sourceColumn pos) - 1 - blankline - c <- oneOf underlineChars - rest <- count (len - 1) (char c) - many (char c) - blanklines - state <- getState - let headerTable = stateHeaderTable state - let (headerTable', level) = case findIndex (== SingleHeader c) headerTable of - Just ind -> (headerTable, ind + 1) - Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1) - setState (state { stateHeaderTable = headerTable' }) - return (Header level (normalizeSpaces txt))) + notFollowedBy' whitespace + txt <- many1 (do {notFollowedBy blankline; inline}) + pos <- getPosition + let len = (sourceColumn pos) - 1 + blankline + c <- oneOf underlineChars + rest <- count (len - 1) (char c) + many (char c) + blanklines + state <- getState + let headerTable = stateHeaderTable state + let (headerTable',level) = case findIndex (== SingleHeader c) headerTable of + Just ind -> (headerTable, ind + 1) + Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1) + setState (state { stateHeaderTable = headerTable' }) + return (Header level (normalizeSpaces txt))) -- -- hrule block -- -hruleWith chr = - try (do - count 4 (char chr) - skipMany (char chr) - skipSpaces - newline - blanklines - return HorizontalRule) +hruleWith chr = try (do + count 4 (char chr) + skipMany (char chr) + skipSpaces + newline + blanklines + return HorizontalRule) hrule = choice (map hruleWith underlineChars) <?> "hrule" @@ -269,9 +281,9 @@ hrule = choice (map hruleWith underlineChars) <?> "hrule" -- read a line indented by a given string indentedLine indents = try (do - string indents - result <- manyTill anyChar newline - return (result ++ "\n")) + string indents + result <- manyTill anyChar newline + return (result ++ "\n")) -- two or more indented lines, possibly separated by blank lines -- if variable = True, then any indent will work, but it must be consistent through the block @@ -279,54 +291,59 @@ indentedLine indents = try (do indentedBlock variable = try (do state <- getState let tabStop = stateTabStop state - indents <- if variable then - many1 (oneOf " \t") - else - oneOfStrings ["\t", (replicate tabStop ' ')] + indents <- if variable + then many1 (oneOf " \t") + else oneOfStrings ["\t", (replicate tabStop ' ')] firstline <- manyTill anyChar newline rest <- many (choice [ indentedLine indents, - try (do {b <- blanklines; l <- indentedLine indents; return (b ++ l)})]) + try (do + b <- blanklines + l <- indentedLine indents + return (b ++ l))]) option "" blanklines return (firstline ++ "\n" ++ (concat rest))) codeBlock = try (do - codeBlockStart - result <- indentedBlock False -- the False means we want one tab stop indent on each line - return (CodeBlock (stripTrailingNewlines result))) + codeBlockStart + result <- indentedBlock False + -- the False means we want one tab stop indent on each line + return (CodeBlock (stripTrailingNewlines result))) -- -- raw html -- rawHtmlBlock = try (do - string ".. raw:: html" - blanklines - result <- indentedBlock True - return (RawHtml result)) + string ".. raw:: html" + blanklines + result <- indentedBlock True + return (RawHtml result)) -- -- raw latex -- rawLaTeXBlock = try (do - string ".. raw:: latex" - blanklines - result <- indentedBlock True - return (Para [(TeX result)])) + string ".. raw:: latex" + blanklines + result <- indentedBlock True + return (Para [(TeX result)])) -- -- block quotes -- blockQuote = try (do - block <- indentedBlock True - -- parse the extracted block, which may contain various block elements: - state <- getState - let parsed = case runParser parseBlocks (state {stateParserContext = BlockQuoteState}) - "block" (block ++ "\n\n") of - Left err -> error $ "Raw block:\n" ++ show block ++ "\nError:\n" ++ show err - Right result -> result - return (BlockQuote parsed)) + block <- indentedBlock True + -- parse the extracted block, which may contain various block elements: + state <- getState + let parsed = case runParser parseBlocks + (state {stateParserContext = BlockQuoteState}) + "block" (block ++ "\n\n") of + Left err -> error $ "Raw block:\n" ++ show block ++ + "\nError:\n" ++ show err + Right result -> result + return (BlockQuote parsed)) -- -- list blocks @@ -335,34 +352,36 @@ blockQuote = try (do list = choice [ bulletList, orderedList ] <?> "list" -- parses bullet list start and returns its length (inc. following whitespace) -bulletListStart = - try (do - notFollowedBy' hrule -- because hrules start out just like lists - marker <- oneOf bulletListMarkers - white <- many1 spaceChar - let len = length (marker:white) - return len) +bulletListStart = try (do + notFollowedBy' hrule -- because hrules start out just like lists + marker <- oneOf bulletListMarkers + white <- many1 spaceChar + let len = length (marker:white) + return len) withPeriodSuffix parser = try (do - a <- parser - b <- char '.' - return (a ++ [b])) + a <- parser + b <- char '.' + return (a ++ [b])) withParentheses parser = try (do - a <- char '(' - b <- parser - c <- char ')' - return ([a] ++ b ++ [c])) + a <- char '(' + b <- parser + c <- char ')' + return ([a] ++ b ++ [c])) withRightParen parser = try (do - a <- parser - b <- char ')' - return (a ++ [b])) + a <- parser + b <- char ')' + return (a ++ [b])) upcaseWord = map toUpper romanNumeral = do - let lowerNumerals = ["i", "ii", "iii", "iiii", "iv", "v", "vi", "vii", "viii", "ix", "x", "xi", "xii", "xiii", "xiv", "xv", "xvi", "xvii", "xviii", "xix", "xx", "xxi", "xxii", "xxiii", "xxiv" ] + let lowerNumerals = ["i", "ii", "iii", "iiii", "iv", "v", "vi", + "vii", "viii", "ix", "x", "xi", "xii", "xiii", + "xiv", "xv", "xvi", "xvii", "xviii", "xix", "xx", + "xxi", "xxii", "xxiii", "xxiv" ] let upperNumerals = map upcaseWord lowerNumerals result <- choice $ map string (lowerNumerals ++ upperNumerals) return result @@ -372,15 +391,14 @@ orderedListEnumerator = choice [ many1 digit, count 1 letter, romanNumeral ] --- parses ordered list start and returns its length (inc. following whitespace) -orderedListStart = - try (do - marker <- choice [ withPeriodSuffix orderedListEnumerator, - withParentheses orderedListEnumerator, - withRightParen orderedListEnumerator ] - white <- many1 spaceChar - let len = length (marker ++ white) - return len) +-- parses ordered list start and returns its length (inc following whitespace) +orderedListStart = try (do + marker <- choice [ withPeriodSuffix orderedListEnumerator, + withParentheses orderedListEnumerator, + withRightParen orderedListEnumerator ] + white <- many1 spaceChar + let len = length (marker ++ white) + return len) -- parse a line of a list item listLine markerLength = try (do @@ -393,72 +411,73 @@ listLine markerLength = try (do indentWith num = do state <- getState let tabStop = stateTabStop state - if (num < tabStop) then - count num (char ' ') - else - choice [ try (count num (char ' ')), - (try (do {char '\t'; count (num - tabStop) (char ' ')})) ] + if (num < tabStop) + then count num (char ' ') + else choice [ try (count num (char ' ')), + (try (do {char '\t'; count (num - tabStop) (char ' ')})) ] -- parse raw text for one list item, excluding start marker and continuations -rawListItem start = - try (do - markerLength <- start - firstLine <- manyTill anyChar newline - restLines <- many (listLine markerLength) - return (markerLength, (firstLine ++ "\n" ++ (concat restLines)))) - --- continuation of a list item - indented and separated by blankline or (in compact lists) --- endline. Note: nested lists are parsed as continuations. -listContinuation markerLength = - try (do - blanks <- many1 blankline - result <- many1 (listLine markerLength) - return (blanks ++ (concat result))) - -listItem start = - try (do - (markerLength, first) <- rawListItem start - rest <- many (listContinuation markerLength) - blanks <- choice [ try (do {b <- many blankline; followedBy' start; return b}), - many1 blankline ] -- whole list must end with blank - -- parsing with ListItemState forces markers at beginning of lines to - -- count as list item markers, even if not separated by blank space. - -- see definition of "endline" - state <- getState - let parsed = case runParser parseBlocks (state {stateParserContext = ListItemState}) - "list item" raw of - Left err -> error $ "Raw:\n" ++ raw ++ "\nError:\n" ++ show err - Right result -> result - where raw = concat (first:rest) ++ blanks - return parsed) - -orderedList = - try (do - items <- many1 (listItem orderedListStart) - let items' = compactify items - return (OrderedList items')) - -bulletList = - try (do - items <- many1 (listItem bulletListStart) - let items' = compactify items - return (BulletList items')) +rawListItem start = try (do + markerLength <- start + firstLine <- manyTill anyChar newline + restLines <- many (listLine markerLength) + return (markerLength, (firstLine ++ "\n" ++ (concat restLines)))) + +-- continuation of a list item - indented and separated by blankline or +-- (in compact lists) endline. +-- Note: nested lists are parsed as continuations. +listContinuation markerLength = try (do + blanks <- many1 blankline + result <- many1 (listLine markerLength) + return (blanks ++ (concat result))) + +listItem start = try (do + (markerLength, first) <- rawListItem start + rest <- many (listContinuation markerLength) + blanks <- choice [ try (do + b <- many blankline + followedBy' start + return b), + many1 blankline ] -- whole list must end with blank + -- parsing with ListItemState forces markers at beginning of lines to + -- count as list item markers, even if not separated by blank space. + -- see definition of "endline" + state <- getState + let parsed = case runParser parseBlocks + (state {stateParserContext = ListItemState}) "list item" + raw of + Left err -> error $ "Raw:\n" ++ raw ++ + "\nError:\n" ++ show err + Right result -> result + where raw = concat (first:rest) ++ blanks + return parsed) + +orderedList = try (do + items <- many1 (listItem orderedListStart) + let items' = compactify items + return (OrderedList items')) + +bulletList = try (do + items <- many1 (listItem bulletListStart) + let items' = compactify items + return (BulletList items')) -- -- unknown directive (e.g. comment) -- unknownDirective = try (do - string ".. " - manyTill anyChar newline - many (do {string " "; - char ':'; - many1 (noneOf "\n:"); - char ':'; - many1 (noneOf "\n"); - newline}) - option "" blanklines - return Null) + string ".. " + manyTill anyChar newline + many (do + string " " + char ':' + many1 (noneOf "\n:") + char ':' + many1 (noneOf "\n") + newline) + option "" blanklines + return Null) -- -- reference key @@ -467,39 +486,43 @@ unknownDirective = try (do referenceKey = choice [imageKey, anonymousKey, regularKeyQuoted, regularKey] imageKey = try (do - string ".. |" - ref <- manyTill inline (char '|') - skipSpaces - string "image::" - src <- manyTill anyChar newline - return (Key (normalizeSpaces ref) (Src (removeLeadingTrailingSpace src) ""))) + string ".. |" + ref <- manyTill inline (char '|') + skipSpaces + string "image::" + src <- manyTill anyChar newline + return (Key (normalizeSpaces ref) + (Src (removeLeadingTrailingSpace src) ""))) anonymousKey = try (do - choice [string ".. __:", string "__"] - skipSpaces - src <- manyTill anyChar newline - state <- getState - return (Key [Str "_"] (Src (removeLeadingTrailingSpace src) ""))) + choice [string ".. __:", string "__"] + skipSpaces + src <- manyTill anyChar newline + state <- getState + return (Key [Str "_"] (Src (removeLeadingTrailingSpace src) ""))) regularKeyQuoted = try (do - string ".. _`" - ref <- manyTill inline (string "`:") - skipSpaces - src <- manyTill anyChar newline - return (Key (normalizeSpaces ref) (Src (removeLeadingTrailingSpace src) ""))) + string ".. _`" + ref <- manyTill inline (string "`:") + skipSpaces + src <- manyTill anyChar newline + return (Key (normalizeSpaces ref) + (Src (removeLeadingTrailingSpace src) ""))) regularKey = try (do - string ".. _" - ref <- manyTill inline (char ':') - skipSpaces - src <- manyTill anyChar newline - return (Key (normalizeSpaces ref) (Src (removeLeadingTrailingSpace src) ""))) + string ".. _" + ref <- manyTill inline (char ':') + skipSpaces + src <- manyTill anyChar newline + return (Key (normalizeSpaces ref) + (Src (removeLeadingTrailingSpace src) ""))) -- -- inline -- -text = choice [ strong, emph, code, str, tabchar, whitespace, endline ] <?> "text" +text = choice [ strong, emph, code, str, tabchar, whitespace, + endline ] <?> "text" inline = choice [ escapedChar, special, hyphens, text, symbol ] <?> "inline" @@ -507,7 +530,8 @@ special = choice [ link, image ] <?> "link, inline html, or image" hyphens = try (do result <- many1 (char '-') - option Space endline -- don't want to treat endline after hyphen or dash as a space + option Space endline + -- don't want to treat endline after hyphen or dash as a space return (Str result)) escapedChar = escaped anyChar @@ -517,12 +541,11 @@ symbol = do return (Str [result]) -- parses inline code, between codeStart and codeEnd -code = - try (do - string "``" - result <- manyTill anyChar (string "``") - let result' = removeLeadingTrailingSpace $ joinWithSep " " $ lines result - return (Code result')) +code = try (do + string "``" + result <- manyTill anyChar (string "``") + let result' = removeLeadingTrailingSpace $ joinWithSep " " $ lines result + return (Code result')) emph = do result <- enclosed (char '*') (char '*') inline @@ -546,99 +569,95 @@ str = do return (Str result) -- an endline character that can be treated as a space, not a structural break -endline = - try (do - newline - notFollowedBy blankline - -- parse potential list starts at beginning of line differently if in a list: - st <- getState - if ((stateParserContext st) == ListItemState) then - notFollowedBy' (choice [orderedListStart, bulletListStart]) - else - option () pzero - return Space) +endline = try (do + newline + notFollowedBy blankline + -- parse potential list-starts at beginning of line differently in a list: + st <- getState + if ((stateParserContext st) == ListItemState) + then notFollowedBy' (choice [orderedListStart, bulletListStart]) + else option () pzero + return Space) -- -- links -- -link = choice [explicitLink, referenceLink, autoLink, oneWordReferenceLink] <?> "link" - -explicitLink = - try (do - char '`' - label <- manyTill inline (try (do {spaces; char '<'})) - src <- manyTill (noneOf ">\n ") (char '>') - skipSpaces - string "`_" - return (Link (normalizeSpaces label) (Src (removeLeadingTrailingSpace src) ""))) - -anonymousLinkEnding = - try (do - char '_' - state <- getState - let anonKeys = stateKeyBlocks state - -- if there's a list of anon key refs (from previous pass), pop one off. - -- otherwise return an anon key ref for the next pass to take care of... - case anonKeys of - (Key [Str "_"] src):rest -> - do{ setState (state { stateKeyBlocks = rest }); - return src } - otherwise -> return (Ref [Str "_"])) - -referenceLink = - try (do - char '`' - label <- manyTill inline (string "`_") - src <- option (Ref []) anonymousLinkEnding - return (Link (normalizeSpaces label) src)) - -oneWordReferenceLink = - try (do - label <- many1 alphaNum - char '_' - src <- option (Ref []) anonymousLinkEnding - notFollowedBy alphaNum -- because this_is_not a link - return (Link [Str label] src)) - -uriScheme = oneOfStrings [ "http://", "https://", "ftp://", "file://", "mailto:", - "news:", "telnet:" ] +link = choice [explicitLink, referenceLink, autoLink, + oneWordReferenceLink] <?> "link" + +explicitLink = try (do + char '`' + label <- manyTill inline (try (do {spaces; char '<'})) + src <- manyTill (noneOf ">\n ") (char '>') + skipSpaces + string "`_" + return (Link (normalizeSpaces label) + (Src (removeLeadingTrailingSpace src) ""))) + +anonymousLinkEnding = try (do + char '_' + state <- getState + let anonKeys = stateKeyBlocks state + -- if there's a list of anon key refs (from previous pass), pop one off. + -- otherwise return an anon key ref for the next pass to take care of... + case anonKeys of + (Key [Str "_"] src):rest -> + do + setState (state { stateKeyBlocks = rest }) + return src + otherwise -> return (Ref [Str "_"])) + +referenceLink = try (do + char '`' + label <- manyTill inline (string "`_") + src <- option (Ref []) anonymousLinkEnding + return (Link (normalizeSpaces label) src)) + +oneWordReferenceLink = try (do + label <- many1 alphaNum + char '_' + src <- option (Ref []) anonymousLinkEnding + notFollowedBy alphaNum -- because this_is_not a link + return (Link [Str label] src)) + +uriScheme = oneOfStrings [ "http://", "https://", "ftp://", "file://", + "mailto:", "news:", "telnet:" ] uri = try (do - scheme <- uriScheme - identifier <- many1 (noneOf " \t\n") - return (scheme ++ identifier)) + scheme <- uriScheme + identifier <- many1 (noneOf " \t\n") + return (scheme ++ identifier)) autoURI = try (do - src <- uri - return (Link [Str src] (Src src ""))) + src <- uri + return (Link [Str src] (Src src ""))) emailChar = alphaNum <|> oneOf "-+_." emailAddress = try (do - firstLetter <- alphaNum - restAddr <- many emailChar - let addr = firstLetter:restAddr - char '@' - dom <- domain - return (addr ++ '@':dom)) + firstLetter <- alphaNum + restAddr <- many emailChar + let addr = firstLetter:restAddr + char '@' + dom <- domain + return (addr ++ '@':dom)) domainChar = alphaNum <|> char '-' domain = try (do - first <- many1 domainChar - dom <- many1 (try (do{ char '.'; many1 domainChar })) - return (joinWithSep "." (first:dom))) + first <- many1 domainChar + dom <- many1 (try (do{ char '.'; many1 domainChar })) + return (joinWithSep "." (first:dom))) autoEmail = try (do - src <- emailAddress - return (Link [Str src] (Src ("mailto:" ++ src) ""))) + src <- emailAddress + return (Link [Str src] (Src ("mailto:" ++ src) ""))) autoLink = autoURI <|> autoEmail -- For now, we assume that all substitution references are for images. -image = - try (do - char '|' - ref <- manyTill inline (char '|') - return (Image (normalizeSpaces ref) (Ref ref))) +image = try (do + char '|' + ref <- manyTill inline (char '|') + return (Image (normalizeSpaces ref) (Ref ref))) diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index a420e3766..0bedef0bc 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -1,4 +1,14 @@ --- | Utility functions and definitions used by the various Pandoc modules. +{- | + Module : Text.Pandoc.Shared + Copyright : Copyright (C) 2006 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm at berkeley dot edu> + Stability : unstable + Portability : portable + +Utility functions and definitions used by the various Pandoc modules. +-} module Text.Pandoc.Shared ( -- * Text processing gsub, @@ -50,17 +60,16 @@ readWith :: GenParser Char ParserState a -- ^ parser -> a readWith parser state input = case runParser parser state "source" input of - Left err -> error $ "\nError:\n" ++ show err + Left err -> error $ "\nError:\n" ++ show err Right result -> result -- | Parse a string with @parser@ (for testing). testStringWith :: (Show a) => GenParser Char ParserState a - -> String - -> IO () -testStringWith parser str = putStrLn $ show $ readWith parser defaultParserState str - --- | Parser state + -> String + -> IO () +testStringWith parser str = putStrLn $ show $ + readWith parser defaultParserState str data HeaderType = SingleHeader Char -- ^ Single line of characters underneath @@ -68,24 +77,28 @@ data HeaderType deriving (Eq, Show) data ParserContext - = BlockQuoteState -- ^ Used when running parser on contents of blockquote - | ListItemState -- ^ Used when running parser on list item contents - | NullState -- ^ Default state + = BlockQuoteState -- ^ Used when running parser on contents of blockquote + | ListItemState -- ^ Used when running parser on list item contents + | NullState -- ^ Default state deriving (Eq, Show) data ParserState = ParserState - { stateParseRaw :: Bool, -- ^ Parse untranslatable HTML and LaTeX? - stateParserContext :: ParserContext, -- ^ What are we parsing? - stateKeyBlocks :: [Block], -- ^ List of reference key blocks - stateKeysUsed :: [[Inline]], -- ^ List of references used so far - stateNoteBlocks :: [Block], -- ^ List of note blocks - stateNoteIdentifiers :: [String], -- ^ List of footnote identifiers, in order encountered - stateTabStop :: Int, -- ^ Tab stop - stateStandalone :: Bool, -- ^ If @True@, parse bibliographic info - stateTitle :: [Inline], -- ^ Title of document - stateAuthors :: [String], -- ^ Authors of document - stateDate :: String, -- ^ Date of document - stateHeaderTable :: [HeaderType] -- ^ List of header types used, in what order (for reStructuredText only) + { stateParseRaw :: Bool, -- ^ Parse untranslatable HTML + -- and LaTeX? + stateParserContext :: ParserContext, -- ^ What are we parsing? + stateKeyBlocks :: [Block], -- ^ List of reference key blocks + stateKeysUsed :: [[Inline]], -- ^ List of references used + stateNoteBlocks :: [Block], -- ^ List of note blocks + stateNoteIdentifiers :: [String], -- ^ List of footnote identifiers + -- in the order encountered + stateTabStop :: Int, -- ^ Tab stop + stateStandalone :: Bool, -- ^ If @True@, parse + -- bibliographic info + stateTitle :: [Inline], -- ^ Title of document + stateAuthors :: [String], -- ^ Authors of document + stateDate :: String, -- ^ Date of document + stateHeaderTable :: [HeaderType] -- ^ List of header types used, + -- in what order (rst only) } deriving Show @@ -115,9 +128,9 @@ consolidateList (inline:rest) = inline:(consolidateList rest) consolidateList [] = [] -- | Indent string as a block. -indentBy :: Int -- ^ Number of spaces to indent the block - -> Int -- ^ Number of spaces to indent first line, relative to block - -> String -- ^ Contents of block to indent +indentBy :: Int -- ^ Number of spaces to indent the block + -> Int -- ^ Number of spaces (rel to block) to indent first line + -> String -- ^ Contents of block to indent -> String indentBy num first [] = "" indentBy num first str = @@ -130,19 +143,27 @@ prettyBlockList :: Int -- ^ Number of spaces to indent list of blocks -> [Block] -- ^ List of blocks -> String prettyBlockList indent [] = indentBy indent 0 "[]" -prettyBlockList indent blocks = indentBy indent (-2) $ "[ " ++ (joinWithSep "\n, " (map prettyBlock blocks)) ++ " ]" +prettyBlockList indent blocks = indentBy indent (-2) $ "[ " ++ + (joinWithSep "\n, " (map prettyBlock blocks)) ++ " ]" -- | Prettyprint Pandoc block element. prettyBlock :: Block -> String -prettyBlock (BlockQuote blocks) = "BlockQuote\n " ++ (prettyBlockList 2 blocks) -prettyBlock (Note ref blocks) = "Note " ++ (show ref) ++ "\n " ++ (prettyBlockList 2 blocks) -prettyBlock (OrderedList blockLists) = "OrderedList\n" ++ indentBy 2 0 ("[ " ++ (joinWithSep ", " (map (\blocks -> prettyBlockList 2 blocks) blockLists))) ++ " ]" -prettyBlock (BulletList blockLists) = "BulletList\n" ++ indentBy 2 0 ("[ " ++ (joinWithSep ", " (map (\blocks -> prettyBlockList 2 blocks) blockLists))) ++ " ]" +prettyBlock (BlockQuote blocks) = "BlockQuote\n " ++ + (prettyBlockList 2 blocks) +prettyBlock (Note ref blocks) = "Note " ++ (show ref) ++ "\n " ++ + (prettyBlockList 2 blocks) +prettyBlock (OrderedList blockLists) = + "OrderedList\n" ++ indentBy 2 0 ("[ " ++ (joinWithSep ", " + (map (\blocks -> prettyBlockList 2 blocks) blockLists))) ++ " ]" +prettyBlock (BulletList blockLists) = "BulletList\n" ++ + indentBy 2 0 ("[ " ++ (joinWithSep ", " + (map (\blocks -> prettyBlockList 2 blocks) blockLists))) ++ " ]" prettyBlock block = show block -- | Prettyprint Pandoc document. prettyPandoc :: Pandoc -> String -prettyPandoc (Pandoc meta blocks) = "Pandoc " ++ "(" ++ (show meta) ++ ")\n" ++ (prettyBlockList 0 blocks) ++ "\n" +prettyPandoc (Pandoc meta blocks) = "Pandoc " ++ "(" ++ (show meta) ++ + ")\n" ++ (prettyBlockList 0 blocks) ++ "\n" -- | Convert tabs to spaces (with adjustable tab stop). tabsToSpaces :: Int -- ^ Tabstop @@ -160,7 +181,9 @@ tabsInLine num tabstop "" = "" tabsInLine num tabstop (c:cs) = let replacement = (if (c == '\t') then (replicate num ' ') else [c]) in let nextnumraw = (num - (length replacement)) in - let nextnum = if (nextnumraw < 1) then (nextnumraw + tabstop) else nextnumraw in + let nextnum = if (nextnumraw < 1) + then (nextnumraw + tabstop) + else nextnumraw in replacement ++ (tabsInLine nextnum tabstop cs) -- | Substitute string for every occurrence of regular expression. @@ -175,10 +198,9 @@ backslashEscape :: [Char] -- ^ list of special characters to escape -> String -- ^ string input -> String backslashEscape special [] = [] -backslashEscape special (x:xs) = if x `elem` special then - '\\':x:(backslashEscape special xs) - else - x:(backslashEscape special xs) +backslashEscape special (x:xs) = if x `elem` special + then '\\':x:(backslashEscape special xs) + else x:(backslashEscape special xs) -- | Escape string by applying a function, but don't touch anything that matches regex. escapePreservingRegex :: (String -> String) -- ^ Escaping function @@ -187,10 +209,9 @@ escapePreservingRegex :: (String -> String) -- ^ Escaping function -> String escapePreservingRegex escapeFunction regex str = case (matchRegexAll regex str) of - Nothing -> escapeFunction str - Just (before, matched, after, _) -> - (escapeFunction before) ++ matched ++ - (escapePreservingRegex escapeFunction regex after) + Nothing -> escapeFunction str + Just (before, matched, after, _) -> (escapeFunction before) ++ + matched ++ (escapePreservingRegex escapeFunction regex after) -- | Returns @True@ if string ends with given character. endsWith :: Char -> [Char] -> Bool @@ -213,10 +234,9 @@ joinWithSep sep lst = foldr1 (\a b -> a ++ sep ++ b) lst stripTrailingNewlines :: String -> String stripTrailingNewlines "" = "" stripTrailingNewlines str = - if (last str) == '\n' then - stripTrailingNewlines (init str) - else - str + if (last str) == '\n' + then stripTrailingNewlines (init str) + else str -- | Remove leading and trailing space (including newlines) from string. removeLeadingTrailingSpace :: String -> String @@ -224,7 +244,8 @@ removeLeadingTrailingSpace = removeLeadingSpace . removeTrailingSpace -- | Remove leading space (including newlines) from string. removeLeadingSpace :: String -> String -removeLeadingSpace = dropWhile (\x -> (x == ' ') || (x == '\n') || (x == '\t')) +removeLeadingSpace = dropWhile (\x -> (x == ' ') || (x == '\n') || + (x == '\t')) -- | Remove trailing space (including newlines) from string. removeTrailingSpace :: String -> String @@ -248,12 +269,17 @@ normalizeSpaces list = removeDoubles (Space:Space:rest) = removeDoubles (Space:rest) removeDoubles (x:rest) = x:(removeDoubles rest) in let removeLeading [] = [] - removeLeading lst = if ((head lst) == Space) then tail lst else lst in + removeLeading lst = if ((head lst) == Space) + then tail lst + else lst in let removeTrailing [] = [] - removeTrailing lst = if ((last lst) == Space) then init lst else lst in + removeTrailing lst = if ((last lst) == Space) + then init lst + else lst in removeLeading $ removeTrailing $ removeDoubles list --- | Change final list item from @Para@ to @Plain@ if the list should be compact. +-- | Change final list item from @Para@ to @Plain@ if the list should +-- be compact. compactify :: [[Block]] -- ^ List of list items (each a list of blocks) -> [[Block]] compactify [] = [] @@ -261,30 +287,34 @@ compactify items = let final = last items others = init items in case final of - [Para a] -> if any containsPara others then items else others ++ [[Plain a]] + [Para a] -> if any containsPara others + then items + else others ++ [[Plain a]] otherwise -> items containsPara :: [Block] -> Bool containsPara [] = False containsPara ((Para a):rest) = True -containsPara ((BulletList items):rest) = (any containsPara items) || (containsPara rest) -containsPara ((OrderedList items):rest) = (any containsPara items) || (containsPara rest) +containsPara ((BulletList items):rest) = (any containsPara items) || + (containsPara rest) +containsPara ((OrderedList items):rest) = (any containsPara items) || + (containsPara rest) containsPara (x:rest) = containsPara rest -- | Options for writers data WriterOptions = WriterOptions - { writerStandalone :: Bool -- ^ If @True@, writer header and footer - , writerTitlePrefix :: String -- ^ Prefix for HTML titles - , writerHeader :: String -- ^ Header for the document - , writerIncludeBefore :: String -- ^ String to include before the document body - , writerIncludeAfter :: String -- ^ String to include after the document body - , writerSmart :: Bool -- ^ If @True@, use smart quotes, dashes, and ellipses - , writerS5 :: Bool -- ^ @True@ if we're writing S5 instead of normal HTML - , writerIncremental :: Bool -- ^ If @True@, display S5 lists incrementally - , writerNumberSections :: Bool -- ^ If @True@, number sections in LaTeX - , writerTabStop :: Int -- ^ Tabstop for conversion between spaces and tabs - } - deriving Show + { writerStandalone :: Bool -- ^ If @True@, writer header and footer + , writerTitlePrefix :: String -- ^ Prefix for HTML titles + , writerHeader :: String -- ^ Header for the document + , writerIncludeBefore :: String -- ^ String to include before the body + , writerIncludeAfter :: String -- ^ String to include after the body + , writerSmart :: Bool -- ^ If @True@, use smart typography + , writerS5 :: Bool -- ^ @True@ if we're writing S5 + , writerIncremental :: Bool -- ^ If @True@, inceremental S5 lists + , writerNumberSections :: Bool -- ^ If @True@, number sections in LaTeX + , writerTabStop :: Int -- ^ Tabstop for conversion between + -- spaces and tabs + } deriving Show -- -- Functions for constructing lists of reference keys @@ -296,10 +326,9 @@ keyFoundIn :: [Block] -- ^ List of key blocks to search -> Target -- ^ Target to search for -> Maybe String keyFoundIn [] src = Nothing -keyFoundIn ((Key [Str num] src1):rest) src = if (src1 == src) then - Just num - else - keyFoundIn rest src +keyFoundIn ((Key [Str num] src1):rest) src = if (src1 == src) + then Just num + else keyFoundIn rest src keyFoundIn (_:rest) src = keyFoundIn rest src -- | Return next unique numerical key, given keyList @@ -308,7 +337,7 @@ nextUniqueKey keys = let nums = [1..10000] notAKey n = not (any (== [Str (show n)]) keys) in case (find notAKey nums) of - Just x -> show x + Just x -> show x Nothing -> error "Could not find unique key for reference link" -- | Generate a reference for a URL (either an existing reference, if @@ -325,8 +354,10 @@ generateReference url title = do Just num -> return (Ref [Str num]) Nothing -> do let nextNum = nextUniqueKey keysUsed - updateState (\st -> st {stateKeyBlocks = (Key [Str nextNum] src):keyBlocks, - stateKeysUsed = [Str nextNum]:keysUsed}) + updateState (\st -> st { stateKeyBlocks = + (Key [Str nextNum] src):keyBlocks, + stateKeysUsed = + [Str nextNum]:keysUsed }) return (Ref [Str nextNum]) -- @@ -348,21 +379,25 @@ keyTable ((Key ref target):lst) = (((ref, target):table), rest) where (table, rest) = keyTable lst keyTable (Null:lst) = keyTable lst -- get rid of Nulls keyTable (Blank:lst) = keyTable lst -- get rid of Blanks -keyTable ((BlockQuote blocks):lst) = ((table1 ++ table2), ((BlockQuote rest1):rest2)) +keyTable ((BlockQuote blocks):lst) = ((table1 ++ table2), + ((BlockQuote rest1):rest2)) where (table1, rest1) = keyTable blocks (table2, rest2) = keyTable lst -keyTable ((Note ref blocks):lst) = ((table1 ++ table2), ((Note ref rest1):rest2)) +keyTable ((Note ref blocks):lst) = ((table1 ++ table2), + ((Note ref rest1):rest2)) where (table1, rest1) = keyTable blocks (table2, rest2) = keyTable lst -keyTable ((OrderedList blockLists):lst) = ((table1 ++ table2), ((OrderedList rest1):rest2)) - where results = map keyTable blockLists - rest1 = map snd results - table1 = concatMap fst results +keyTable ((OrderedList blockLists):lst) = ((table1 ++ table2), + ((OrderedList rest1):rest2)) + where results = map keyTable blockLists + rest1 = map snd results + table1 = concatMap fst results (table2, rest2) = keyTable lst -keyTable ((BulletList blockLists):lst) = ((table1 ++ table2), ((BulletList rest1):rest2)) - where results = map keyTable blockLists - rest1 = map snd results - table1 = concatMap fst results +keyTable ((BulletList blockLists):lst) = ((table1 ++ table2), + ((BulletList rest1):rest2)) + where results = map keyTable blockLists + rest1 = map snd results + table1 = concatMap fst results (table2, rest2) = keyTable lst keyTable (other:lst) = (table, (other:rest)) where (table, rest) = keyTable lst @@ -372,55 +407,79 @@ lookupKeySrc :: KeyTable -- ^ Key table -> [Inline] -- ^ Key -> Maybe Target lookupKeySrc table key = case table of - [] -> Nothing - (k, src):rest -> if (refsMatch k key) then Just src else lookupKeySrc rest key + [] -> Nothing + (k, src):rest -> if (refsMatch k key) + then Just src + else lookupKeySrc rest key -- | Returns @True@ if keys match (case insensitive). refsMatch :: [Inline] -> [Inline] -> Bool -refsMatch ((Str x):restx) ((Str y):resty) = ((map toLower x) == (map toLower y)) && refsMatch restx resty -refsMatch ((Code x):restx) ((Code y):resty) = ((map toLower x) == (map toLower y)) && refsMatch restx resty -refsMatch ((TeX x):restx) ((TeX y):resty) = ((map toLower x) == (map toLower y)) && refsMatch restx resty -refsMatch ((HtmlInline x):restx) ((HtmlInline y):resty) = ((map toLower x) == (map toLower y)) && refsMatch restx resty -refsMatch ((NoteRef x):restx) ((NoteRef y):resty) = ((map toLower x) == (map toLower y)) && refsMatch restx resty -refsMatch ((Emph x):restx) ((Emph y):resty) = refsMatch x y && refsMatch restx resty -refsMatch ((Strong x):restx) ((Strong y):resty) = refsMatch x y && refsMatch restx resty +refsMatch ((Str x):restx) ((Str y):resty) = + ((map toLower x) == (map toLower y)) && refsMatch restx resty +refsMatch ((Code x):restx) ((Code y):resty) = + ((map toLower x) == (map toLower y)) && refsMatch restx resty +refsMatch ((TeX x):restx) ((TeX y):resty) = + ((map toLower x) == (map toLower y)) && refsMatch restx resty +refsMatch ((HtmlInline x):restx) ((HtmlInline y):resty) = + ((map toLower x) == (map toLower y)) && refsMatch restx resty +refsMatch ((NoteRef x):restx) ((NoteRef y):resty) = + ((map toLower x) == (map toLower y)) && refsMatch restx resty +refsMatch ((Emph x):restx) ((Emph y):resty) = + refsMatch x y && refsMatch restx resty +refsMatch ((Strong x):restx) ((Strong y):resty) = + refsMatch x y && refsMatch restx resty refsMatch (x:restx) (y:resty) = (x == y) && refsMatch restx resty refsMatch [] x = null x refsMatch x [] = null x --- | Replace reference links with explicit links in list of blocks, removing key blocks. +-- | Replace reference links with explicit links in list of blocks, +-- removing key blocks. replaceReferenceLinks :: [Block] -> [Block] replaceReferenceLinks blocks = let (keytable, purged) = keyTable blocks in replaceRefLinksBlockList keytable purged --- | Use key table to replace reference links with explicit links in a list of blocks +-- | Use key table to replace reference links with explicit links in a list +-- of blocks replaceRefLinksBlockList :: KeyTable -> [Block] -> [Block] -replaceRefLinksBlockList keytable lst = map (replaceRefLinksBlock keytable) lst +replaceRefLinksBlockList keytable lst = + map (replaceRefLinksBlock keytable) lst -- | Use key table to replace reference links with explicit links in a block replaceRefLinksBlock :: KeyTable -> Block -> Block -replaceRefLinksBlock keytable (Plain lst) = Plain (map (replaceRefLinksInline keytable) lst) -replaceRefLinksBlock keytable (Para lst) = Para (map (replaceRefLinksInline keytable) lst) -replaceRefLinksBlock keytable (Header lvl lst) = Header lvl (map (replaceRefLinksInline keytable) lst) -replaceRefLinksBlock keytable (BlockQuote lst) = BlockQuote (map (replaceRefLinksBlock keytable) lst) -replaceRefLinksBlock keytable (Note ref lst) = Note ref (map (replaceRefLinksBlock keytable) lst) -replaceRefLinksBlock keytable (OrderedList lst) = OrderedList (map (replaceRefLinksBlockList keytable) lst) -replaceRefLinksBlock keytable (BulletList lst) = BulletList (map (replaceRefLinksBlockList keytable) lst) +replaceRefLinksBlock keytable (Plain lst) = + Plain (map (replaceRefLinksInline keytable) lst) +replaceRefLinksBlock keytable (Para lst) = + Para (map (replaceRefLinksInline keytable) lst) +replaceRefLinksBlock keytable (Header lvl lst) = + Header lvl (map (replaceRefLinksInline keytable) lst) +replaceRefLinksBlock keytable (BlockQuote lst) = + BlockQuote (map (replaceRefLinksBlock keytable) lst) +replaceRefLinksBlock keytable (Note ref lst) = + Note ref (map (replaceRefLinksBlock keytable) lst) +replaceRefLinksBlock keytable (OrderedList lst) = + OrderedList (map (replaceRefLinksBlockList keytable) lst) +replaceRefLinksBlock keytable (BulletList lst) = + BulletList (map (replaceRefLinksBlockList keytable) lst) replaceRefLinksBlock keytable other = other --- | Use key table to replace reference links with explicit links in an inline element. +-- | Use key table to replace reference links with explicit links in an +-- inline element. replaceRefLinksInline :: KeyTable -> Inline -> Inline replaceRefLinksInline keytable (Link text (Ref ref)) = (Link newText newRef) - where newRef = case lookupKeySrc keytable (if (null ref) then text else ref) of + where newRef = case lookupKeySrc keytable + (if (null ref) then text else ref) of Nothing -> (Ref ref) Just src -> src newText = map (replaceRefLinksInline keytable) text replaceRefLinksInline keytable (Image text (Ref ref)) = (Image newText newRef) - where newRef = case lookupKeySrc keytable (if (null ref) then text else ref) of + where newRef = case lookupKeySrc keytable + (if (null ref) then text else ref) of Nothing -> (Ref ref) Just src -> src newText = map (replaceRefLinksInline keytable) text -replaceRefLinksInline keytable (Emph lst) = Emph (map (replaceRefLinksInline keytable) lst) -replaceRefLinksInline keytable (Strong lst) = Strong (map (replaceRefLinksInline keytable) lst) +replaceRefLinksInline keytable (Emph lst) = + Emph (map (replaceRefLinksInline keytable) lst) +replaceRefLinksInline keytable (Strong lst) = + Strong (map (replaceRefLinksInline keytable) lst) replaceRefLinksInline keytable other = other diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs index 66590809f..927157ba5 100644 --- a/src/Text/Pandoc/UTF8.hs +++ b/src/Text/Pandoc/UTF8.hs @@ -4,7 +4,8 @@ -- (c) 2003, OGI School of Science & Engineering, Oregon Health and -- Science University. -- --- Modified by Martin Norbaeck to pass illegal UTF-8 sequences through unchanged. +-- Modified by Martin Norbaeck +-- to pass illegal UTF-8 sequences through unchanged. module Text.Pandoc.UTF8 ( decodeUTF8, encodeUTF8 diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 7ba506acb..1b5201191 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -1,4 +1,14 @@ --- | Converts Pandoc to HTML. +{- | + Module : Text.Pandoc.Writers.HTML + Copyright : Copyright (C) 2006 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm at berkeley dot edu> + Stability : unstable + Portability : portable + +Conversion of 'Pandoc' documents to HTML. +-} module Text.Pandoc.Writers.HTML ( writeHtml ) where @@ -13,94 +23,108 @@ import Data.List ( isPrefixOf, partition ) -- | Convert Pandoc document to string in HTML format. writeHtml :: WriterOptions -> Pandoc -> String writeHtml options (Pandoc (Meta title authors date) blocks) = - let titlePrefix = writerTitlePrefix options in - let topTitle = if not (null titlePrefix) then - [Str titlePrefix] ++ (if not (null title) then [Str " - "] ++ title else []) - else - title in - let head = if (writerStandalone options) then - htmlHeader options (Meta topTitle authors date) - else - "" - titleBlocks = if (writerStandalone options) && (not (null title)) && - (not (writerS5 options)) then - [RawHtml "<h1 class=\"title\">", Plain title, RawHtml "</h1>\n"] - else - [] - foot = if (writerStandalone options) then "</body>\n</html>\n" else "" - blocks' = replaceReferenceLinks (titleBlocks ++ blocks) - (noteBlocks, blocks'') = partition isNoteBlock blocks' - body = (writerIncludeBefore options) ++ - concatMap (blockToHtml options) blocks'' ++ - footnoteSection options noteBlocks ++ - (writerIncludeAfter options) in - head ++ body ++ foot + let titlePrefix = writerTitlePrefix options in + let topTitle = if not (null titlePrefix) + then [Str titlePrefix] ++ (if not (null title) + then [Str " - "] ++ title + else []) + else title in + let head = if (writerStandalone options) + then htmlHeader options (Meta topTitle authors date) + else "" + titleBlocks = if (writerStandalone options) && (not (null title)) && + (not (writerS5 options)) + then [RawHtml "<h1 class=\"title\">", Plain title, + RawHtml "</h1>\n"] + else [] + foot = if (writerStandalone options) then "</body>\n</html>\n" else "" + blocks' = replaceReferenceLinks (titleBlocks ++ blocks) + (noteBlocks, blocks'') = partition isNoteBlock blocks' + body = (writerIncludeBefore options) ++ + concatMap (blockToHtml options) blocks'' ++ + footnoteSection options noteBlocks ++ + (writerIncludeAfter options) in + head ++ body ++ foot --- | Convert list of Note blocks to a footnote <div>. Assumes notes are sorted. +-- | Convert list of Note blocks to a footnote <div>. +-- Assumes notes are sorted. footnoteSection :: WriterOptions -> [Block] -> String footnoteSection options notes = - if null notes - then "" - else "<div class=\"footnotes\">\n<hr />\n<ol>\n" ++ - concatMap (blockToHtml options) notes ++ - "</ol>\n</div>\n" + if null notes + then "" + else "<div class=\"footnotes\">\n<hr />\n<ol>\n" ++ + concatMap (blockToHtml options) notes ++ + "</ol>\n</div>\n" -- | Obfuscate a "mailto:" link using Javascript. obfuscateLink :: WriterOptions -> [Inline] -> String -> String obfuscateLink options text src = let text' = inlineListToHtml options text in - let linkText = if src == ("mailto:" ++ text') then "e" else "'" ++ text' ++ "'" - altText = if src == ("mailto:" ++ text') then "\\1 [at] \\2" else text' ++ " (\\1 [at] \\2)" in + let linkText = if src == ("mailto:" ++ text') + then "e" + else "'" ++ text' ++ "'" + altText = if src == ("mailto:" ++ text') + then "\\1 [at] \\2" + else text' ++ " (\\1 [at] \\2)" in gsub "mailto:([^@]*)@(.*)" ("<script type=\"text/javascript\">h='\\2';n='\\1';e=n+'@'+h;document.write('<a href=\"mailto:'+e+'\">'+" ++ linkText ++ "+'<\\/a>');</script><noscript>" ++ altText ++ "</noscript>") src -- | Obfuscate character as entity. obfuscateChar :: Char -> String -obfuscateChar char = let num = ord char in - let numstr = if even num then (show num) else ("x" ++ (showHex num "")) in - "&#" ++ numstr ++ ";" +obfuscateChar char = + let num = ord char in + let numstr = if even num then (show num) else ("x" ++ (showHex num "")) in + "&#" ++ numstr ++ ";" -- | Escape string, preserving character entities and quote. stringToHtml :: String -> String -stringToHtml str = escapePreservingRegex stringToHtmlString (mkRegex "\"|(&[[:alnum:]]*;)") str +stringToHtml str = escapePreservingRegex stringToHtmlString + (mkRegex "\"|(&[[:alnum:]]*;)") str -- | Escape string as in 'stringToHtml' but add smart typography filter. stringToSmartHtml :: String -> String stringToSmartHtml = - let escapeDoubleQuotes = - gsub "(\"|")" "”" . -- rest are right quotes - gsub "(\"|")(&r[sd]quo;)" "”\\2" . -- never left quo before right quo - gsub "(&l[sd]quo;)(\"|")" "\\2“" . -- never right quo after left quo - gsub "([ \t])(\"|")" "\\1“" . -- never right quo after space - gsub "(\"|")([^,.;:!?^) \t-])" "“\\2" . -- "word left - gsub "(\"|")('|`|‘)" "”’" . -- right if it got through last filter - gsub "(\"|")('|`|‘)([^,.;:!?^) \t-])" "“‘\\3" . -- "'word left - gsub "``" "“" . - gsub "''" "”" - escapeSingleQuotes = - gsub "'" "’" . -- otherwise right - gsub "'(&r[sd]quo;)" "’\\1" . -- never left quo before right quo - gsub "(&l[sd]quo;)'" "\\1‘" . -- never right quo after left quo - gsub "([ \t])'" "\\1‘" . -- never right quo after space - gsub "`" "‘" . -- ` is left - gsub "([^,.;:!?^) \t-])'" "\\1’" . -- word' right - gsub "^('|`)([^,.;:!?^) \t-])" "‘\\2" . -- 'word left - gsub "('|`)(\"|"|“|``)" "‘“" . -- '"word left - gsub "([^,.;:!?^) \t-])'(s|S)" "\\1’\\2" . -- possessive - gsub "([[:space:]])'([^,.;:!?^) \t-])" "\\1‘\\2" . -- 'word left - gsub "'([0-9][0-9](s|S))" "’\\1" -- '80s - decade abbrevs. - escapeDashes = gsub " ?-- ?" "—" . - gsub " ?--- ?" "—" . - gsub "([0-9])--?([0-9])" "\\1–\\2" - escapeEllipses = gsub "\\.\\.\\.|\\. \\. \\." "…" in - escapeSingleQuotes . escapeDoubleQuotes . escapeDashes . escapeEllipses . stringToHtml + let escapeDoubleQuotes = + gsub "(\"|")" "”" . -- rest are right quotes + gsub "(\"|")(&r[sd]quo;)" "”\\2" . + -- never left quo before right quo + gsub "(&l[sd]quo;)(\"|")" "\\2“" . + -- never right quo after left quo + gsub "([ \t])(\"|")" "\\1“" . + -- never right quo after space + gsub "(\"|")([^,.;:!?^) \t-])" "“\\2" . -- "word left + gsub "(\"|")('|`|‘)" "”’" . + -- right if it got through last filter + gsub "(\"|")('|`|‘)([^,.;:!?^) \t-])" "“‘\\3" . + -- "'word left + gsub "``" "“" . + gsub "''" "”" + escapeSingleQuotes = + gsub "'" "’" . -- otherwise right + gsub "'(&r[sd]quo;)" "’\\1" . -- never left quo before right quo + gsub "(&l[sd]quo;)'" "\\1‘" . -- never right quo after left quo + gsub "([ \t])'" "\\1‘" . -- never right quo after space + gsub "`" "‘" . -- ` is left + gsub "([^,.;:!?^) \t-])'" "\\1’" . -- word' right + gsub "^('|`)([^,.;:!?^) \t-])" "‘\\2" . -- 'word left + gsub "('|`)(\"|"|“|``)" "‘“" . -- '"word left + gsub "([^,.;:!?^) \t-])'(s|S)" "\\1’\\2" . -- possessive + gsub "([[:space:]])'([^,.;:!?^) \t-])" "\\1‘\\2" . -- 'word left + gsub "'([0-9][0-9](s|S))" "’\\1" -- '80s - decade abbrevs. + escapeDashes = + gsub " ?-- ?" "—" . + gsub " ?--- ?" "—" . + gsub "([0-9])--?([0-9])" "\\1–\\2" + escapeEllipses = gsub "\\.\\.\\.|\\. \\. \\." "…" in + escapeSingleQuotes . escapeDoubleQuotes . escapeDashes . + escapeEllipses . stringToHtml -- | Escape code string as needed for HTML. codeStringToHtml :: String -> String codeStringToHtml [] = [] codeStringToHtml (x:xs) = case x of - '&' -> "&" ++ codeStringToHtml xs - '<' -> "<" ++ codeStringToHtml xs - _ -> x:(codeStringToHtml xs) + '&' -> "&" ++ codeStringToHtml xs + '<' -> "<" ++ codeStringToHtml xs + _ -> x:(codeStringToHtml xs) -- | Escape string to HTML appropriate for attributes attributeStringToHtml :: String -> String @@ -109,17 +133,19 @@ attributeStringToHtml = gsub "\"" """ -- | Returns an HTML header with appropriate bibliographic information. htmlHeader :: WriterOptions -> Meta -> String htmlHeader options (Meta title authors date) = - let titletext = "<title>" ++ (inlineListToHtml options title) ++ "</title>\n" - authortext = if (null authors) then - "" - else - "<meta name=\"author\" content=\"" ++ - (joinWithSep ", " (map stringToHtml authors)) ++ "\" />\n" - datetext = if (date == "") then - "" - else - "<meta name=\"date\" content=\"" ++ (stringToHtml date) ++ "\" />\n" in - (writerHeader options) ++ authortext ++ datetext ++ titletext ++ "</head>\n<body>\n" + let titletext = "<title>" ++ (inlineListToHtml options title) ++ + "</title>\n" + authortext = if (null authors) + then "" + else "<meta name=\"author\" content=\"" ++ + (joinWithSep ", " (map stringToHtml authors)) ++ + "\" />\n" + datetext = if (date == "") + then "" + else "<meta name=\"date\" content=\"" ++ + (stringToHtml date) ++ "\" />\n" in + (writerHeader options) ++ authortext ++ datetext ++ titletext ++ + "</head>\n<body>\n" -- | Convert Pandoc block element to HTML. blockToHtml :: WriterOptions -> Block -> String @@ -128,85 +154,100 @@ blockToHtml options Null = "" blockToHtml options (Plain lst) = inlineListToHtml options lst blockToHtml options (Para lst) = "<p>" ++ (inlineListToHtml options lst) ++ "</p>\n" blockToHtml options (BlockQuote blocks) = - if (writerS5 options) then -- in S5, treat list in blockquote specially - -- if default is incremental, make it nonincremental; otherwise incremental - let inc = not (writerIncremental options) in - case blocks of - [BulletList lst] -> blockToHtml (options {writerIncremental = inc}) (BulletList lst) - [OrderedList lst] -> blockToHtml (options {writerIncremental = inc}) (OrderedList lst) - otherwise -> "<blockquote>\n" ++ (concatMap (blockToHtml options) blocks) ++ - "</blockquote>\n" - else - "<blockquote>\n" ++ (concatMap (blockToHtml options) blocks) ++ "</blockquote>\n" + if (writerS5 options) + then -- in S5, treat list in blockquote specially + -- if default is incremental, make it nonincremental; + -- otherwise incremental + let inc = not (writerIncremental options) in + case blocks of + [BulletList lst] -> blockToHtml (options {writerIncremental = + inc}) (BulletList lst) + [OrderedList lst] -> blockToHtml (options {writerIncremental = + inc}) (OrderedList lst) + otherwise -> "<blockquote>\n" ++ + (concatMap (blockToHtml options) blocks) ++ + "</blockquote>\n" + else "<blockquote>\n" ++ (concatMap (blockToHtml options) blocks) ++ + "</blockquote>\n" blockToHtml options (Note ref lst) = - let contents = (concatMap (blockToHtml options) lst) in - "<li id=\"fn" ++ ref ++ "\">" ++ contents ++ " <a href=\"#fnref" ++ ref ++ - "\" class=\"footnoteBacklink\" title=\"Jump back to footnote " ++ ref ++ - "\">↩</a></li>\n" + let contents = (concatMap (blockToHtml options) lst) in + "<li id=\"fn" ++ ref ++ "\">" ++ contents ++ " <a href=\"#fnref" ++ ref ++ + "\" class=\"footnoteBacklink\" title=\"Jump back to footnote " ++ ref ++ + "\">↩</a></li>\n" blockToHtml options (Key _ _) = "" -blockToHtml options (CodeBlock str) = "<pre><code>" ++ (codeStringToHtml str) ++ - "\n</code></pre>\n" +blockToHtml options (CodeBlock str) = + "<pre><code>" ++ (codeStringToHtml str) ++ "\n</code></pre>\n" blockToHtml options (RawHtml str) = str blockToHtml options (BulletList lst) = - let attribs = if (writerIncremental options) then " class=\"incremental\"" else "" in - "<ul" ++ attribs ++ ">\n" ++ (concatMap (listItemToHtml options) lst) ++ "</ul>\n" + let attribs = if (writerIncremental options) + then " class=\"incremental\"" + else "" in + "<ul" ++ attribs ++ ">\n" ++ (concatMap (listItemToHtml options) lst) ++ + "</ul>\n" blockToHtml options (OrderedList lst) = - let attribs = if (writerIncremental options) then " class=\"incremental\"" else "" in - "<ol" ++ attribs ++ ">\n" ++ (concatMap (listItemToHtml options) lst) ++ "</ol>\n" + let attribs = if (writerIncremental options) + then " class=\"incremental\"" + else "" in + "<ol" ++ attribs ++ ">\n" ++ (concatMap (listItemToHtml options) lst) ++ + "</ol>\n" blockToHtml options HorizontalRule = "<hr />\n" blockToHtml options (Header level lst) = - let contents = inlineListToHtml options lst in - let simplify = gsub "<[^>]*>" "" . gsub " " "_" in - if ((level > 0) && (level <= 6)) - then "<a id=\"" ++ simplify contents ++ "\"></a>\n" ++ - "<h" ++ (show level) ++ ">" ++ contents ++ - "</h" ++ (show level) ++ ">\n" - else "<p>" ++ contents ++ "</p>\n" -listItemToHtml options list = "<li>" ++ (concatMap (blockToHtml options) list) ++ "</li>\n" + let contents = inlineListToHtml options lst in + let simplify = gsub "<[^>]*>" "" . gsub " " "_" in + if ((level > 0) && (level <= 6)) + then "<a id=\"" ++ simplify contents ++ "\"></a>\n" ++ + "<h" ++ (show level) ++ ">" ++ contents ++ + "</h" ++ (show level) ++ ">\n" + else "<p>" ++ contents ++ "</p>\n" +listItemToHtml options list = + "<li>" ++ (concatMap (blockToHtml options) list) ++ "</li>\n" -- | Convert list of Pandoc inline elements to HTML. inlineListToHtml :: WriterOptions -> [Inline] -> String inlineListToHtml options lst = - -- consolidate adjacent Str and Space elements for more intelligent - -- smart typography filtering - let lst' = consolidateList lst in - concatMap (inlineToHtml options) lst' + -- consolidate adjacent Str and Space elements for more intelligent + -- smart typography filtering + let lst' = consolidateList lst in + concatMap (inlineToHtml options) lst' -- | Convert Pandoc inline element to HTML. inlineToHtml :: WriterOptions -> Inline -> String -inlineToHtml options (Emph lst) = "<em>" ++ (inlineListToHtml options lst) ++ "</em>" -inlineToHtml options (Strong lst) = "<strong>" ++ (inlineListToHtml options lst) ++ "</strong>" -inlineToHtml options (Code str) = "<code>" ++ (codeStringToHtml str) ++ "</code>" -inlineToHtml options (Str str) = if (writerSmart options) then - stringToSmartHtml str - else - stringToHtml str +inlineToHtml options (Emph lst) = + "<em>" ++ (inlineListToHtml options lst) ++ "</em>" +inlineToHtml options (Strong lst) = + "<strong>" ++ (inlineListToHtml options lst) ++ "</strong>" +inlineToHtml options (Code str) = + "<code>" ++ (codeStringToHtml str) ++ "</code>" +inlineToHtml options (Str str) = + if (writerSmart options) then stringToSmartHtml str else stringToHtml str inlineToHtml options (TeX str) = (codeStringToHtml str) inlineToHtml options (HtmlInline str) = str inlineToHtml options (LineBreak) = "<br />\n" inlineToHtml options Space = " " inlineToHtml options (Link text (Src src tit)) = - let title = attributeStringToHtml tit in - if (isPrefixOf "mailto:" src) then - obfuscateLink options text src - else - "<a href=\"" ++ (codeStringToHtml src) ++ "\"" ++ - (if tit /= "" then " title=\"" ++ title ++ "\">" else ">") ++ - (inlineListToHtml options text) ++ "</a>" -inlineToHtml options (Link text (Ref [])) = "[" ++ (inlineListToHtml options text) ++ "]" -inlineToHtml options (Link text (Ref ref)) = "[" ++ (inlineListToHtml options text) ++ "][" ++ - (inlineListToHtml options ref) ++ "]" -- this is what markdown does, for better or worse + let title = attributeStringToHtml tit in + if (isPrefixOf "mailto:" src) + then obfuscateLink options text src + else "<a href=\"" ++ (codeStringToHtml src) ++ "\"" ++ + (if tit /= "" then " title=\"" ++ title ++ "\">" else ">") ++ + (inlineListToHtml options text) ++ "</a>" +inlineToHtml options (Link text (Ref [])) = + "[" ++ (inlineListToHtml options text) ++ "]" +inlineToHtml options (Link text (Ref ref)) = + "[" ++ (inlineListToHtml options text) ++ "][" ++ + (inlineListToHtml options ref) ++ "]" + -- this is what markdown does, for better or worse inlineToHtml options (Image alt (Src source tit)) = - let title = attributeStringToHtml tit - alternate = inlineListToHtml options alt in - "<img src=\"" ++ source ++ "\"" ++ - (if tit /= "" then " title=\"" ++ title ++ "\"" else "") ++ - (if alternate /= "" then " alt=\"" ++ alternate ++ "\"" else "") ++ ">" + let title = attributeStringToHtml tit + alternate = inlineListToHtml options alt in + "<img src=\"" ++ source ++ "\"" ++ + (if tit /= "" then " title=\"" ++ title ++ "\"" else "") ++ + (if alternate /= "" then " alt=\"" ++ alternate ++ "\"" else "") ++ ">" inlineToHtml options (Image alternate (Ref [])) = - "![" ++ (inlineListToHtml options alternate) ++ "]" + "![" ++ (inlineListToHtml options alternate) ++ "]" inlineToHtml options (Image alternate (Ref ref)) = - "![" ++ (inlineListToHtml options alternate) ++ "][" ++ (inlineListToHtml options ref) ++ "]" + "![" ++ (inlineListToHtml options alternate) ++ "][" ++ + (inlineListToHtml options ref) ++ "]" inlineToHtml options (NoteRef ref) = - "<sup class=\"footnoteRef\" id=\"fnref" ++ ref ++ "\"><a href=\"#fn" ++ ref ++ - "\">" ++ ref ++ "</a></sup>" - + "<sup class=\"footnoteRef\" id=\"fnref" ++ ref ++ "\"><a href=\"#fn" ++ + ref ++ "\">" ++ ref ++ "</a></sup>" diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 22a96a423..3a3d249e9 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -1,4 +1,14 @@ --- | Convert Pandoc to LaTeX. +{- | + Module : Text.Pandoc.Writers.LaTeX + Copyright : Copyright (C) 2006 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm at berkeley dot edu> + Stability : unstable + Portability : portable + +Conversion of 'Pandoc' format into LaTeX. +-} module Text.Pandoc.Writers.LaTeX ( writeLaTeX ) where @@ -9,46 +19,40 @@ import List ( (\\) ) -- | Convert Pandoc to LaTeX. writeLaTeX :: WriterOptions -> Pandoc -> String writeLaTeX options (Pandoc meta blocks) = - let notes = filter isNoteBlock blocks in -- assumes all notes are at outer level - let body = (writerIncludeBefore options) ++ - (concatMap (blockToLaTeX notes) (replaceReferenceLinks blocks)) ++ - (writerIncludeAfter options) in - let head = if writerStandalone options then - latexHeader notes options meta - else - "" in - let foot = if writerStandalone options then "\n\\end{document}\n" else "" in - head ++ body ++ foot + let notes = filter isNoteBlock blocks in -- assumes all notes at outer level + let body = (writerIncludeBefore options) ++ + (concatMap (blockToLaTeX notes) + (replaceReferenceLinks blocks)) ++ + (writerIncludeAfter options) in + let head = if writerStandalone options + then latexHeader notes options meta + else "" in + let foot = if writerStandalone options then "\n\\end{document}\n" else "" in + head ++ body ++ foot -- | Insert bibliographic information into LaTeX header. -latexHeader :: [Block] -- ^ List of note blocks to use in resolving note refs - -> WriterOptions -- ^ Options, including LaTeX header - -> Meta -- ^ Meta with bibliographic information +latexHeader :: [Block] -- ^ List of note blocks to use in resolving note refs + -> WriterOptions -- ^ Options, including LaTeX header + -> Meta -- ^ Meta with bibliographic information -> String latexHeader notes options (Meta title authors date) = - let titletext = if null title then - "" - else - "\\title{" ++ inlineListToLaTeX notes title ++ "}\n" - authorstext = if null authors then - "" - else - "\\author{" ++ (joinWithSep "\\\\" (map stringToLaTeX authors)) ++ "}\n" - datetext = if date == "" then - "" - else - "\\date{" ++ stringToLaTeX date ++ "}\n" - maketitle = if null title then - "" - else - "\\maketitle\n" - secnumline = if (writerNumberSections options) then - "" - else - "\\setcounter{secnumdepth}{0}\n" - header = writerHeader options in - header ++ secnumline ++ titletext ++ authorstext ++ datetext ++ "\\begin{document}\n" ++ maketitle - + let titletext = if null title + then "" + else "\\title{" ++ inlineListToLaTeX notes title ++ "}\n" + authorstext = if null authors + then "" + else "\\author{" ++ (joinWithSep "\\\\" + (map stringToLaTeX authors)) ++ "}\n" + datetext = if date == "" + then "" + else "\\date{" ++ stringToLaTeX date ++ "}\n" + maketitle = if null title then "" else "\\maketitle\n" + secnumline = if (writerNumberSections options) + then "" + else "\\setcounter{secnumdepth}{0}\n" + header = writerHeader options in + header ++ secnumline ++ titletext ++ authorstext ++ datetext ++ + "\\begin{document}\n" ++ maketitle -- escape things as needed for LaTeX (also ldots, dashes, quotes, etc.) @@ -77,7 +81,8 @@ escapeSingleQuotes = gsub "([^[:punct:][:space:]])`(s|S)" "\\1'\\2" . -- catch possessives gsub "^'([^[:punct:][:space:]])" "`\\1" . -- 'word left gsub "([[:space:]])'" "\\1`" . -- never right quote after space - gsub "([[:space:]])'([^[:punct:][:space:]])" "\\1`\\2" -- 'word left (leave possessives) + gsub "([[:space:]])'([^[:punct:][:space:]])" "\\1`\\2" + -- 'word left (leave possessives) escapeEllipses = gsub "\\.\\.\\.|\\. \\. \\." "\\ldots{}" @@ -85,12 +90,14 @@ escapeDashes = gsub "([0-9])-([0-9])" "\\1--\\2" . gsub " *--- *" "---" . gsub "([^-])--([^-])" "\\1---\\2" -escapeSmart = escapeDashes . escapeSingleQuotes . escapeDoubleQuotes . escapeEllipses +escapeSmart = escapeDashes . escapeSingleQuotes . escapeDoubleQuotes . + escapeEllipses -- | Escape string for LaTeX (including smart quotes, dashes, ellipses) stringToLaTeX :: String -> String stringToLaTeX = escapeSmart . escapeGt . escapeLt . escapeBar . escapeHat . - escapeSpecial . fixBackslash . escapeBrackets . escapeBackslash + escapeSpecial . fixBackslash . escapeBrackets . + escapeBackslash -- | Remove all code elements from list of inline elements -- (because it's illegal to have a \\verb inside a command argument) @@ -107,43 +114,47 @@ blockToLaTeX notes Blank = "\n" blockToLaTeX notes Null = "" blockToLaTeX notes (Plain lst) = inlineListToLaTeX notes lst ++ "\n" blockToLaTeX notes (Para lst) = (inlineListToLaTeX notes lst) ++ "\n\n" -blockToLaTeX notes (BlockQuote lst) = - "\\begin{quote}\n" ++ (concatMap (blockToLaTeX notes) lst) ++ "\\end{quote}\n" +blockToLaTeX notes (BlockQuote lst) = "\\begin{quote}\n" ++ + (concatMap (blockToLaTeX notes) lst) ++ "\\end{quote}\n" blockToLaTeX notes (Note ref lst) = "" blockToLaTeX notes (Key _ _) = "" -blockToLaTeX notes (CodeBlock str) = "\\begin{verbatim}\n" ++ str ++ "\n\\end{verbatim}\n" +blockToLaTeX notes (CodeBlock str) = "\\begin{verbatim}\n" ++ str ++ + "\n\\end{verbatim}\n" blockToLaTeX notes (RawHtml str) = "" -blockToLaTeX notes (BulletList lst) = - "\\begin{itemize}\n" ++ (concatMap (listItemToLaTeX notes) lst) ++ "\\end{itemize}\n" -blockToLaTeX notes (OrderedList lst) = - "\\begin{enumerate}\n" ++ (concatMap (listItemToLaTeX notes) lst) ++ "\\end{enumerate}\n" -blockToLaTeX notes HorizontalRule = "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n" +blockToLaTeX notes (BulletList lst) = "\\begin{itemize}\n" ++ + (concatMap (listItemToLaTeX notes) lst) ++ "\\end{itemize}\n" +blockToLaTeX notes (OrderedList lst) = "\\begin{enumerate}\n" ++ + (concatMap (listItemToLaTeX notes) lst) ++ "\\end{enumerate}\n" +blockToLaTeX notes HorizontalRule = + "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n" blockToLaTeX notes (Header level lst) = - if (level > 0) && (level <= 3) then - "\\" ++ (concat (replicate (level - 1) "sub")) ++ "section{" ++ - (inlineListToLaTeX notes (deVerb lst)) ++ "}\n\n" - else - (inlineListToLaTeX notes lst) ++ "\n\n" -listItemToLaTeX notes list = "\\item " ++ (concatMap (blockToLaTeX notes) list) + if (level > 0) && (level <= 3) + then "\\" ++ (concat (replicate (level - 1) "sub")) ++ "section{" ++ + (inlineListToLaTeX notes (deVerb lst)) ++ "}\n\n" + else (inlineListToLaTeX notes lst) ++ "\n\n" +listItemToLaTeX notes list = "\\item " ++ + (concatMap (blockToLaTeX notes) list) -- | Convert list of inline elements to LaTeX. inlineListToLaTeX :: [Block] -- ^ List of note blocks to use in resolving note refs -> [Inline] -- ^ Inlines to convert -> String inlineListToLaTeX notes lst = - -- first, consolidate Str and Space for more effective smartquotes: - let lst' = consolidateList lst in - concatMap (inlineToLaTeX notes) lst' + -- first, consolidate Str and Space for more effective smartquotes: + let lst' = consolidateList lst in + concatMap (inlineToLaTeX notes) lst' -- | Convert inline element to LaTeX inlineToLaTeX :: [Block] -- ^ List of note blocks to use in resolving note refs -> Inline -- ^ Inline to convert -> String -inlineToLaTeX notes (Emph lst) = "\\emph{" ++ (inlineListToLaTeX notes (deVerb lst)) ++ "}" -inlineToLaTeX notes (Strong lst) = "\\textbf{" ++ (inlineListToLaTeX notes (deVerb lst)) ++ "}" +inlineToLaTeX notes (Emph lst) = "\\emph{" ++ + (inlineListToLaTeX notes (deVerb lst)) ++ "}" +inlineToLaTeX notes (Strong lst) = "\\textbf{" ++ + (inlineListToLaTeX notes (deVerb lst)) ++ "}" inlineToLaTeX notes (Code str) = "\\verb" ++ [chr] ++ stuffing ++ [chr] - where stuffing = str - chr = ((enumFromTo '!' '~') \\ stuffing) !! 0 + where stuffing = str + chr = ((enumFromTo '!' '~') \\ stuffing) !! 0 inlineToLaTeX notes (Str str) = stringToLaTeX str inlineToLaTeX notes (TeX str) = str inlineToLaTeX notes (HtmlInline str) = "" @@ -151,18 +162,22 @@ inlineToLaTeX notes (LineBreak) = "\\\\\n" inlineToLaTeX notes Space = " " inlineToLaTeX notes (Link text (Src src tit)) = "\\href{" ++ src ++ "}{" ++ (inlineListToLaTeX notes (deVerb text)) ++ "}" -inlineToLaTeX notes (Link text (Ref [])) = "[" ++ (inlineListToLaTeX notes text) ++ "]" -inlineToLaTeX notes (Link text (Ref ref)) = "[" ++ (inlineListToLaTeX notes text) ++ "][" ++ - (inlineListToLaTeX notes ref) ++ "]" -- this is what markdown does, for better or worse -inlineToLaTeX notes (Image alternate (Src source tit)) = "\\includegraphics{" ++ source ++ "}" +inlineToLaTeX notes (Link text (Ref [])) = "[" ++ + (inlineListToLaTeX notes text) ++ "]" +inlineToLaTeX notes (Link text (Ref ref)) = "[" ++ + (inlineListToLaTeX notes text) ++ "][" ++ (inlineListToLaTeX notes ref) ++ + "]" -- this is what markdown does, for better or worse +inlineToLaTeX notes (Image alternate (Src source tit)) = + "\\includegraphics{" ++ source ++ "}" inlineToLaTeX notes (Image alternate (Ref [])) = "![" ++ (inlineListToLaTeX notes alternate) ++ "]" inlineToLaTeX notes (Image alternate (Ref ref)) = - "![" ++ (inlineListToLaTeX notes alternate) ++ "][" ++ (inlineListToLaTeX notes ref) ++ "]" + "![" ++ (inlineListToLaTeX notes alternate) ++ "][" ++ + (inlineListToLaTeX notes ref) ++ "]" inlineToLaTeX [] (NoteRef ref) = "" inlineToLaTeX ((Note firstref firstblocks):rest) (NoteRef ref) = - if (firstref == ref) then - "\\footnote{" ++ (stripTrailingNewlines (concatMap (blockToLaTeX rest) firstblocks)) ++ "}" - else - inlineToLaTeX rest (NoteRef ref) + if (firstref == ref) + then "\\footnote{" ++ (stripTrailingNewlines + (concatMap (blockToLaTeX rest) firstblocks)) ++ "}" + else inlineToLaTeX rest (NoteRef ref) diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 55d0eb2e1..eded63425 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -1,4 +1,16 @@ --- | Converts Pandoc to Markdown. +{- | + Module : Text.Pandoc.Writers.Markdown + Copyright : Copyright (C) 2006 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm at berkeley dot edu> + Stability : unstable + Portability : portable + +Conversion of 'Pandoc' documents to markdown-formatted plain text. + +Markdown: http://daringfireball.net/projects/markdown/ +-} module Text.Pandoc.Writers.Markdown ( writeMarkdown ) where @@ -11,11 +23,11 @@ import Text.PrettyPrint.HughesPJ hiding ( Str ) writeMarkdown :: WriterOptions -> Pandoc -> String writeMarkdown options (Pandoc meta blocks) = let body = text (writerIncludeBefore options) <> - vcat (map (blockToMarkdown (writerTabStop options)) (formatKeys blocks)) $$ text (writerIncludeAfter options) in - let head = if (writerStandalone options) then - ((metaToMarkdown meta) $$ text (writerHeader options)) - else - empty in + vcat (map (blockToMarkdown (writerTabStop options)) + (formatKeys blocks)) $$ text (writerIncludeAfter options) in + let head = if (writerStandalone options) + then ((metaToMarkdown meta) $$ text (writerHeader options)) + else empty in render $ head <> body -- | Escape special characters for Markdown. @@ -28,13 +40,15 @@ escapeLinkTitle = gsub "\"" "\\\\\"" -- | Take list of inline elements and return wrapped doc. wrappedMarkdown :: [Inline] -> Doc -wrappedMarkdown lst = fsep $ map (fcat . (map inlineToMarkdown)) (splitBySpace lst) +wrappedMarkdown lst = fsep $ + map (fcat . (map inlineToMarkdown)) (splitBySpace lst) -- | Insert Blank block between key and non-key formatKeys :: [Block] -> [Block] formatKeys [] = [] formatKeys [x] = [x] -formatKeys ((Key x1 y1):(Key x2 y2):rest) = (Key x1 y1):(formatKeys ((Key x2 y2):rest)) +formatKeys ((Key x1 y1):(Key x2 y2):rest) = + (Key x1 y1):(formatKeys ((Key x2 y2):rest)) formatKeys ((Key x1 y1):rest) = (Key x1 y1):Blank:(formatKeys rest) formatKeys (x:(Key x1 y1):rest) = x:Blank:(formatKeys ((Key x1 y1):rest)) formatKeys (x:rest) = x:(formatKeys rest) @@ -43,17 +57,18 @@ formatKeys (x:rest) = x:(formatKeys rest) metaToMarkdown :: Meta -> Doc metaToMarkdown (Meta [] [] "") = empty metaToMarkdown (Meta title [] "") = (titleToMarkdown title) <> (text "\n") -metaToMarkdown (Meta title authors "") = - (titleToMarkdown title) <> (text "\n") <> (authorsToMarkdown authors) <> (text "\n") -metaToMarkdown (Meta title authors date) = - (titleToMarkdown title) <> (text "\n") <> (authorsToMarkdown authors) <> - (text "\n") <> (dateToMarkdown date) <> (text "\n") +metaToMarkdown (Meta title authors "") = (titleToMarkdown title) <> + (text "\n") <> (authorsToMarkdown authors) <> (text "\n") +metaToMarkdown (Meta title authors date) = (titleToMarkdown title) <> + (text "\n") <> (authorsToMarkdown authors) <> (text "\n") <> + (dateToMarkdown date) <> (text "\n") titleToMarkdown :: [Inline] -> Doc titleToMarkdown lst = text "% " <> (inlineListToMarkdown lst) authorsToMarkdown :: [String] -> Doc -authorsToMarkdown lst = text "% " <> text (joinWithSep ", " (map escapeString lst)) +authorsToMarkdown lst = + text "% " <> text (joinWithSep ", " (map escapeString lst)) dateToMarkdown :: String -> Doc dateToMarkdown str = text "% " <> text (escapeString str) @@ -67,33 +82,34 @@ blockToMarkdown tabStop Null = empty blockToMarkdown tabStop (Plain lst) = wrappedMarkdown lst blockToMarkdown tabStop (Para lst) = (wrappedMarkdown lst) <> (text "\n") blockToMarkdown tabStop (BlockQuote lst) = - (vcat $ map (\line -> (text "> ") <> (text line)) $ lines $ render $ vcat $ - map (blockToMarkdown tabStop) lst) <> (text "\n") + (vcat $ map (\line -> (text "> ") <> (text line)) $ lines $ render $ vcat $ + map (blockToMarkdown tabStop) lst) <> (text "\n") blockToMarkdown tabStop (Note ref lst) = - let lns = lines $ render $ vcat $ map (blockToMarkdown tabStop) lst in - if null lns then - empty - else - let first = head lns - rest = tail lns in - text ("[^" ++ (escapeString ref) ++ "]: ") <> (text first) $$ (vcat $ - map (\line -> (text " ") <> (text line)) rest) <> text "\n" + let lns = lines $ render $ vcat $ map (blockToMarkdown tabStop) lst in + if null lns + then empty + else let first = head lns + rest = tail lns in + text ("[^" ++ (escapeString ref) ++ "]: ") <> (text first) $$ + (vcat $ map (\line -> (text " ") <> (text line)) rest) <> + text "\n" blockToMarkdown tabStop (Key txt (Src src tit)) = - text " " <> char '[' <> inlineListToMarkdown txt <> char ']' <> text ": " <> text src <> - (if tit /= "" then (text (" \"" ++ (escapeLinkTitle tit) ++ "\"")) else empty) -blockToMarkdown tabStop (CodeBlock str) = (nest tabStop $ vcat $ map text (lines str)) <> - text "\n" + text " " <> char '[' <> inlineListToMarkdown txt <> char ']' <> + text ": " <> text src <> + if tit /= "" then text (" \"" ++ (escapeLinkTitle tit) ++ "\"") else empty +blockToMarkdown tabStop (CodeBlock str) = + (nest tabStop $ vcat $ map text (lines str)) <> text "\n" blockToMarkdown tabStop (RawHtml str) = text str blockToMarkdown tabStop (BulletList lst) = - vcat (map (bulletListItemToMarkdown tabStop) lst) <> text "\n" + vcat (map (bulletListItemToMarkdown tabStop) lst) <> text "\n" blockToMarkdown tabStop (OrderedList lst) = - vcat (zipWith (orderedListItemToMarkdown tabStop) (enumFromTo 1 (length lst)) lst) <> - text "\n" + vcat (zipWith (orderedListItemToMarkdown tabStop) + (enumFromTo 1 (length lst)) lst) <> text "\n" blockToMarkdown tabStop HorizontalRule = text "\n* * * * *\n" -blockToMarkdown tabStop (Header level lst) = - text ((replicate level '#') ++ " ") <> (inlineListToMarkdown lst) <> (text "\n") +blockToMarkdown tabStop (Header level lst) = text ((replicate level '#') ++ + " ") <> (inlineListToMarkdown lst) <> (text "\n") bulletListItemToMarkdown tabStop list = - hang (text "- ") tabStop (vcat (map (blockToMarkdown tabStop) list)) + hang (text "- ") tabStop (vcat (map (blockToMarkdown tabStop) list)) -- | Convert ordered list item (a list of blocks) to markdown. orderedListItemToMarkdown :: Int -- ^ tab stop @@ -101,8 +117,9 @@ orderedListItemToMarkdown :: Int -- ^ tab stop -> [Block] -- ^ list item (list of blocks) -> Doc orderedListItemToMarkdown tabStop num list = - hang (text ((show num) ++ "." ++ spacer)) tabStop (vcat (map (blockToMarkdown tabStop) list)) - where spacer = if (num < 10) then " " else "" + hang (text ((show num) ++ "." ++ spacer)) tabStop + (vcat (map (blockToMarkdown tabStop) list)) + where spacer = if (num < 10) then " " else "" -- | Convert list of Pandoc inline elements to markdown. inlineListToMarkdown :: [Inline] -> Doc @@ -110,39 +127,46 @@ inlineListToMarkdown lst = hcat $ map inlineToMarkdown lst -- | Convert Pandoc inline element to markdown. inlineToMarkdown :: Inline -> Doc -inlineToMarkdown (Emph lst) = text "*" <> (inlineListToMarkdown lst) <> text "*" -inlineToMarkdown (Strong lst) = text "**" <> (inlineListToMarkdown lst) <> text "**" +inlineToMarkdown (Emph lst) = text "*" <> + (inlineListToMarkdown lst) <> text "*" +inlineToMarkdown (Strong lst) = text "**" <> + (inlineListToMarkdown lst) <> text "**" inlineToMarkdown (Code str) = - case (matchRegex (mkRegex "``") str) of - Just match -> text ("` " ++ str ++ " `") - Nothing -> case (matchRegex (mkRegex "`") str) of - Just match -> text ("`` " ++ str ++ " ``") - Nothing -> text ("`" ++ str ++ "`") + case (matchRegex (mkRegex "``") str) of + Just match -> text ("` " ++ str ++ " `") + Nothing -> case (matchRegex (mkRegex "`") str) of + Just match -> text ("`` " ++ str ++ " ``") + Nothing -> text ("`" ++ str ++ "`") inlineToMarkdown (Str str) = text $ escapeString str inlineToMarkdown (TeX str) = text str inlineToMarkdown (HtmlInline str) = text str inlineToMarkdown (LineBreak) = text " \n" inlineToMarkdown Space = char ' ' inlineToMarkdown (Link txt (Src src tit)) = - let linktext = if (null txt) || (txt == [Str ""]) then - text "link" - else - inlineListToMarkdown txt in - char '[' <> linktext <> char ']' <> char '(' <> text src <> - (if tit /= "" then (text (" \"" ++ (escapeLinkTitle tit) ++ "\"")) else empty) <> char ')' -inlineToMarkdown (Link txt (Ref [])) = char '[' <> inlineListToMarkdown txt <> text "][]" -inlineToMarkdown (Link txt (Ref ref)) = char '[' <> inlineListToMarkdown txt <> char ']' <> - char '[' <> inlineListToMarkdown ref <> char ']' + let linktext = if (null txt) || (txt == [Str ""]) + then text "link" + else inlineListToMarkdown txt in + char '[' <> linktext <> char ']' <> char '(' <> text src <> + (if tit /= "" + then text (" \"" ++ (escapeLinkTitle tit) ++ "\"") + else empty) <> char ')' +inlineToMarkdown (Link txt (Ref [])) = + char '[' <> inlineListToMarkdown txt <> text "][]" +inlineToMarkdown (Link txt (Ref ref)) = + char '[' <> inlineListToMarkdown txt <> char ']' <> char '[' <> + inlineListToMarkdown ref <> char ']' inlineToMarkdown (Image alternate (Src source tit)) = - let alt = if (null alternate) || (alternate == [Str ""]) then - text "image" - else - inlineListToMarkdown alternate in - char '!' <> char '[' <> alt <> char ']' <> char '(' <> text source <> - (if tit /= "" then (text (" \"" ++ (escapeLinkTitle tit) ++ "\"")) else empty) <> char ')' + let alt = if (null alternate) || (alternate == [Str ""]) + then text "image" + else inlineListToMarkdown alternate in + char '!' <> char '[' <> alt <> char ']' <> char '(' <> text source <> + (if tit /= "" + then text (" \"" ++ (escapeLinkTitle tit) ++ "\"") + else empty) <> char ')' inlineToMarkdown (Image alternate (Ref [])) = - char '!' <> char '[' <> inlineListToMarkdown alternate <> char ']' + char '!' <> char '[' <> inlineListToMarkdown alternate <> char ']' inlineToMarkdown (Image alternate (Ref ref)) = - char '!' <> char '[' <> inlineListToMarkdown alternate <> char ']' <> - char '[' <> inlineListToMarkdown ref <> char ']' -inlineToMarkdown (NoteRef ref) = text "[^" <> text (escapeString ref) <> char ']' + char '!' <> char '[' <> inlineListToMarkdown alternate <> char ']' <> + char '[' <> inlineListToMarkdown ref <> char ']' +inlineToMarkdown (NoteRef ref) = + text "[^" <> text (escapeString ref) <> char ']' diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index cc2bc6499..e42279ef4 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -1,4 +1,16 @@ --- | Converts Pandoc to reStructuredText. +{- | + Module : Text.Pandoc.Writers.RST + Copyright : Copyright (C) 2006 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm at berkeley dot edu> + Stability : unstable + Portability : portable + +Conversion of 'Pandoc' documents to reStructuredText. + +reStructuredText: http://docutils.sourceforge.net/rst.html +-} module Text.Pandoc.Writers.RST ( writeRST ) where @@ -10,40 +22,44 @@ import Text.PrettyPrint.HughesPJ hiding ( Str ) -- | Convert Pandoc to reStructuredText. writeRST :: WriterOptions -> Pandoc -> String writeRST options (Pandoc meta blocks) = - let (main, refs) = unzip $ map (blockToRST (writerTabStop options)) + let (main, refs) = unzip $ map (blockToRST (writerTabStop options)) (reformatBlocks $ replaceReferenceLinks blocks) - top = if (writerStandalone options) then - (metaToRST meta) $$ text (writerHeader options) - else - empty in - -- remove duplicate keys - let refs' = nubBy (\x y -> (render x) == (render y)) refs in - let body = text (writerIncludeBefore options) <> - vcat main $$ text (writerIncludeAfter options) in - render $ top <> body $$ vcat refs' $$ text "\n" + top = if (writerStandalone options) + then (metaToRST meta) $$ text (writerHeader options) + else empty in + -- remove duplicate keys + let refs' = nubBy (\x y -> (render x) == (render y)) refs in + let body = text (writerIncludeBefore options) <> + vcat main $$ text (writerIncludeAfter options) in + render $ top <> body $$ vcat refs' $$ text "\n" -- | Escape special RST characters. escapeString :: String -> String escapeString = backslashEscape "`\\|*_" --- | Convert list of inline elements into one 'Doc' of wrapped text and another --- containing references. +-- | Convert list of inline elements into one 'Doc' of wrapped text +-- and another containing references. wrappedRST :: [Inline] -> (Doc, Doc) wrappedRST lst = - let words = splitBySpace lst in - (fsep $ map (fcat . (map (fst . inlineToRST))) words, vcat (map (snd . inlineToRST) lst)) + let words = splitBySpace lst in + ( fsep $ map (fcat . (map (fst . inlineToRST))) words, + vcat (map (snd . inlineToRST) lst) ) -- | Remove reference keys, and make sure there are blanks before each list. reformatBlocks :: [Block] -> [Block] reformatBlocks [] = [] reformatBlocks ((Plain x):(OrderedList y):rest) = (Para x):(reformatBlocks ((OrderedList y):rest)) -reformatBlocks ((Plain x):(BulletList y):rest) = (Para x):(reformatBlocks ((BulletList y):rest)) +reformatBlocks ((Plain x):(BulletList y):rest) = + (Para x):(reformatBlocks ((BulletList y):rest)) reformatBlocks ((OrderedList x):rest) = (OrderedList (map reformatBlocks x)):(reformatBlocks rest) -reformatBlocks ((BulletList x):rest) = (BulletList (map reformatBlocks x)):(reformatBlocks rest) -reformatBlocks ((BlockQuote x):rest) = (BlockQuote (reformatBlocks x)):(reformatBlocks rest) -reformatBlocks ((Note ref x):rest) = (Note ref (reformatBlocks x)):(reformatBlocks rest) +reformatBlocks ((BulletList x):rest) = + (BulletList (map reformatBlocks x)):(reformatBlocks rest) +reformatBlocks ((BlockQuote x):rest) = + (BlockQuote (reformatBlocks x)):(reformatBlocks rest) +reformatBlocks ((Note ref x):rest) = + (Note ref (reformatBlocks x)):(reformatBlocks rest) reformatBlocks ((Key x1 y1):rest) = reformatBlocks rest reformatBlocks (x:rest) = x:(reformatBlocks rest) @@ -56,15 +72,16 @@ metaToRST (Meta title authors date) = titleToRST :: [Inline] -> Doc titleToRST [] = empty titleToRST lst = - let title = fst $ inlineListToRST lst in - let titleLength = length $ render title in - let border = text (replicate titleLength '=') in - border <> char '\n' <> title <> char '\n' <> border <> text "\n\n" + let title = fst $ inlineListToRST lst in + let titleLength = length $ render title in + let border = text (replicate titleLength '=') in + border <> char '\n' <> title <> char '\n' <> border <> text "\n\n" -- | Convert author list to 'Doc'. authorsToRST :: [String] -> Doc authorsToRST [] = empty -authorsToRST (first:rest) = text ":Author: " <> text first <> char '\n' <> (authorsToRST rest) +authorsToRST (first:rest) = text ":Author: " <> text first <> + char '\n' <> (authorsToRST rest) -- | Convert date to 'Doc'. dateToRST :: String -> Doc @@ -80,36 +97,38 @@ blockToRST tabStop Blank = (text "\n", empty) blockToRST tabStop Null = (empty, empty) blockToRST tabStop (Plain lst) = wrappedRST lst blockToRST tabStop (Para [TeX str]) = -- raw latex block - let str' = if (endsWith '\n' str) then (str ++ "\n") else (str ++ "\n\n") in - (hang (text "\n.. raw:: latex\n") 3 (vcat $ map text (lines str')), empty) -blockToRST tabStop (Para lst) = ((fst $ wrappedRST lst) <> (text "\n"), snd $ wrappedRST lst) + let str' = if (endsWith '\n' str) then (str ++ "\n") else (str ++ "\n\n") in + (hang (text "\n.. raw:: latex\n") 3 (vcat $ map text (lines str')), empty) +blockToRST tabStop (Para lst) = ( (fst $ wrappedRST lst) <> (text "\n"), + snd $ wrappedRST lst ) blockToRST tabStop (BlockQuote lst) = - let (main, refs) = unzip $ map (blockToRST tabStop) lst in - ((nest tabStop $ vcat $ main) <> text "\n", vcat refs) + let (main, refs) = unzip $ map (blockToRST tabStop) lst in + ((nest tabStop $ vcat $ main) <> text "\n", vcat refs) blockToRST tabStop (Note ref blocks) = - let (main, refs) = unzip $ map (blockToRST tabStop) blocks in - ((hang (text ".. [" <> text (escapeString ref) <> text "] ") 3 (vcat main)), vcat refs) + let (main, refs) = unzip $ map (blockToRST tabStop) blocks in + ((hang (text ".. [" <> text (escapeString ref) <> text "] ") 3 (vcat main)), + vcat refs) blockToRST tabStop (Key txt (Src src tit)) = - (text "ERROR - KEY FOUND", empty) -- shouldn't have a key here -blockToRST tabStop (CodeBlock str) = - (hang (text "::\n") tabStop (vcat $ map text (lines ('\n':(str ++ "\n\n")))), empty) + (text "ERROR - KEY FOUND", empty) -- shouldn't have a key here +blockToRST tabStop (CodeBlock str) = (hang (text "::\n") tabStop + (vcat $ map text (lines ('\n':(str ++ "\n\n")))), empty) blockToRST tabStop (RawHtml str) = - let str' = if (endsWith '\n' str) then (str ++ "\n") else (str ++ "\n\n") in - (hang (text "\n.. raw:: html\n") 3 (vcat $ map text (lines str')), empty) + let str' = if (endsWith '\n' str) then (str ++ "\n") else (str ++ "\n\n") in + (hang (text "\n.. raw:: html\n") 3 (vcat $ map text (lines str')), empty) blockToRST tabStop (BulletList lst) = - let (main, refs) = unzip $ map (bulletListItemToRST tabStop) lst in - (vcat main <> text "\n", vcat refs) + let (main, refs) = unzip $ map (bulletListItemToRST tabStop) lst in + (vcat main <> text "\n", vcat refs) blockToRST tabStop (OrderedList lst) = - let (main, refs) = - unzip $ zipWith (orderedListItemToRST tabStop) (enumFromTo 1 (length lst)) lst in - (vcat main <> text "\n", vcat refs) + let (main, refs) = unzip $ zipWith (orderedListItemToRST tabStop) + (enumFromTo 1 (length lst)) lst in + (vcat main <> text "\n", vcat refs) blockToRST tabStop HorizontalRule = (text "--------------\n", empty) blockToRST tabStop (Header level lst) = - let (headerText, refs) = inlineListToRST lst in - let headerLength = length $ render headerText in - let headerChar = if (level > 5) then ' ' else "=-~^'" !! (level - 1) in - let border = text $ replicate headerLength headerChar in - (headerText <> char '\n' <> border <> char '\n', refs) + let (headerText, refs) = inlineListToRST lst in + let headerLength = length $ render headerText in + let headerChar = if (level > 5) then ' ' else "=-~^'" !! (level - 1) in + let border = text $ replicate headerLength headerChar in + (headerText <> char '\n' <> border <> char '\n', refs) -- | Convert bullet list item (list of blocks) to reStructuredText. -- Returns a pair of 'Doc', the first the main text, the second references @@ -117,8 +136,8 @@ bulletListItemToRST :: Int -- ^ tab stop -> [Block] -- ^ list item (list of blocks) -> (Doc, Doc) bulletListItemToRST tabStop list = - let (main, refs) = unzip $ map (blockToRST tabStop) list in - (hang (text "- ") tabStop (vcat main), (vcat refs)) + let (main, refs) = unzip $ map (blockToRST tabStop) list in + (hang (text "- ") tabStop (vcat main), (vcat refs)) -- | Convert an ordered list item (list of blocks) to reStructuredText. -- Returns a pair of 'Doc', the first the main text, the second references @@ -127,9 +146,9 @@ orderedListItemToRST :: Int -- ^ tab stop -> [Block] -- ^ list item (list of blocks) -> (Doc, Doc) orderedListItemToRST tabStop num list = - let (main, refs) = unzip $ map (blockToRST tabStop) list - spacer = if (length (show num) < 2) then " " else "" in - (hang (text ((show num) ++ "." ++ spacer)) tabStop (vcat main), (vcat refs)) + let (main, refs) = unzip $ map (blockToRST tabStop) list + spacer = if (length (show num) < 2) then " " else "" in + (hang (text ((show num) ++ "." ++ spacer)) tabStop (vcat main), (vcat refs)) -- | Convert a list of inline elements to reStructuredText. -- Returns a pair of 'Doc', the first the main text, the second references. @@ -151,39 +170,41 @@ inlineToRST (HtmlInline str) = (empty, empty) inlineToRST (LineBreak) = inlineToRST Space -- RST doesn't have line breaks inlineToRST Space = (char ' ', empty) -- --- Note: can assume reference links have been replaced where possible with explicit links. +-- Note: can assume reference links have been replaced where possible +-- with explicit links. -- inlineToRST (Link txt (Src src tit)) = - let (linktext, ref') = if (null txt) || (txt == [Str ""]) then - (text "link", empty) - else - inlineListToRST $ normalizeSpaces txt in - let link = char '`' <> linktext <> text "`_" - linktext' = render linktext in - let linktext'' = if (':' `elem` linktext') then "`" ++ linktext' ++ "`" else linktext' in + let (linktext, ref') = if (null txt) || (txt == [Str ""]) + then (text "link", empty) + else inlineListToRST $ normalizeSpaces txt in + let link = char '`' <> linktext <> text "`_" + linktext' = render linktext in + let linktext'' = if (':' `elem` linktext') + then "`" ++ linktext' ++ "`" + else linktext' in let ref = text ".. _" <> text linktext'' <> text ": " <> text src in (link, ref' $$ ref) inlineToRST (Link txt (Ref [])) = - let (linktext, refs) = inlineListToRST txt in - (char '[' <> linktext <> char ']', refs) + let (linktext, refs) = inlineListToRST txt in + (char '[' <> linktext <> char ']', refs) inlineToRST (Link txt (Ref ref)) = - let (linktext, refs1) = inlineListToRST txt - (reftext, refs2) = inlineListToRST ref in - (char '[' <> linktext <> text "][" <> reftext <> char ']', refs1 $$ refs2) + let (linktext, refs1) = inlineListToRST txt + (reftext, refs2) = inlineListToRST ref in + (char '[' <> linktext <> text "][" <> reftext <> char ']', refs1 $$ refs2) inlineToRST (Image alternate (Src source tit)) = - let (alt, ref') = if (null alternate) || (alternate == [Str ""]) then - (text "image", empty) - else - inlineListToRST $ normalizeSpaces alternate in - let link = char '|' <> alt <> char '|' in - let ref = text ".. " <> link <> text " image:: " <> text source in - (link, ref' $$ ref) + let (alt, ref') = if (null alternate) || (alternate == [Str ""]) + then (text "image", empty) + else inlineListToRST $ normalizeSpaces alternate in + let link = char '|' <> alt <> char '|' in + let ref = text ".. " <> link <> text " image:: " <> text source in + (link, ref' $$ ref) inlineToRST (Image alternate (Ref [])) = - let (alttext, refs) = inlineListToRST alternate in - (char '|' <> alttext <> char '|', refs) + let (alttext, refs) = inlineListToRST alternate in + (char '|' <> alttext <> char '|', refs) -- The following case won't normally occur... inlineToRST (Image alternate (Ref ref)) = - let (alttext, refs1) = inlineListToRST alternate - (reftext, refs2) = inlineListToRST ref in - (char '|' <> alttext <> char '|', refs1 $$ refs2) -inlineToRST (NoteRef ref) = (text " [" <> text (escapeString ref) <> char ']' <> char '_', empty) + let (alttext, refs1) = inlineListToRST alternate + (reftext, refs2) = inlineListToRST ref in + (char '|' <> alttext <> char '|', refs1 $$ refs2) +inlineToRST (NoteRef ref) = + (text " [" <> text (escapeString ref) <> char ']' <> char '_', empty) diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index 386a5b51b..3dbda8518 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -1,4 +1,14 @@ --- | Convert Pandoc to rich text format. +{- | + Module : + Copyright : Copyright (C) 2006 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm at berkeley dot edu> + Stability : unstable + Portability : portable + +Conversion of 'Pandoc' documents to RTF (rich text format). +-} module Text.Pandoc.Writers.RTF ( writeRTF ) where @@ -10,24 +20,24 @@ import Char ( ord, chr ) -- | Convert Pandoc to a string in rich text format. writeRTF :: WriterOptions -> Pandoc -> String writeRTF options (Pandoc meta blocks) = - let notes = filter isNoteBlock blocks in -- assumes all notes are at outer level - let head = if writerStandalone options then - rtfHeader notes (writerHeader options) meta - else - "" - foot = if writerStandalone options then "\n}\n" else "" - body = (writerIncludeBefore options) ++ - (concatMap (blockToRTF notes 0) (replaceReferenceLinks blocks)) ++ - (writerIncludeAfter options) in - head ++ body ++ foot + -- assumes all notes are at outer level + let notes = filter isNoteBlock blocks in + let head = if writerStandalone options + then rtfHeader notes (writerHeader options) meta + else "" + foot = if writerStandalone options then "\n}\n" else "" + body = (writerIncludeBefore options) ++ (concatMap (blockToRTF notes 0) + (replaceReferenceLinks blocks)) ++ + (writerIncludeAfter options) in + head ++ body ++ foot -- | Convert unicode characters (> 127) into rich text format representation. handleUnicode :: String -> String handleUnicode [] = [] -handleUnicode (c:cs) = if (ord c) > 127 then - '\\':'u':(show (ord c)) ++ "?" ++ (handleUnicode cs) - else - c:(handleUnicode cs) +handleUnicode (c:cs) = if (ord c) > 127 + then '\\':'u':(show (ord c)) ++ "?" ++ + (handleUnicode cs) + else c:(handleUnicode cs) escapeSpecial = backslashEscape "{\\}" escapeTab = gsub "\\\\t" "\\\\tab " @@ -56,8 +66,8 @@ rtfParSpaced :: Int -- ^ space after (in twips) -> String -- ^ string with content -> String rtfParSpaced spaceAfter indent firstLineIndent content = - "{\\pard \\f0 \\sa" ++ (show spaceAfter) ++ " \\li" ++ (show indent) ++ - " \\fi" ++ (show firstLineIndent) ++ " " ++ content ++ "\\par}\n" + "{\\pard \\f0 \\sa" ++ (show spaceAfter) ++ " \\li" ++ (show indent) ++ + " \\fi" ++ (show firstLineIndent) ++ " " ++ content ++ "\\par}\n" -- | Default paragraph. rtfPar :: Int -- ^ block indent (in twips) @@ -85,9 +95,10 @@ bulletMarker indent = case (indent `mod` 720) of -- | Returns appropriate (list of) ordered list markers for indent level. orderedMarkers :: Int -> [String] -orderedMarkers indent = case (indent `mod` 720) of - 0 -> map (\x -> show x ++ ".") [1..] - otherwise -> map (\x -> show x ++ ".") $ cycle ['a'..'z'] +orderedMarkers indent = + case (indent `mod` 720) of + 0 -> map (\x -> show x ++ ".") [1..] + otherwise -> map (\x -> show x ++ ".") $ cycle ['a'..'z'] -- | Returns RTF header. rtfHeader :: [Block] -- ^ list of note blocks @@ -95,16 +106,20 @@ rtfHeader :: [Block] -- ^ list of note blocks -> Meta -- ^ bibliographic information -> String rtfHeader notes headerText (Meta title authors date) = - let titletext = if null title then - "" - else - rtfPar 0 0 ("\\qc \\b \\fs36 " ++ inlineListToRTF notes title) - authorstext = if null authors then - "" - else - rtfPar 0 0 ("\\qc " ++ (joinWithSep "\\" (map stringToRTF authors))) - datetext = if date == "" then "" else rtfPar 0 0 ("\\qc " ++ stringToRTF date) in - let spacer = if null (titletext ++ authorstext ++ datetext) then "" else rtfPar 0 0 "" in + let titletext = if null title + then "" + else rtfPar 0 0 ("\\qc \\b \\fs36 " ++ + inlineListToRTF notes title) + authorstext = if null authors + then "" + else rtfPar 0 0 ("\\qc " ++ (joinWithSep "\\" + (map stringToRTF authors))) + datetext = if date == "" + then "" + else rtfPar 0 0 ("\\qc " ++ stringToRTF date) in + let spacer = if null (titletext ++ authorstext ++ datetext) + then "" + else rtfPar 0 0 "" in headerText ++ titletext ++ authorstext ++ datetext ++ spacer -- | Convert Pandoc block element to RTF. @@ -114,32 +129,36 @@ blockToRTF :: [Block] -- ^ list of note blocks -> String blockToRTF notes indent Blank = rtfPar indent 0 "" blockToRTF notes indent Null = "" -blockToRTF notes indent (Plain lst) = rtfCompact indent 0 (inlineListToRTF notes lst) -blockToRTF notes indent (Para lst) = rtfPar indent 0 (inlineListToRTF notes lst) +blockToRTF notes indent (Plain lst) = + rtfCompact indent 0 (inlineListToRTF notes lst) +blockToRTF notes indent (Para lst) = + rtfPar indent 0 (inlineListToRTF notes lst) blockToRTF notes indent (BlockQuote lst) = - concatMap (blockToRTF notes (indent + indentIncrement)) lst -blockToRTF notes indent (Note ref lst) = "" -- there shouldn't be any after filtering + concatMap (blockToRTF notes (indent + indentIncrement)) lst +blockToRTF notes indent (Note ref lst) = "" -- shouldn't be any aftr filtering blockToRTF notes indent (Key _ _) = "" -blockToRTF notes indent (CodeBlock str) = rtfPar indent 0 ("\\f1 " ++ (codeStringToRTF str)) +blockToRTF notes indent (CodeBlock str) = + rtfPar indent 0 ("\\f1 " ++ (codeStringToRTF str)) blockToRTF notes indent (RawHtml str) = "" blockToRTF notes indent (BulletList lst) = - spaceAtEnd $ concatMap (listItemToRTF notes indent (bulletMarker indent)) lst + spaceAtEnd $ + concatMap (listItemToRTF notes indent (bulletMarker indent)) lst blockToRTF notes indent (OrderedList lst) = - spaceAtEnd $ concat $ zipWith (listItemToRTF notes indent) (orderedMarkers indent) lst + spaceAtEnd $ concat $ + zipWith (listItemToRTF notes indent) (orderedMarkers indent) lst blockToRTF notes indent HorizontalRule = - rtfPar indent 0 "\\qc \\emdash\\emdash\\emdash\\emdash\\emdash" + rtfPar indent 0 "\\qc \\emdash\\emdash\\emdash\\emdash\\emdash" blockToRTF notes indent (Header level lst) = - rtfPar indent 0 ("\\b \\fs" ++ (show (40 - (level * 4))) ++ " " ++ - (inlineListToRTF notes lst)) + rtfPar indent 0 ("\\b \\fs" ++ (show (40 - (level * 4))) ++ " " ++ + (inlineListToRTF notes lst)) -- | Ensure that there's the same amount of space after compact -- lists as after regular lists. spaceAtEnd :: String -> String spaceAtEnd str = - if isSuffixOf "\\par}\n" str then - (take ((length str) - 6) str) ++ "\\sa180\\par}\n" - else - str + if isSuffixOf "\\par}\n" str + then (take ((length str) - 6) str) ++ "\\sa180\\par}\n" + else str -- | Convert list item (list of blocks) to RTF. listItemToRTF :: [Block] -- ^ list of note blocks @@ -148,13 +167,14 @@ listItemToRTF :: [Block] -- ^ list of note blocks -> [Block] -- ^ list item (list of blocks) -> [Char] listItemToRTF notes indent marker [] = - rtfCompact (indent + listIncrement) (0 - listIncrement) - (marker ++ "\\tx" ++ (show listIncrement) ++ "\\tab ") + rtfCompact (indent + listIncrement) (0 - listIncrement) + (marker ++ "\\tx" ++ (show listIncrement) ++ "\\tab ") listItemToRTF notes indent marker list = - let (first:rest) = map (blockToRTF notes (indent + listIncrement)) list in - let modFirst = gsub "\\\\fi-?[0-9]+" ("\\\\fi" ++ (show (0 - listIncrement)) ++ - " " ++ marker ++ "\\\\tx" ++ (show listIncrement) ++ "\\\\tab") first in - modFirst ++ (concat rest) + let (first:rest) = map (blockToRTF notes (indent + listIncrement)) list in + let modFirst = gsub "\\\\fi-?[0-9]+" ("\\\\fi" ++ + (show (0 - listIncrement)) ++ " " ++ marker ++ + "\\\\tx" ++ (show listIncrement) ++ "\\\\tab") first in + modFirst ++ (concat rest) -- | Convert list of inline items to RTF. inlineListToRTF :: [Block] -- ^ list of note blocks @@ -167,7 +187,8 @@ inlineToRTF :: [Block] -- ^ list of note blocks -> Inline -- ^ inline to convert -> String inlineToRTF notes (Emph lst) = "{\\i " ++ (inlineListToRTF notes lst) ++ "} " -inlineToRTF notes (Strong lst) = "{\\b " ++ (inlineListToRTF notes lst) ++ "} " +inlineToRTF notes (Strong lst) = + "{\\b " ++ (inlineListToRTF notes lst) ++ "} " inlineToRTF notes (Code str) = "{\\f1 " ++ (codeStringToRTF str) ++ "} " inlineToRTF notes (Str str) = stringToRTF str inlineToRTF notes (TeX str) = latexToRTF str @@ -175,20 +196,24 @@ inlineToRTF notes (HtmlInline str) = "" inlineToRTF notes (LineBreak) = "\\line " inlineToRTF notes Space = " " inlineToRTF notes (Link text (Src src tit)) = - "{\\field{\\*\\fldinst{HYPERLINK \"" ++ (codeStringToRTF src) ++ "\"}}{\\fldrslt{\\ul\n" - ++ (inlineListToRTF notes text) ++ "\n}}}\n" -inlineToRTF notes (Link text (Ref [])) = "[" ++ (inlineListToRTF notes text) ++ "]" -inlineToRTF notes (Link text (Ref ref)) = "[" ++ (inlineListToRTF notes text) ++ "][" ++ - (inlineListToRTF notes ref) ++ "]" -- this is what markdown does, for better or worse -inlineToRTF notes (Image alternate (Src source tit)) = "{\\cf1 [image: " ++ source ++ "]\\cf0}" -inlineToRTF notes (Image alternate (Ref [])) = "![" ++ (inlineListToRTF notes alternate) ++ "]" -inlineToRTF notes (Image alternate (Ref ref)) = "![" ++ (inlineListToRTF notes alternate) ++ - "][" ++ (inlineListToRTF notes ref) ++ "]" + "{\\field{\\*\\fldinst{HYPERLINK \"" ++ (codeStringToRTF src) ++ + "\"}}{\\fldrslt{\\ul\n" ++ (inlineListToRTF notes text) ++ "\n}}}\n" +inlineToRTF notes (Link text (Ref [])) = + "[" ++ (inlineListToRTF notes text) ++ "]" +inlineToRTF notes (Link text (Ref ref)) = + "[" ++ (inlineListToRTF notes text) ++ "][" ++ + (inlineListToRTF notes ref) ++ "]" -- this is what markdown does +inlineToRTF notes (Image alternate (Src source tit)) = + "{\\cf1 [image: " ++ source ++ "]\\cf0}" +inlineToRTF notes (Image alternate (Ref [])) = + "![" ++ (inlineListToRTF notes alternate) ++ "]" +inlineToRTF notes (Image alternate (Ref ref)) = "![" ++ + (inlineListToRTF notes alternate) ++ "][" ++ + (inlineListToRTF notes ref) ++ "]" inlineToRTF [] (NoteRef ref) = "" inlineToRTF ((Note firstref firstblocks):rest) (NoteRef ref) = - if firstref == ref then - "{\\super\\chftn}{\\*\\footnote\\chftn\\~\\plain\\pard " ++ - (concatMap (blockToRTF rest 0) firstblocks) ++ "}" - else - inlineToRTF rest (NoteRef ref) + if firstref == ref + then "{\\super\\chftn}{\\*\\footnote\\chftn\\~\\plain\\pard " ++ + (concatMap (blockToRTF rest 0) firstblocks) ++ "}" + else inlineToRTF rest (NoteRef ref) diff --git a/src/Text/ParserCombinators/Pandoc.hs b/src/Text/ParserCombinators/Pandoc.hs index a78b776d3..aa3277574 100644 --- a/src/Text/ParserCombinators/Pandoc.hs +++ b/src/Text/ParserCombinators/Pandoc.hs @@ -1,4 +1,14 @@ --- | Special parser combinators for Pandoc readers. +{- | + Module : Text.ParserCombinators.Pandoc + Copyright : Copyright (C) 2006 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm at berkeley dot edu> + Stability : unstable + Portability : portable + +Special parser combinators for Pandoc readers. +-} module Text.ParserCombinators.Pandoc ( many1Till, followedBy', @@ -79,8 +89,9 @@ many1Till p end = try (do rest <- manyTill p end return (first:rest)) --- | A more general form of @notFollowedBy@. This one allows any type of parser to --- be specified, and succeeds only if that parser fails. It does not consume any input. +-- | A more general form of @notFollowedBy@. This one allows any +-- type of parser to be specified, and succeeds only if that parser fails. +-- It does not consume any input. notFollowedBy' :: Show b => GenParser a st b -> GenParser a st () notFollowedBy' parser = try (do { c <- try parser; unexpected (show c) } <|> return ()) @@ -90,10 +101,9 @@ notFollowedBy' parser = try (do { c <- try parser; unexpected (show c) } followedBy' :: (Show b) => GenParser a st b -> GenParser a st () followedBy' parser = do isNotFollowed <- option False (do{ notFollowedBy' parser; return True}) - if isNotFollowed then - fail "not followed by parser" - else - return () + if isNotFollowed + then fail "not followed by parser" + else return () -- | Parses one of a list of strings (tried in order). oneOfStrings :: [String] -> GenParser Char st String |