diff options
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 85 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/LaTeX.hs | 118 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 601 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/RST.hs | 631 |
4 files changed, 750 insertions, 685 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index f9a738e94..c157f3b0e 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -1,4 +1,14 @@ --- | Converts HTML to 'Pandoc' document. +{- | + Module : Text.Pandoc.Readers.HTML + Copyright : Copyright (C) 2006 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm at berkeley dot edu> + Stability : unstable + Portability : portable + +Conversion of HTML to 'Pandoc' document. +-} module Text.Pandoc.Readers.HTML ( readHtml, rawHtmlInline, @@ -30,10 +40,11 @@ testString = testStringWith parseHtml -- Constants -- -inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big", "br", "cite", - "code", "dfn", "em", "font", "i", "img", "input", "kbd", "label", "q", - "s", "samp", "select", "small", "span", "strike", "strong", "sub", - "sup", "textarea", "tt", "u", "var"] +inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big", + "br", "cite", "code", "dfn", "em", "font", "i", "img", + "input", "kbd", "label", "q", "s", "samp", "select", + "small", "span", "strike", "strong", "sub", "sup", + "textarea", "tt", "u", "var"] -- -- HTML utility functions @@ -50,9 +61,10 @@ inlinesTilEnd tag = try (do return inlines) -- extract type from a tag: e.g. br from <br>, < br >, </br>, etc. -extractTagType tag = case (matchRegex (mkRegex "<[[:space:]]*/?([A-Za-z0-9]+)") tag) of - Just [match] -> (map toLower match) - Nothing -> "" +extractTagType tag = + case (matchRegex (mkRegex "<[[:space:]]*/?([A-Za-z0-9]+)") tag) of + Just [match] -> (map toLower match) + Nothing -> "" anyHtmlTag = try (do char '<' @@ -90,7 +102,8 @@ htmlTag tag = try (do -- parses a quoted html attribute value quoted quoteChar = do - result <- between (char quoteChar) (char quoteChar) (many (noneOf [quoteChar])) + result <- between (char quoteChar) (char quoteChar) + (many (noneOf [quoteChar])) return (result, [quoteChar]) htmlAttributes = do @@ -116,9 +129,11 @@ htmlRegularAttribute = try (do spaces (content, quoteStr) <- choice [ (quoted '\''), (quoted '"'), - (do{ a <- (many (alphaNum <|> (oneOf "-._:"))); - return (a,"")} ) ] - return (name, content, (" " ++ name ++ "=" ++ quoteStr ++ content ++ quoteStr))) + (do + a <- many (alphaNum <|> (oneOf "-._:")) + return (a,"")) ] + return (name, content, + (" " ++ name ++ "=" ++ quoteStr ++ content ++ quoteStr))) htmlEndTag tag = try (do char '<' @@ -135,17 +150,11 @@ isInline tag = (extractTagType tag) `elem` inlineHtmlTags anyHtmlBlockTag = try (do tag <- choice [anyHtmlTag, anyHtmlEndTag] - if isInline tag then - fail "inline tag" - else - return tag) + if isInline tag then fail "inline tag" else return tag) anyHtmlInlineTag = try (do tag <- choice [ anyHtmlTag, anyHtmlEndTag ] - if isInline tag then - return tag - else - fail "not an inline tag") + if isInline tag then return tag else fail "not an inline tag") -- scripts must be treated differently, because they can contain <> etc. htmlScript = try (do @@ -155,13 +164,11 @@ htmlScript = try (do rawHtmlBlock = try (do notFollowedBy' (choice [htmlTag "/body", htmlTag "/html"]) - body <- choice [htmlScript, anyHtmlBlockTag, htmlComment, xmlDec, definition] + body <- choice [htmlScript, anyHtmlBlockTag, htmlComment, xmlDec, + definition] sp <- (many space) state <- getState - if stateParseRaw state then - return (RawHtml (body ++ sp)) - else - return Null) + if stateParseRaw state then return (RawHtml (body ++ sp)) else return Null) htmlComment = try (do string "<!--" @@ -266,10 +273,10 @@ headerLevel n = try (do hrule = try (do (tag, attribs) <- htmlTag "hr" state <- getState - if (not (null attribs)) && (stateParseRaw state) then - unexpected "attributes in hr" -- in this case we want to parse it as raw html - else - return HorizontalRule) + if (not (null attribs)) && (stateParseRaw state) + then -- in this case we want to parse it as raw html + unexpected "attributes in hr" + else return HorizontalRule) -- -- code blocks @@ -352,29 +359,31 @@ inline = choice [ text, special ] <?> "inline" text = choice [ entity, strong, emph, code, str, linebreak, whitespace ] <?> "text" -special = choice [ link, image, rawHtmlInline ] <?> "link, inline html, or image" +special = choice [ link, image, rawHtmlInline ] <?> + "link, inline html, or image" entity = try (do char '&' - body <- choice [(many1 letter), - (try (do{ char '#'; num <- many1 digit; return ("#" ++ num)}))] + body <- choice [(many1 letter), (try (do + char '#' + num <- many1 digit + return ("#" ++ num)))] char ';' return (Str [fromMaybe '?' (htmlEntityToChar ("&" ++ body ++ ";"))])) code = try (do htmlTag "code" result <- manyTill anyChar (htmlEndTag "code") - -- remove internal line breaks, leading and trailing space, and decode entities - let result' = decodeEntities $ removeLeadingTrailingSpace $ joinWithSep " " $ lines result + -- remove internal line breaks, leading and trailing space, + -- and decode entities + let result' = decodeEntities $ removeLeadingTrailingSpace $ + joinWithSep " " $ lines result return (Code result')) rawHtmlInline = do result <- choice [htmlScript, anyHtmlInlineTag] state <- getState - if stateParseRaw state then - return (HtmlInline result) - else - return (Str "") + if stateParseRaw state then return (HtmlInline result) else return (Str "") betweenTags tag = try (do htmlTag tag diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index a62ff7b94..81004b1f1 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1,4 +1,14 @@ --- | Converts LaTeX to 'Pandoc' document. +{- | + Module : Text.Pandoc.Readers.LaTeX + Copyright : Copyright (C) 2006 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm at berkeley dot edu> + Stability : unstable + Portability : portable + +Conversion of LaTeX to 'Pandoc' document. +-} module Text.Pandoc.Readers.LaTeX ( readLaTeX, rawLaTeXInline, @@ -81,7 +91,8 @@ end name = try (do spaces return name) --- | Returns a list of block elements containing the contents of an environment. +-- | Returns a list of block elements containing the contents of an +-- environment. environment name = try (do begin name spaces @@ -104,15 +115,16 @@ anyEnvironment = try (do -- | Process LaTeX preamble, extracting metadata. processLaTeXPreamble = do - manyTill (choice [bibliographic, comment, unknownCommand, nullBlock]) (try (string "\\begin{document}")) + manyTill (choice [bibliographic, comment, unknownCommand, nullBlock]) + (try (string "\\begin{document}")) spaces -- | Parse LaTeX and return 'Pandoc'. parseLaTeX = do - option () processLaTeXPreamble -- preamble might not be present, if a fragment + option () processLaTeXPreamble -- preamble might not be present (fragment) blocks <- parseBlocks spaces - option "" (string "\\end{document}") -- if parsing a fragment, this might not be present + option "" (string "\\end{document}") -- might not be present (in fragment) spaces eof state <- getState @@ -122,7 +134,8 @@ parseLaTeX = do let title' = stateTitle state let authors' = stateAuthors state let date' = stateDate state - return (Pandoc (Meta title' authors' date') (blocks' ++ (reverse noteBlocks) ++ (reverse keyBlocks))) + return (Pandoc (Meta title' authors' date') + (blocks' ++ (reverse noteBlocks) ++ (reverse keyBlocks))) -- -- parsing blocks @@ -133,9 +146,10 @@ parseBlocks = do result <- many block return result -block = choice [ hrule, codeBlock, header, list, blockQuote, mathBlock, comment, - bibliographic, para, specialEnvironment, itemBlock, unknownEnvironment, - unknownCommand ] <?> "block" +block = choice [ hrule, codeBlock, header, list, blockQuote, mathBlock, + comment, bibliographic, para, specialEnvironment, + itemBlock, unknownEnvironment, unknownCommand ] <?> + "block" -- -- header blocks @@ -157,7 +171,8 @@ headerLevel n = try (do -- hrule = try (do - oneOfStrings [ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n", "\\newpage" ] + oneOfStrings [ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n", + "\\newpage" ] spaces return HorizontalRule) @@ -166,8 +181,10 @@ hrule = try (do -- codeBlock = try (do - string "\\begin{verbatim}" -- don't use begin function because it gobbles whitespace - option "" blanklines -- we want to gobble blank lines, but not leading space + string "\\begin{verbatim}" -- don't use begin function because it + -- gobbles whitespace + option "" blanklines -- we want to gobble blank lines, but not + -- leading space contents <- manyTill anyChar (try (string "\\end{verbatim}")) spaces return (CodeBlock (stripTrailingNewlines contents))) @@ -266,7 +283,8 @@ authors = try (do string "\\author{" authors <- manyTill anyChar (char '}') spaces - let authors' = map removeLeadingTrailingSpace $ lines $ gsub "\\\\\\\\" "\n" authors + let authors' = map removeLeadingTrailingSpace $ lines $ + gsub "\\\\\\\\" "\n" authors updateState (\state -> state { stateAuthors = authors' }) return Null) @@ -286,21 +304,19 @@ date = try (do itemBlock = try (do ("item", _, args) <- command state <- getState - if (stateParserContext state == ListItemState) then - fail "item should be handled by list block" - else - if null args then - return Null - else - return (Plain [Str (stripFirstAndLast (head args))])) + if (stateParserContext state == ListItemState) + then fail "item should be handled by list block" + else if null args + then return Null + else return (Plain [Str (stripFirstAndLast (head args))])) -- -- raw LaTeX -- specialEnvironment = do -- these are always parsed as raw - followedBy' (choice (map (\name -> begin name) ["tabular", "figure", "tabbing", "eqnarry", - "picture", "table", "verse", "theorem"])) + followedBy' (choice (map (\name -> begin name) ["tabular", "figure", + "tabbing", "eqnarry", "picture", "table", "verse", "theorem"])) rawLaTeXEnvironment -- | Parse any LaTeX environment and return a Para block containing @@ -316,18 +332,20 @@ rawLaTeXEnvironment = try (do args <- option [] commandArgs let argStr = concat args contents <- manyTill (choice [(many1 (noneOf "\\")), - (do{ (Para [TeX str]) <- rawLaTeXEnvironment; return str }), - string "\\"]) (end name') + (do + (Para [TeX str]) <- rawLaTeXEnvironment + return str), + string "\\" ]) + (end name') spaces return (Para [TeX ("\\begin{" ++ name' ++ "}" ++ argStr ++ - (concat contents) ++ "\\end{" ++ name' ++ "}")])) + (concat contents) ++ "\\end{" ++ name' ++ "}")])) unknownEnvironment = try (do state <- getState - result <- if stateParseRaw state then -- check to see whether we should include raw TeX - rawLaTeXEnvironment -- if so, get the whole raw environment - else - anyEnvironment -- otherwise just the contents + result <- if stateParseRaw state -- check whether we should include raw TeX + then rawLaTeXEnvironment -- if so, get whole raw environment + else anyEnvironment -- otherwise just the contents return result) unknownCommand = try (do @@ -338,14 +356,12 @@ unknownCommand = try (do spaces let argStr = concat args state <- getState - if (name == "item") && ((stateParserContext state) == ListItemState) then - fail "should not be parsed as raw" - else - string "" - if stateParseRaw state then - return (Plain [TeX ("\\" ++ name ++ star ++ argStr)]) - else - return (Plain [Str (joinWithSep " " args)])) + if (name == "item") && ((stateParserContext state) == ListItemState) + then fail "should not be parsed as raw" + else string "" + if stateParseRaw state + then return (Plain [TeX ("\\" ++ name ++ star ++ argStr)]) + else return (Plain [Str (joinWithSep " " args)])) -- latex comment comment = try (do @@ -358,9 +374,9 @@ comment = try (do -- inline -- -inline = choice [ strong, emph, ref, lab, code, linebreak, math, ldots, accentedChar, - specialChar, specialInline, escapedChar, unescapedChar, str, - endline, whitespace ] <?> "inline" +inline = choice [ strong, emph, ref, lab, code, linebreak, math, ldots, + accentedChar, specialChar, specialInline, escapedChar, + unescapedChar, str, endline, whitespace ] <?> "inline" specialInline = choice [ link, image, footnote, rawLaTeXInline ] <?> "link, raw TeX, note, or image" @@ -397,8 +413,8 @@ accentTable = ('o', [('`', 242), ('\'', 243), ('^', 244), ('~', 245), ('"', 246)]), ('u', [('`', 249), ('\'', 250), ('^', 251), ('"', 252)]) ] -specialAccentedChar = choice [ ccedil, aring, iuml, szlig, aelig, oslash, pound, - euro, copyright, sect ] +specialAccentedChar = choice [ ccedil, aring, iuml, szlig, aelig, + oslash, pound, euro, copyright, sect ] ccedil = try (do char '\\' @@ -563,15 +579,14 @@ image = try (do footnote = try (do (name, _, (contents:[])) <- command - if ((name == "footnote") || (name == "thanks")) then - string "" - else - fail "not a footnote or thanks command" + if ((name == "footnote") || (name == "thanks")) + then string "" + else fail "not a footnote or thanks command" let contents' = stripFirstAndLast contents state <- getState let blocks = case runParser parseBlocks state "footnote" contents of - Left err -> error $ "Input:\n" ++ show contents' ++ - "\nError:\n" ++ show err + Left err -> error $ "Input:\n" ++ show contents' ++ + "\nError:\n" ++ show err Right result -> result let notes = stateNoteBlocks state let nextRef = case notes of @@ -586,8 +601,7 @@ rawLaTeXInline = try (do (name, star, args) <- command let argStr = concat args state <- getState - if ((name == "begin") || (name == "end") || (name == "item")) then - fail "not an inline command" - else - string "" + if ((name == "begin") || (name == "end") || (name == "item")) + then fail "not an inline command" + else string "" return (TeX ("\\" ++ name ++ star ++ argStr))) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 034e5d8e4..9ca73dee5 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1,4 +1,14 @@ --- | Convert markdown to Pandoc document. +{- | + Module : Text.Pandoc.Readers.Markdown + Copyright : Copyright (C) 2006 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm at berkeley dot edu> + Stability : unstable + Portability : portable + +Conversion of markdown-formatted plain text to 'Pandoc' document. +-} module Text.Pandoc.Readers.Markdown ( readMarkdown ) where @@ -8,8 +18,8 @@ import Text.ParserCombinators.Pandoc import Text.Pandoc.Definition import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment ) import Text.Pandoc.Shared -import Text.Pandoc.Readers.HTML ( rawHtmlInline, rawHtmlBlock, anyHtmlBlockTag, - anyHtmlInlineTag ) +import Text.Pandoc.Readers.HTML ( rawHtmlInline, rawHtmlBlock, + anyHtmlBlockTag, anyHtmlInlineTag ) import Text.Pandoc.HtmlEntities ( decodeEntities ) import Text.Regex ( matchRegex, mkRegex ) import Text.ParserCombinators.Parsec @@ -57,9 +67,10 @@ blockQuoteChar = '>' hyphenChar = '-' -- treat these as potentially non-text when parsing inline: -specialChars = [escapeChar, labelStart, labelEnd, emphStart, emphEnd, emphStartAlt, - emphEndAlt, codeStart, codeEnd, autoLinkEnd, autoLinkStart, mathStart, - mathEnd, imageStart, noteStart, hyphenChar] +specialChars = [escapeChar, labelStart, labelEnd, emphStart, emphEnd, + emphStartAlt, emphEndAlt, codeStart, codeEnd, autoLinkEnd, + autoLinkStart, mathStart, mathEnd, imageStart, noteStart, + hyphenChar] -- -- auxiliary functions @@ -115,14 +126,16 @@ numberOfNote (Note ref _) = (read ref) numberOfNote _ = 0 parseMarkdown = do - updateState (\state -> state { stateParseRaw = True }) -- need to parse raw HTML + updateState (\state -> state { stateParseRaw = True }) + -- need to parse raw HTML, since markdown allows it (title, author, date) <- option ([],[],"") titleBlock blocks <- parseBlocks let blocks' = filter (/= Null) blocks state <- getState let keys = reverse $ stateKeyBlocks state let notes = reverse $ stateNoteBlocks state - let sortedNotes = sortBy (\x y -> compare (numberOfNote x) (numberOfNote y)) notes + let sortedNotes = sortBy (\x y -> compare (numberOfNote x) + (numberOfNote y)) notes return (Pandoc (Meta title author date) (blocks' ++ sortedNotes ++ keys)) -- @@ -133,8 +146,9 @@ parseBlocks = do result <- manyTill block eof return result -block = choice [ codeBlock, note, referenceKey, header, hrule, list, blockQuote, rawHtmlBlocks, - rawLaTeXEnvironment, para, plain, blankBlock, nullBlock ] <?> "block" +block = choice [ codeBlock, note, referenceKey, header, hrule, list, + blockQuote, rawHtmlBlocks, rawLaTeXEnvironment, para, + plain, blankBlock, nullBlock ] <?> "block" -- -- header blocks @@ -154,33 +168,33 @@ atxClosing = try (do newline option "" blanklines) -setextHeader = choice (map (\x -> setextH x) (enumFromTo 1 (length setextHChars))) +setextHeader = choice $ + map (\x -> setextH x) (enumFromTo 1 (length setextHChars)) setextH n = try (do - txt <- many1 (do {notFollowedBy newline; inline}) - endline - many1 (char (setextHChars !! (n-1))) - skipSpaces - newline - option "" blanklines - return (Header n (normalizeSpaces txt))) + txt <- many1 (do {notFollowedBy newline; inline}) + endline + many1 (char (setextHChars !! (n-1))) + skipSpaces + newline + option "" blanklines + return (Header n (normalizeSpaces txt))) -- -- hrule block -- -hruleWith chr = - try (do - skipSpaces - char chr - skipSpaces - char chr - skipSpaces - char chr - skipMany (oneOf (chr:spaceChars)) - newline - option "" blanklines - return HorizontalRule) +hruleWith chr = try (do + skipSpaces + char chr + skipSpaces + char chr + skipSpaces + char chr + skipMany (oneOf (chr:spaceChars)) + newline + option "" blanklines + return HorizontalRule) hrule = choice (map hruleWith hruleChars) <?> "hrule" @@ -189,9 +203,9 @@ hrule = choice (map hruleWith hruleChars) <?> "hrule" -- indentedLine = try (do - indentSpaces - result <- manyTill anyChar newline - return (result ++ "\n")) + indentSpaces + result <- manyTill anyChar newline + return (result ++ "\n")) -- two or more indented lines, possibly separated by blank lines indentedBlock = try (do @@ -201,62 +215,66 @@ indentedBlock = try (do return (res1 ++ blanks ++ res2)) codeBlock = do - result <- choice [indentedBlock, indentedLine] - option "" blanklines - return (CodeBlock (stripTrailingNewlines result)) + result <- choice [indentedBlock, indentedLine] + option "" blanklines + return (CodeBlock (stripTrailingNewlines result)) -- -- note block -- rawLine = try (do - notFollowedBy' blankline - notFollowedBy' noteMarker - contents <- many1 nonEndline - end <- option "" (do - newline - option "" indentSpaces - return "\n") - return (contents ++ end)) + notFollowedBy' blankline + notFollowedBy' noteMarker + contents <- many1 nonEndline + end <- option "" (do + newline + option "" indentSpaces + return "\n") + return (contents ++ end)) rawLines = do lines <- many1 rawLine return (concat lines) note = try (do - ref <- noteMarker - char ':' - skipSpaces - skipEndline - raw <- sepBy rawLines (try (do {blankline; indentSpaces})) - option "" blanklines - -- parse the extracted text, which may contain various block elements: - state <- getState - let parsed = case runParser parseBlocks (state {stateParserContext = BlockQuoteState}) "block" ((joinWithSep "\n" raw) ++ "\n\n") of - Left err -> error $ "Raw block:\n" ++ show raw ++ "\nError:\n" ++ show err - Right result -> result - let identifiers = stateNoteIdentifiers state - case (findIndex (== ref) identifiers) of - Just n -> updateState (\s -> s {stateNoteBlocks = - (Note (show (n+1)) parsed):(stateNoteBlocks s)}) - Nothing -> updateState id - return Null) + ref <- noteMarker + char ':' + skipSpaces + skipEndline + raw <- sepBy rawLines (try (do {blankline; indentSpaces})) + option "" blanklines + -- parse the extracted text, which may contain various block elements: + state <- getState + let parsed = case runParser parseBlocks + (state {stateParserContext = BlockQuoteState}) "block" + ((joinWithSep "\n" raw) ++ "\n\n") of + Left err -> error $ "Raw block:\n" ++ show raw ++ + "\nError:\n" ++ show err + Right result -> result + let identifiers = stateNoteIdentifiers state + case (findIndex (== ref) identifiers) of + Just n -> updateState (\s -> s {stateNoteBlocks = + (Note (show (n+1)) parsed):(stateNoteBlocks s)}) + Nothing -> updateState id + return Null) -- -- block quotes -- emacsBoxQuote = try (do - string ",----" - manyTill anyChar newline - raw <- manyTill (try (do{ char '|'; - option ' ' (char ' '); - result <- manyTill anyChar newline; - return result})) - (string "`----") - manyTill anyChar newline - option "" blanklines - return raw) + string ",----" + manyTill anyChar newline + raw <- manyTill (try (do + char '|' + option ' ' (char ' ') + result <- manyTill anyChar newline + return result)) + (string "`----") + manyTill anyChar newline + option "" blanklines + return raw) emailBlockQuoteStart = try (do skipNonindentSpaces @@ -265,24 +283,28 @@ emailBlockQuoteStart = try (do return "> ") emailBlockQuote = try (do - emailBlockQuoteStart - raw <- sepBy (many (choice [nonEndline, - (try (do{ endline; - notFollowedBy' emailBlockQuoteStart; - return '\n'}))])) - (try (do {newline; emailBlockQuoteStart})) - newline <|> (do{ eof; return '\n'}) - option "" blanklines - return raw) + emailBlockQuoteStart + raw <- sepBy (many (choice [nonEndline, + (try (do + endline + notFollowedBy' emailBlockQuoteStart + return '\n'))])) + (try (do {newline; emailBlockQuoteStart})) + newline <|> (do{ eof; return '\n' }) + option "" blanklines + return raw) blockQuote = do - raw <- choice [ emailBlockQuote, emacsBoxQuote ] - -- parse the extracted block, which may contain various block elements: - state <- getState - let parsed = case runParser parseBlocks (state {stateParserContext = BlockQuoteState}) "block" ((joinWithSep "\n" raw) ++ "\n\n") of - Left err -> error $ "Raw block:\n" ++ show raw ++ "\nError:\n" ++ show err - Right result -> result - return (BlockQuote parsed) + raw <- choice [ emailBlockQuote, emacsBoxQuote ] + -- parse the extracted block, which may contain various block elements: + state <- getState + let parsed = case runParser parseBlocks + (state {stateParserContext = BlockQuoteState}) "block" + ((joinWithSep "\n" raw) ++ "\n\n") of + Left err -> error $ "Raw block:\n" ++ show raw ++ + "\nError:\n" ++ show err + Right result -> result + return (BlockQuote parsed) -- -- list blocks @@ -290,85 +312,81 @@ blockQuote = do list = choice [ bulletList, orderedList ] <?> "list" -bulletListStart = - try (do - option ' ' newline -- if preceded by a Plain block in a list context - skipNonindentSpaces - notFollowedBy' hrule -- because hrules start out just like lists - oneOf bulletListMarkers - spaceChar - skipSpaces) - -orderedListStart = - try (do - option ' ' newline -- if preceded by a Plain block in a list context - skipNonindentSpaces - many1 digit <|> count 1 letter - oneOf orderedListDelimiters - oneOf spaceChars - skipSpaces) +bulletListStart = try (do + option ' ' newline -- if preceded by a Plain block in a list context + skipNonindentSpaces + notFollowedBy' hrule -- because hrules start out just like lists + oneOf bulletListMarkers + spaceChar + skipSpaces) + +orderedListStart = try (do + option ' ' newline -- if preceded by a Plain block in a list context + skipNonindentSpaces + many1 digit <|> count 1 letter + oneOf orderedListDelimiters + oneOf spaceChars + skipSpaces) -- parse a line of a list item (start = parser for beginning of list item) listLine start = try (do notFollowedBy' start notFollowedBy blankline - notFollowedBy' (do{ indentSpaces; - many (spaceChar); - choice [bulletListStart, orderedListStart]}) + notFollowedBy' (do + indentSpaces + many (spaceChar) + choice [bulletListStart, orderedListStart]) line <- manyTill anyChar newline return (line ++ "\n")) -- parse raw text for one list item, excluding start marker and continuations -rawListItem start = - try (do - start - result <- many1 (listLine start) - blanks <- many blankline - return ((concat result) ++ blanks)) +rawListItem start = try (do + start + result <- many1 (listLine start) + blanks <- many blankline + return ((concat result) ++ blanks)) -- continuation of a list item - indented and separated by blankline -- or (in compact lists) endline. -- note: nested lists are parsed as continuations -listContinuation start = - try (do - followedBy' indentSpaces - result <- many1 (listContinuationLine start) - blanks <- many blankline - return ((concat result) ++ blanks)) +listContinuation start = try (do + followedBy' indentSpaces + result <- many1 (listContinuationLine start) + blanks <- many blankline + return ((concat result) ++ blanks)) listContinuationLine start = try (do - notFollowedBy' blankline - notFollowedBy' start - option "" indentSpaces - result <- manyTill anyChar newline - return (result ++ "\n")) - -listItem start = - try (do - first <- rawListItem start - rest <- many (listContinuation start) - -- parsing with ListItemState forces markers at beginning of lines to - -- count as list item markers, even if not separated by blank space. - -- see definition of "endline" - state <- getState - let parsed = case runParser parseBlocks (state {stateParserContext = ListItemState}) - "block" raw of - Left err -> error $ "Raw block:\n" ++ raw ++ "\nError:\n" ++ show err - Right result -> result - where raw = concat (first:rest) - return parsed) - -orderedList = - try (do - items <- many1 (listItem orderedListStart) - let items' = compactify items - return (OrderedList items')) - -bulletList = - try (do - items <- many1 (listItem bulletListStart) - let items' = compactify items - return (BulletList items')) + notFollowedBy' blankline + notFollowedBy' start + option "" indentSpaces + result <- manyTill anyChar newline + return (result ++ "\n")) + +listItem start = try (do + first <- rawListItem start + rest <- many (listContinuation start) + -- parsing with ListItemState forces markers at beginning of lines to + -- count as list item markers, even if not separated by blank space. + -- see definition of "endline" + state <- getState + let parsed = case runParser parseBlocks + (state {stateParserContext = ListItemState}) + "block" raw of + Left err -> error $ "Raw block:\n" ++ raw ++ + "\nError:\n" ++ show err + Right result -> result + where raw = concat (first:rest) + return parsed) + +orderedList = try (do + items <- many1 (listItem orderedListStart) + let items' = compactify items + return (OrderedList items')) + +bulletList = try (do + items <- many1 (listItem bulletListStart) + let items' = compactify items + return (BulletList items')) -- -- paragraph block @@ -377,7 +395,10 @@ bulletList = para = try (do result <- many1 inline newline - choice [ (do{ followedBy' (oneOfStrings [">", ",----"]); return "" }), blanklines ] + choice [ (do + followedBy' (oneOfStrings [">", ",----"]) + return "" ), + blanklines ] let result' = normalizeSpaces result return (Para result')) @@ -391,30 +412,28 @@ plain = do -- rawHtmlBlocks = try (do - htmlBlocks <- many1 rawHtmlBlock - let combined = concatMap (\(RawHtml str) -> str) htmlBlocks - let combined' = if (last combined == '\n') then - init combined -- strip extra newline - else - combined - return (RawHtml combined')) + htmlBlocks <- many1 rawHtmlBlock + let combined = concatMap (\(RawHtml str) -> str) htmlBlocks + let combined' = if (last combined == '\n') + then init combined -- strip extra newline + else combined + return (RawHtml combined')) -- -- reference key -- -referenceKey = - try (do - skipSpaces - label <- reference - char labelSep - skipSpaces - option ' ' (char autoLinkStart) - src <- many (noneOf (titleOpeners ++ [autoLinkEnd] ++ endLineChars)) - option ' ' (char autoLinkEnd) - tit <- option "" title - blanklines - return (Key label (Src (removeTrailingSpace src) tit))) +referenceKey = try (do + skipSpaces + label <- reference + char labelSep + skipSpaces + option ' ' (char autoLinkStart) + src <- many (noneOf (titleOpeners ++ [autoLinkEnd] ++ endLineChars)) + option ' ' (char autoLinkEnd) + tit <- option "" title + blanklines + return (Key label (Src (removeTrailingSpace src) tit))) -- -- inline @@ -423,10 +442,11 @@ referenceKey = text = choice [ math, strong, emph, code2, code1, str, linebreak, tabchar, whitespace, endline ] <?> "text" -inline = choice [ rawLaTeXInline, escapedChar, special, hyphens, text, ltSign, symbol ] <?> "inline" +inline = choice [ rawLaTeXInline, escapedChar, special, hyphens, text, + ltSign, symbol ] <?> "inline" -special = choice [ noteRef, inlineNote, link, referenceLink, rawHtmlInline, autoLink, - image ] <?> "link, inline html, note, or image" +special = choice [ noteRef, inlineNote, link, referenceLink, rawHtmlInline, + autoLink, image ] <?> "link, inline html, note, or image" escapedChar = escaped anyChar @@ -443,30 +463,33 @@ symbol = do hyphens = try (do result <- many1 (char '-') - if (length result) == 1 then - skipEndline -- don't want to treat endline after hyphen as a space - else - do{ string ""; return Space } + if (length result) == 1 + then skipEndline -- don't want to treat endline after hyphen as a space + else do{ string ""; return Space } return (Str result)) -- parses inline code, between codeStart and codeEnd -code1 = - try (do - char codeStart - result <- many (noneOf [codeEnd]) - char codeEnd - let result' = removeLeadingTrailingSpace $ joinWithSep " " $ lines result -- get rid of any internal newlines - return (Code result')) +code1 = try (do + char codeStart + result <- many (noneOf [codeEnd]) + char codeEnd + -- get rid of any internal newlines + let result' = removeLeadingTrailingSpace $ joinWithSep " " $ lines result + return (Code result')) -- parses inline code, between 2 codeStarts and 2 codeEnds -code2 = - try (do - string [codeStart, codeStart] - result <- manyTill anyChar (try (string [codeEnd, codeEnd])) - let result' = removeLeadingTrailingSpace $ joinWithSep " " $ lines result -- get rid of any internal newlines - return (Code result')) - -mathWord = many1 (choice [(noneOf (" \t\n\\" ++ [mathEnd])), (try (do {c <- char '\\'; notFollowedBy (char mathEnd); return c}))]) +code2 = try (do + string [codeStart, codeStart] + result <- manyTill anyChar (try (string [codeEnd, codeEnd])) + let result' = removeLeadingTrailingSpace $ joinWithSep " " $ lines result + -- get rid of any internal newlines + return (Code result')) + +mathWord = many1 (choice [ (noneOf (" \t\n\\" ++ [mathEnd])), + (try (do + c <- char '\\' + notFollowedBy (char mathEnd) + return c))]) math = try (do char mathStart @@ -477,12 +500,14 @@ math = try (do emph = do result <- choice [ (enclosed (char emphStart) (char emphEnd) inline), - (enclosed (char emphStartAlt) (char emphEndAlt) inline) ] + (enclosed (char emphStartAlt) (char emphEndAlt) inline) ] return (Emph (normalizeSpaces result)) strong = do - result <- choice [ (enclosed (count 2 (char emphStart)) (count 2 (char emphEnd)) inline), - (enclosed (count 2 (char emphStartAlt)) (count 2 (char emphEndAlt)) inline)] + result <- choice [ (enclosed (count 2 (char emphStart)) + (count 2 (char emphEnd)) inline), + (enclosed (count 2 (char emphStartAlt)) + (count 2 (char emphEndAlt)) inline) ] return (Strong (normalizeSpaces result)) whitespace = do @@ -507,23 +532,21 @@ str = do return (Str (decodeEntities result)) -- an endline character that can be treated as a space, not a structural break -endline = - try (do - newline - -- next line would allow block quotes without preceding blank line - -- Markdown.pl does allow this, but there's a chance of a wrapped - -- greater-than sign triggering a block quote by accident... --- notFollowedBy' (choice [emailBlockQuoteStart, string ",----"]) - notFollowedBy blankline - -- parse potential list starts at beginning of line differently if in a list: - st <- getState - if (stateParserContext st) == ListItemState then - do - notFollowedBy' orderedListStart - notFollowedBy' bulletListStart - else - option () pzero - return Space) +endline = try (do + newline + -- next line would allow block quotes without preceding blank line + -- Markdown.pl does allow this, but there's a chance of a wrapped + -- greater-than sign triggering a block quote by accident... + -- notFollowedBy' (choice [emailBlockQuoteStart, string ",----"]) + notFollowedBy blankline + -- parse potential list-starts differently if in a list: + st <- getState + if (stateParserContext st) == ListItemState + then do + notFollowedBy' orderedListStart + notFollowedBy' bulletListStart + else option () pzero + return Space) -- -- links @@ -537,92 +560,92 @@ reference = do return (normalizeSpaces label) -- source for a link, with optional title -source = - try (do - char srcStart - option ' ' (char autoLinkStart) - src <- many (noneOf ([srcEnd, autoLinkEnd] ++ titleOpeners)) - option ' ' (char autoLinkEnd) - tit <- option "" title - skipSpaces - char srcEnd - return (Src (removeTrailingSpace src) tit)) - -titleWith startChar endChar = - try (do - skipSpaces - skipEndline -- a title can be on the next line from the source - skipSpaces - char startChar - tit <- manyTill (choice [ try (do {char '\\'; char endChar}), - (noneOf (endChar:endLineChars)) ]) (char endChar) - let tit' = gsub "\"" """ tit - return tit') - -title = choice [titleWith '(' ')', titleWith '"' '"', titleWith '\'' '\''] <?> "title" +source = try (do + char srcStart + option ' ' (char autoLinkStart) + src <- many (noneOf ([srcEnd, autoLinkEnd] ++ titleOpeners)) + option ' ' (char autoLinkEnd) + tit <- option "" title + skipSpaces + char srcEnd + return (Src (removeTrailingSpace src) tit)) + +titleWith startChar endChar = try (do + skipSpaces + skipEndline -- a title can be on the next line from the source + skipSpaces + char startChar + tit <- manyTill (choice [ try (do {char '\\'; char endChar}), + (noneOf (endChar:endLineChars)) ]) (char endChar) + let tit' = gsub "\"" """ tit + return tit') + +title = choice [ titleWith '(' ')', + titleWith '"' '"', + titleWith '\'' '\''] <?> "title" link = choice [explicitLink, referenceLink] <?> "link" -explicitLink = - try (do - label <- reference - src <- source - return (Link label src)) +explicitLink = try (do + label <- reference + src <- source + return (Link label src)) referenceLink = choice [referenceLinkDouble, referenceLinkSingle] -referenceLinkDouble = -- a link like [this][/url/] - try (do - label <- reference - skipSpaces - skipEndline - skipSpaces - ref <- reference - return (Link label (Ref ref))) - -referenceLinkSingle = -- a link like [this] - try (do - label <- reference - return (Link label (Ref []))) - -autoLink = -- a link <like.this.com> - try (do - notFollowedBy' anyHtmlBlockTag - src <- between (char autoLinkStart) (char autoLinkEnd) - (many (noneOf (spaceChars ++ endLineChars ++ [autoLinkEnd]))) - case (matchRegex emailAddress src) of - Just _ -> return (Link [Str src] (Src ("mailto:" ++ src) "")) - Nothing -> return (Link [Str src] (Src src ""))) - -emailAddress = mkRegex "([^@:/]+)@(([^.]+[.]?)*([^.]+))" -- presupposes no whitespace - -image = - try (do - char imageStart - (Link label src) <- link - return (Image label src)) +-- a link like [this][/url/] +referenceLinkDouble = try (do + label <- reference + skipSpaces + skipEndline + skipSpaces + ref <- reference + return (Link label (Ref ref))) + +-- a link like [this] +referenceLinkSingle = try (do + label <- reference + return (Link label (Ref []))) + +-- a link <like.this.com> +autoLink = try (do + notFollowedBy' anyHtmlBlockTag + src <- between (char autoLinkStart) (char autoLinkEnd) + (many (noneOf (spaceChars ++ endLineChars ++ [autoLinkEnd]))) + case (matchRegex emailAddress src) of + Just _ -> return (Link [Str src] (Src ("mailto:" ++ src) "")) + Nothing -> return (Link [Str src] (Src src ""))) + +emailAddress = + mkRegex "([^@:/]+)@(([^.]+[.]?)*([^.]+))" -- presupposes no whitespace + +image = try (do + char imageStart + (Link label src) <- link + return (Image label src)) noteMarker = try (do - char labelStart - char noteStart - manyTill (noneOf " \t\n") (char labelEnd)) + char labelStart + char noteStart + manyTill (noneOf " \t\n") (char labelEnd)) noteRef = try (do - ref <- noteMarker - state <- getState - let identifiers = (stateNoteIdentifiers state) ++ [ref] - updateState (\st -> st {stateNoteIdentifiers = identifiers}) - return (NoteRef (show (length identifiers)))) + ref <- noteMarker + state <- getState + let identifiers = (stateNoteIdentifiers state) ++ [ref] + updateState (\st -> st {stateNoteIdentifiers = identifiers}) + return (NoteRef (show (length identifiers)))) inlineNote = try (do - char noteStart - char labelStart - contents <- manyTill inline (char labelEnd) - state <- getState - let identifiers = stateNoteIdentifiers state - let ref = show $ (length identifiers) + 1 - let noteBlocks = stateNoteBlocks state - updateState (\st -> st {stateNoteIdentifiers = (identifiers ++ [ref]), - stateNoteBlocks = (Note ref [Para contents]):noteBlocks}) - return (NoteRef ref)) + char noteStart + char labelStart + contents <- manyTill inline (char labelEnd) + state <- getState + let identifiers = stateNoteIdentifiers state + let ref = show $ (length identifiers) + 1 + let noteBlocks = stateNoteBlocks state + updateState (\st -> st {stateNoteIdentifiers = (identifiers ++ [ref]), + stateNoteBlocks = + (Note ref [Para contents]):noteBlocks}) + return (NoteRef ref)) diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 69c7d9baa..1672e06dc 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -1,4 +1,14 @@ --- | Parse reStructuredText and return Pandoc document. +{- | + Module : Text.Pandoc.Readers.RST + Copyright : Copyright (C) 2006 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm at berkeley dot edu> + Stability : unstable + Portability : portable + +Conversion from reStructuredText to 'Pandoc' document. +-} module Text.Pandoc.Readers.RST ( readRST ) where @@ -61,16 +71,14 @@ promoteHeaders num [] = [] -- promote all the other headers. titleTransform :: [Block] -- ^ list of blocks -> ([Block], [Inline]) -- ^ modified list of blocks, title -titleTransform ((Header 1 head1):(Header 2 head2):rest) = -- title and subtitle - if (any isHeader1 rest) || (any isHeader2 rest) then - ((Header 1 head1):(Header 2 head2):rest, []) - else - ((promoteHeaders 2 rest), head1 ++ [Str ":", Space] ++ head2) +titleTransform ((Header 1 head1):(Header 2 head2):rest) = -- title subtitle + if (any isHeader1 rest) || (any isHeader2 rest) + then ((Header 1 head1):(Header 2 head2):rest, []) + else ((promoteHeaders 2 rest), head1 ++ [Str ":", Space] ++ head2) titleTransform ((Header 1 head1):rest) = -- title, no subtitle - if (any isHeader1 rest) then - ((Header 1 head1):rest, []) - else - ((promoteHeaders 1 rest), head1) + if (any isHeader1 rest) + then ((Header 1 head1):rest, []) + else ((promoteHeaders 1 rest), head1) titleTransform blocks = (blocks, []) parseRST = do @@ -78,17 +86,18 @@ parseRST = do input <- getInput blocks <- parseBlocks -- first pass let anonymousKeys = filter isAnonKeyBlock blocks - let blocks' = if (null anonymousKeys) then - blocks - else -- run parser again to fill in anonymous links... - case runParser parseBlocks (state { stateKeyBlocks = anonymousKeys }) + let blocks' = if (null anonymousKeys) + then blocks + else -- run parser again to fill in anonymous links... + case runParser parseBlocks + (state { stateKeyBlocks = anonymousKeys }) "RST source, second pass" input of - Left err -> error $ "\nError:\n" ++ show err - Right result -> (filter isNotAnonKeyBlock result) - let (blocks'', title) = if stateStandalone state then - titleTransform blocks' - else - (blocks', []) + Left err -> error $ "\nError:\n" ++ show err + Right result -> + (filter isNotAnonKeyBlock result) + let (blocks'', title) = if stateStandalone state + then titleTransform blocks' + else (blocks', []) state <- getState let authors = stateAuthors state let date = stateDate state @@ -103,9 +112,10 @@ parseBlocks = do result <- manyTill block eof return result -block = choice [ codeBlock, rawHtmlBlock, rawLaTeXBlock, blockQuote, referenceKey, - imageBlock, unknownDirective, header, hrule, list, fieldList, lineBlock, - para, plain, blankBlock, nullBlock ] <?> "block" +block = choice [ codeBlock, rawHtmlBlock, rawLaTeXBlock, blockQuote, + referenceKey, imageBlock, unknownDirective, header, + hrule, list, fieldList, lineBlock, para, plain, + blankBlock, nullBlock ] <?> "block" -- -- field list @@ -117,28 +127,32 @@ fieldListItem = try (do string ": " skipSpaces first <- manyTill anyChar newline - rest <- many (do{ notFollowedBy (char ':'); - notFollowedBy blankline; - skipSpaces; - manyTill anyChar newline }) + rest <- many (do + notFollowedBy (char ':') + notFollowedBy blankline + skipSpaces + manyTill anyChar newline ) return (name, (joinWithSep " " (first:rest)))) fieldList = try (do items <- many1 fieldListItem blanklines let authors = case (lookup "Authors" items) of - Just auth -> [auth] - Nothing -> map snd (filter (\(x,y) -> x == "Author") items) + Just auth -> [auth] + Nothing -> map snd (filter (\(x,y) -> x == "Author") items) let date = case (lookup "Date" items) of - Just dat -> dat - Nothing -> "" + Just dat -> dat + Nothing -> "" let title = case (lookup "Title" items) of - Just tit -> [Str tit] - Nothing -> [] - let remaining = filter (\(x,y) -> (x /= "Authors") && (x /= "Author") && (x /= "Date") && - (x /= "Title")) items - let result = map (\(x,y) -> Para [Strong [Str x], Str ":", Space, Str y]) remaining - updateState (\st -> st { stateAuthors = authors, stateDate = date, stateTitle = title }) + Just tit -> [Str tit] + Nothing -> [] + let remaining = filter (\(x,y) -> (x /= "Authors") && (x /= "Author") && + (x /= "Date") && (x /= "Title")) items + let result = map (\(x,y) -> + Para [Strong [Str x], Str ":", Space, Str y]) remaining + updateState (\st -> st { stateAuthors = authors, + stateDate = date, + stateTitle = title }) return (BlockQuote result)) -- @@ -164,18 +178,17 @@ lineBlock = try (do para = choice [ paraBeforeCodeBlock, paraNormal ] <?> "paragraph" codeBlockStart = try (do - string "::" - blankline - blankline) + string "::" + blankline + blankline) -- paragraph that ends in a :: starting a code block paraBeforeCodeBlock = try (do result <- many1 (do {notFollowedBy' codeBlockStart; inline}) followedBy' (string "::") - return (Para (if (last result == Space) then - normalizeSpaces result - else - (normalizeSpaces result) ++ [Str ":"]))) + return (Para (if (last result == Space) + then normalizeSpaces result + else (normalizeSpaces result) ++ [Str ":"]))) -- regular paragraph paraNormal = try (do @@ -195,9 +208,9 @@ plain = do -- imageBlock = try (do - string ".. image:: " - src <- manyTill anyChar newline - return (Plain [Image [Str "image"] (Src src "")])) + string ".. image:: " + src <- manyTill anyChar newline + return (Plain [Image [Str "image"] (Src src "")])) -- -- header blocks @@ -207,59 +220,58 @@ header = choice [ doubleHeader, singleHeader ] <?> "header" -- a header with lines on top and bottom doubleHeader = try (do - c <- oneOf underlineChars - rest <- many (char c) -- the top line - let lenTop = length (c:rest) - skipSpaces - newline - txt <- many1 (do {notFollowedBy blankline; inline}) - pos <- getPosition - let len = (sourceColumn pos) - 1 - if (len > lenTop) then fail "title longer than border" else (do {return ()}) - blankline -- spaces and newline - count lenTop (char c) -- the bottom line - blanklines - -- check to see if we've had this kind of header before. - -- if so, get appropriate level. if not, add to list. - state <- getState - let headerTable = stateHeaderTable state - let (headerTable', level) = case findIndex (== DoubleHeader c) headerTable of - Just ind -> (headerTable, ind + 1) - Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1) - setState (state { stateHeaderTable = headerTable' }) - return (Header level (normalizeSpaces txt))) + c <- oneOf underlineChars + rest <- many (char c) -- the top line + let lenTop = length (c:rest) + skipSpaces + newline + txt <- many1 (do {notFollowedBy blankline; inline}) + pos <- getPosition + let len = (sourceColumn pos) - 1 + if (len > lenTop) then fail "title longer than border" else (do {return ()}) + blankline -- spaces and newline + count lenTop (char c) -- the bottom line + blanklines + -- check to see if we've had this kind of header before. + -- if so, get appropriate level. if not, add to list. + state <- getState + let headerTable = stateHeaderTable state + let (headerTable',level) = case findIndex (== DoubleHeader c) headerTable of + Just ind -> (headerTable, ind + 1) + Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1) + setState (state { stateHeaderTable = headerTable' }) + return (Header level (normalizeSpaces txt))) -- a header with line on the bottom only singleHeader = try (do - notFollowedBy' whitespace - txt <- many1 (do {notFollowedBy blankline; inline}) - pos <- getPosition - let len = (sourceColumn pos) - 1 - blankline - c <- oneOf underlineChars - rest <- count (len - 1) (char c) - many (char c) - blanklines - state <- getState - let headerTable = stateHeaderTable state - let (headerTable', level) = case findIndex (== SingleHeader c) headerTable of - Just ind -> (headerTable, ind + 1) - Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1) - setState (state { stateHeaderTable = headerTable' }) - return (Header level (normalizeSpaces txt))) + notFollowedBy' whitespace + txt <- many1 (do {notFollowedBy blankline; inline}) + pos <- getPosition + let len = (sourceColumn pos) - 1 + blankline + c <- oneOf underlineChars + rest <- count (len - 1) (char c) + many (char c) + blanklines + state <- getState + let headerTable = stateHeaderTable state + let (headerTable',level) = case findIndex (== SingleHeader c) headerTable of + Just ind -> (headerTable, ind + 1) + Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1) + setState (state { stateHeaderTable = headerTable' }) + return (Header level (normalizeSpaces txt))) -- -- hrule block -- -hruleWith chr = - try (do - count 4 (char chr) - skipMany (char chr) - skipSpaces - newline - blanklines - return HorizontalRule) +hruleWith chr = try (do + count 4 (char chr) + skipMany (char chr) + skipSpaces + newline + blanklines + return HorizontalRule) hrule = choice (map hruleWith underlineChars) <?> "hrule" @@ -269,9 +281,9 @@ hrule = choice (map hruleWith underlineChars) <?> "hrule" -- read a line indented by a given string indentedLine indents = try (do - string indents - result <- manyTill anyChar newline - return (result ++ "\n")) + string indents + result <- manyTill anyChar newline + return (result ++ "\n")) -- two or more indented lines, possibly separated by blank lines -- if variable = True, then any indent will work, but it must be consistent through the block @@ -279,54 +291,59 @@ indentedLine indents = try (do indentedBlock variable = try (do state <- getState let tabStop = stateTabStop state - indents <- if variable then - many1 (oneOf " \t") - else - oneOfStrings ["\t", (replicate tabStop ' ')] + indents <- if variable + then many1 (oneOf " \t") + else oneOfStrings ["\t", (replicate tabStop ' ')] firstline <- manyTill anyChar newline rest <- many (choice [ indentedLine indents, - try (do {b <- blanklines; l <- indentedLine indents; return (b ++ l)})]) + try (do + b <- blanklines + l <- indentedLine indents + return (b ++ l))]) option "" blanklines return (firstline ++ "\n" ++ (concat rest))) codeBlock = try (do - codeBlockStart - result <- indentedBlock False -- the False means we want one tab stop indent on each line - return (CodeBlock (stripTrailingNewlines result))) + codeBlockStart + result <- indentedBlock False + -- the False means we want one tab stop indent on each line + return (CodeBlock (stripTrailingNewlines result))) -- -- raw html -- rawHtmlBlock = try (do - string ".. raw:: html" - blanklines - result <- indentedBlock True - return (RawHtml result)) + string ".. raw:: html" + blanklines + result <- indentedBlock True + return (RawHtml result)) -- -- raw latex -- rawLaTeXBlock = try (do - string ".. raw:: latex" - blanklines - result <- indentedBlock True - return (Para [(TeX result)])) + string ".. raw:: latex" + blanklines + result <- indentedBlock True + return (Para [(TeX result)])) -- -- block quotes -- blockQuote = try (do - block <- indentedBlock True - -- parse the extracted block, which may contain various block elements: - state <- getState - let parsed = case runParser parseBlocks (state {stateParserContext = BlockQuoteState}) - "block" (block ++ "\n\n") of - Left err -> error $ "Raw block:\n" ++ show block ++ "\nError:\n" ++ show err - Right result -> result - return (BlockQuote parsed)) + block <- indentedBlock True + -- parse the extracted block, which may contain various block elements: + state <- getState + let parsed = case runParser parseBlocks + (state {stateParserContext = BlockQuoteState}) + "block" (block ++ "\n\n") of + Left err -> error $ "Raw block:\n" ++ show block ++ + "\nError:\n" ++ show err + Right result -> result + return (BlockQuote parsed)) -- -- list blocks @@ -335,34 +352,36 @@ blockQuote = try (do list = choice [ bulletList, orderedList ] <?> "list" -- parses bullet list start and returns its length (inc. following whitespace) -bulletListStart = - try (do - notFollowedBy' hrule -- because hrules start out just like lists - marker <- oneOf bulletListMarkers - white <- many1 spaceChar - let len = length (marker:white) - return len) +bulletListStart = try (do + notFollowedBy' hrule -- because hrules start out just like lists + marker <- oneOf bulletListMarkers + white <- many1 spaceChar + let len = length (marker:white) + return len) withPeriodSuffix parser = try (do - a <- parser - b <- char '.' - return (a ++ [b])) + a <- parser + b <- char '.' + return (a ++ [b])) withParentheses parser = try (do - a <- char '(' - b <- parser - c <- char ')' - return ([a] ++ b ++ [c])) + a <- char '(' + b <- parser + c <- char ')' + return ([a] ++ b ++ [c])) withRightParen parser = try (do - a <- parser - b <- char ')' - return (a ++ [b])) + a <- parser + b <- char ')' + return (a ++ [b])) upcaseWord = map toUpper romanNumeral = do - let lowerNumerals = ["i", "ii", "iii", "iiii", "iv", "v", "vi", "vii", "viii", "ix", "x", "xi", "xii", "xiii", "xiv", "xv", "xvi", "xvii", "xviii", "xix", "xx", "xxi", "xxii", "xxiii", "xxiv" ] + let lowerNumerals = ["i", "ii", "iii", "iiii", "iv", "v", "vi", + "vii", "viii", "ix", "x", "xi", "xii", "xiii", + "xiv", "xv", "xvi", "xvii", "xviii", "xix", "xx", + "xxi", "xxii", "xxiii", "xxiv" ] let upperNumerals = map upcaseWord lowerNumerals result <- choice $ map string (lowerNumerals ++ upperNumerals) return result @@ -372,15 +391,14 @@ orderedListEnumerator = choice [ many1 digit, count 1 letter, romanNumeral ] --- parses ordered list start and returns its length (inc. following whitespace) -orderedListStart = - try (do - marker <- choice [ withPeriodSuffix orderedListEnumerator, - withParentheses orderedListEnumerator, - withRightParen orderedListEnumerator ] - white <- many1 spaceChar - let len = length (marker ++ white) - return len) +-- parses ordered list start and returns its length (inc following whitespace) +orderedListStart = try (do + marker <- choice [ withPeriodSuffix orderedListEnumerator, + withParentheses orderedListEnumerator, + withRightParen orderedListEnumerator ] + white <- many1 spaceChar + let len = length (marker ++ white) + return len) -- parse a line of a list item listLine markerLength = try (do @@ -393,72 +411,73 @@ listLine markerLength = try (do indentWith num = do state <- getState let tabStop = stateTabStop state - if (num < tabStop) then - count num (char ' ') - else - choice [ try (count num (char ' ')), - (try (do {char '\t'; count (num - tabStop) (char ' ')})) ] + if (num < tabStop) + then count num (char ' ') + else choice [ try (count num (char ' ')), + (try (do {char '\t'; count (num - tabStop) (char ' ')})) ] -- parse raw text for one list item, excluding start marker and continuations -rawListItem start = - try (do - markerLength <- start - firstLine <- manyTill anyChar newline - restLines <- many (listLine markerLength) - return (markerLength, (firstLine ++ "\n" ++ (concat restLines)))) - --- continuation of a list item - indented and separated by blankline or (in compact lists) --- endline. Note: nested lists are parsed as continuations. -listContinuation markerLength = - try (do - blanks <- many1 blankline - result <- many1 (listLine markerLength) - return (blanks ++ (concat result))) - -listItem start = - try (do - (markerLength, first) <- rawListItem start - rest <- many (listContinuation markerLength) - blanks <- choice [ try (do {b <- many blankline; followedBy' start; return b}), - many1 blankline ] -- whole list must end with blank - -- parsing with ListItemState forces markers at beginning of lines to - -- count as list item markers, even if not separated by blank space. - -- see definition of "endline" - state <- getState - let parsed = case runParser parseBlocks (state {stateParserContext = ListItemState}) - "list item" raw of - Left err -> error $ "Raw:\n" ++ raw ++ "\nError:\n" ++ show err - Right result -> result - where raw = concat (first:rest) ++ blanks - return parsed) - -orderedList = - try (do - items <- many1 (listItem orderedListStart) - let items' = compactify items - return (OrderedList items')) - -bulletList = - try (do - items <- many1 (listItem bulletListStart) - let items' = compactify items - return (BulletList items')) +rawListItem start = try (do + markerLength <- start + firstLine <- manyTill anyChar newline + restLines <- many (listLine markerLength) + return (markerLength, (firstLine ++ "\n" ++ (concat restLines)))) + +-- continuation of a list item - indented and separated by blankline or +-- (in compact lists) endline. +-- Note: nested lists are parsed as continuations. +listContinuation markerLength = try (do + blanks <- many1 blankline + result <- many1 (listLine markerLength) + return (blanks ++ (concat result))) + +listItem start = try (do + (markerLength, first) <- rawListItem start + rest <- many (listContinuation markerLength) + blanks <- choice [ try (do + b <- many blankline + followedBy' start + return b), + many1 blankline ] -- whole list must end with blank + -- parsing with ListItemState forces markers at beginning of lines to + -- count as list item markers, even if not separated by blank space. + -- see definition of "endline" + state <- getState + let parsed = case runParser parseBlocks + (state {stateParserContext = ListItemState}) "list item" + raw of + Left err -> error $ "Raw:\n" ++ raw ++ + "\nError:\n" ++ show err + Right result -> result + where raw = concat (first:rest) ++ blanks + return parsed) + +orderedList = try (do + items <- many1 (listItem orderedListStart) + let items' = compactify items + return (OrderedList items')) + +bulletList = try (do + items <- many1 (listItem bulletListStart) + let items' = compactify items + return (BulletList items')) -- -- unknown directive (e.g. comment) -- unknownDirective = try (do - string ".. " - manyTill anyChar newline - many (do {string " "; - char ':'; - many1 (noneOf "\n:"); - char ':'; - many1 (noneOf "\n"); - newline}) - option "" blanklines - return Null) + string ".. " + manyTill anyChar newline + many (do + string " " + char ':' + many1 (noneOf "\n:") + char ':' + many1 (noneOf "\n") + newline) + option "" blanklines + return Null) -- -- reference key @@ -467,39 +486,43 @@ unknownDirective = try (do referenceKey = choice [imageKey, anonymousKey, regularKeyQuoted, regularKey] imageKey = try (do - string ".. |" - ref <- manyTill inline (char '|') - skipSpaces - string "image::" - src <- manyTill anyChar newline - return (Key (normalizeSpaces ref) (Src (removeLeadingTrailingSpace src) ""))) + string ".. |" + ref <- manyTill inline (char '|') + skipSpaces + string "image::" + src <- manyTill anyChar newline + return (Key (normalizeSpaces ref) + (Src (removeLeadingTrailingSpace src) ""))) anonymousKey = try (do - choice [string ".. __:", string "__"] - skipSpaces - src <- manyTill anyChar newline - state <- getState - return (Key [Str "_"] (Src (removeLeadingTrailingSpace src) ""))) + choice [string ".. __:", string "__"] + skipSpaces + src <- manyTill anyChar newline + state <- getState + return (Key [Str "_"] (Src (removeLeadingTrailingSpace src) ""))) regularKeyQuoted = try (do - string ".. _`" - ref <- manyTill inline (string "`:") - skipSpaces - src <- manyTill anyChar newline - return (Key (normalizeSpaces ref) (Src (removeLeadingTrailingSpace src) ""))) + string ".. _`" + ref <- manyTill inline (string "`:") + skipSpaces + src <- manyTill anyChar newline + return (Key (normalizeSpaces ref) + (Src (removeLeadingTrailingSpace src) ""))) regularKey = try (do - string ".. _" - ref <- manyTill inline (char ':') - skipSpaces - src <- manyTill anyChar newline - return (Key (normalizeSpaces ref) (Src (removeLeadingTrailingSpace src) ""))) + string ".. _" + ref <- manyTill inline (char ':') + skipSpaces + src <- manyTill anyChar newline + return (Key (normalizeSpaces ref) + (Src (removeLeadingTrailingSpace src) ""))) -- -- inline -- -text = choice [ strong, emph, code, str, tabchar, whitespace, endline ] <?> "text" +text = choice [ strong, emph, code, str, tabchar, whitespace, + endline ] <?> "text" inline = choice [ escapedChar, special, hyphens, text, symbol ] <?> "inline" @@ -507,7 +530,8 @@ special = choice [ link, image ] <?> "link, inline html, or image" hyphens = try (do result <- many1 (char '-') - option Space endline -- don't want to treat endline after hyphen or dash as a space + option Space endline + -- don't want to treat endline after hyphen or dash as a space return (Str result)) escapedChar = escaped anyChar @@ -517,12 +541,11 @@ symbol = do return (Str [result]) -- parses inline code, between codeStart and codeEnd -code = - try (do - string "``" - result <- manyTill anyChar (string "``") - let result' = removeLeadingTrailingSpace $ joinWithSep " " $ lines result - return (Code result')) +code = try (do + string "``" + result <- manyTill anyChar (string "``") + let result' = removeLeadingTrailingSpace $ joinWithSep " " $ lines result + return (Code result')) emph = do result <- enclosed (char '*') (char '*') inline @@ -546,99 +569,95 @@ str = do return (Str result) -- an endline character that can be treated as a space, not a structural break -endline = - try (do - newline - notFollowedBy blankline - -- parse potential list starts at beginning of line differently if in a list: - st <- getState - if ((stateParserContext st) == ListItemState) then - notFollowedBy' (choice [orderedListStart, bulletListStart]) - else - option () pzero - return Space) +endline = try (do + newline + notFollowedBy blankline + -- parse potential list-starts at beginning of line differently in a list: + st <- getState + if ((stateParserContext st) == ListItemState) + then notFollowedBy' (choice [orderedListStart, bulletListStart]) + else option () pzero + return Space) -- -- links -- -link = choice [explicitLink, referenceLink, autoLink, oneWordReferenceLink] <?> "link" - -explicitLink = - try (do - char '`' - label <- manyTill inline (try (do {spaces; char '<'})) - src <- manyTill (noneOf ">\n ") (char '>') - skipSpaces - string "`_" - return (Link (normalizeSpaces label) (Src (removeLeadingTrailingSpace src) ""))) - -anonymousLinkEnding = - try (do - char '_' - state <- getState - let anonKeys = stateKeyBlocks state - -- if there's a list of anon key refs (from previous pass), pop one off. - -- otherwise return an anon key ref for the next pass to take care of... - case anonKeys of - (Key [Str "_"] src):rest -> - do{ setState (state { stateKeyBlocks = rest }); - return src } - otherwise -> return (Ref [Str "_"])) - -referenceLink = - try (do - char '`' - label <- manyTill inline (string "`_") - src <- option (Ref []) anonymousLinkEnding - return (Link (normalizeSpaces label) src)) - -oneWordReferenceLink = - try (do - label <- many1 alphaNum - char '_' - src <- option (Ref []) anonymousLinkEnding - notFollowedBy alphaNum -- because this_is_not a link - return (Link [Str label] src)) - -uriScheme = oneOfStrings [ "http://", "https://", "ftp://", "file://", "mailto:", - "news:", "telnet:" ] +link = choice [explicitLink, referenceLink, autoLink, + oneWordReferenceLink] <?> "link" + +explicitLink = try (do + char '`' + label <- manyTill inline (try (do {spaces; char '<'})) + src <- manyTill (noneOf ">\n ") (char '>') + skipSpaces + string "`_" + return (Link (normalizeSpaces label) + (Src (removeLeadingTrailingSpace src) ""))) + +anonymousLinkEnding = try (do + char '_' + state <- getState + let anonKeys = stateKeyBlocks state + -- if there's a list of anon key refs (from previous pass), pop one off. + -- otherwise return an anon key ref for the next pass to take care of... + case anonKeys of + (Key [Str "_"] src):rest -> + do + setState (state { stateKeyBlocks = rest }) + return src + otherwise -> return (Ref [Str "_"])) + +referenceLink = try (do + char '`' + label <- manyTill inline (string "`_") + src <- option (Ref []) anonymousLinkEnding + return (Link (normalizeSpaces label) src)) + +oneWordReferenceLink = try (do + label <- many1 alphaNum + char '_' + src <- option (Ref []) anonymousLinkEnding + notFollowedBy alphaNum -- because this_is_not a link + return (Link [Str label] src)) + +uriScheme = oneOfStrings [ "http://", "https://", "ftp://", "file://", + "mailto:", "news:", "telnet:" ] uri = try (do - scheme <- uriScheme - identifier <- many1 (noneOf " \t\n") - return (scheme ++ identifier)) + scheme <- uriScheme + identifier <- many1 (noneOf " \t\n") + return (scheme ++ identifier)) autoURI = try (do - src <- uri - return (Link [Str src] (Src src ""))) + src <- uri + return (Link [Str src] (Src src ""))) emailChar = alphaNum <|> oneOf "-+_." emailAddress = try (do - firstLetter <- alphaNum - restAddr <- many emailChar - let addr = firstLetter:restAddr - char '@' - dom <- domain - return (addr ++ '@':dom)) + firstLetter <- alphaNum + restAddr <- many emailChar + let addr = firstLetter:restAddr + char '@' + dom <- domain + return (addr ++ '@':dom)) domainChar = alphaNum <|> char '-' domain = try (do - first <- many1 domainChar - dom <- many1 (try (do{ char '.'; many1 domainChar })) - return (joinWithSep "." (first:dom))) + first <- many1 domainChar + dom <- many1 (try (do{ char '.'; many1 domainChar })) + return (joinWithSep "." (first:dom))) autoEmail = try (do - src <- emailAddress - return (Link [Str src] (Src ("mailto:" ++ src) ""))) + src <- emailAddress + return (Link [Str src] (Src ("mailto:" ++ src) ""))) autoLink = autoURI <|> autoEmail -- For now, we assume that all substitution references are for images. -image = - try (do - char '|' - ref <- manyTill inline (char '|') - return (Image (normalizeSpaces ref) (Ref ref))) +image = try (do + char '|' + ref <- manyTill inline (char '|') + return (Image (normalizeSpaces ref) (Ref ref))) |