aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs85
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs118
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs601
-rw-r--r--src/Text/Pandoc/Readers/RST.hs631
4 files changed, 750 insertions, 685 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index f9a738e94..c157f3b0e 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -1,4 +1,14 @@
--- | Converts HTML to 'Pandoc' document.
+{- |
+ Module : Text.Pandoc.Readers.HTML
+ Copyright : Copyright (C) 2006 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm at berkeley dot edu>
+ Stability : unstable
+ Portability : portable
+
+Conversion of HTML to 'Pandoc' document.
+-}
module Text.Pandoc.Readers.HTML (
readHtml,
rawHtmlInline,
@@ -30,10 +40,11 @@ testString = testStringWith parseHtml
-- Constants
--
-inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big", "br", "cite",
- "code", "dfn", "em", "font", "i", "img", "input", "kbd", "label", "q",
- "s", "samp", "select", "small", "span", "strike", "strong", "sub",
- "sup", "textarea", "tt", "u", "var"]
+inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big",
+ "br", "cite", "code", "dfn", "em", "font", "i", "img",
+ "input", "kbd", "label", "q", "s", "samp", "select",
+ "small", "span", "strike", "strong", "sub", "sup",
+ "textarea", "tt", "u", "var"]
--
-- HTML utility functions
@@ -50,9 +61,10 @@ inlinesTilEnd tag = try (do
return inlines)
-- extract type from a tag: e.g. br from <br>, < br >, </br>, etc.
-extractTagType tag = case (matchRegex (mkRegex "<[[:space:]]*/?([A-Za-z0-9]+)") tag) of
- Just [match] -> (map toLower match)
- Nothing -> ""
+extractTagType tag =
+ case (matchRegex (mkRegex "<[[:space:]]*/?([A-Za-z0-9]+)") tag) of
+ Just [match] -> (map toLower match)
+ Nothing -> ""
anyHtmlTag = try (do
char '<'
@@ -90,7 +102,8 @@ htmlTag tag = try (do
-- parses a quoted html attribute value
quoted quoteChar = do
- result <- between (char quoteChar) (char quoteChar) (many (noneOf [quoteChar]))
+ result <- between (char quoteChar) (char quoteChar)
+ (many (noneOf [quoteChar]))
return (result, [quoteChar])
htmlAttributes = do
@@ -116,9 +129,11 @@ htmlRegularAttribute = try (do
spaces
(content, quoteStr) <- choice [ (quoted '\''),
(quoted '"'),
- (do{ a <- (many (alphaNum <|> (oneOf "-._:")));
- return (a,"")} ) ]
- return (name, content, (" " ++ name ++ "=" ++ quoteStr ++ content ++ quoteStr)))
+ (do
+ a <- many (alphaNum <|> (oneOf "-._:"))
+ return (a,"")) ]
+ return (name, content,
+ (" " ++ name ++ "=" ++ quoteStr ++ content ++ quoteStr)))
htmlEndTag tag = try (do
char '<'
@@ -135,17 +150,11 @@ isInline tag = (extractTagType tag) `elem` inlineHtmlTags
anyHtmlBlockTag = try (do
tag <- choice [anyHtmlTag, anyHtmlEndTag]
- if isInline tag then
- fail "inline tag"
- else
- return tag)
+ if isInline tag then fail "inline tag" else return tag)
anyHtmlInlineTag = try (do
tag <- choice [ anyHtmlTag, anyHtmlEndTag ]
- if isInline tag then
- return tag
- else
- fail "not an inline tag")
+ if isInline tag then return tag else fail "not an inline tag")
-- scripts must be treated differently, because they can contain <> etc.
htmlScript = try (do
@@ -155,13 +164,11 @@ htmlScript = try (do
rawHtmlBlock = try (do
notFollowedBy' (choice [htmlTag "/body", htmlTag "/html"])
- body <- choice [htmlScript, anyHtmlBlockTag, htmlComment, xmlDec, definition]
+ body <- choice [htmlScript, anyHtmlBlockTag, htmlComment, xmlDec,
+ definition]
sp <- (many space)
state <- getState
- if stateParseRaw state then
- return (RawHtml (body ++ sp))
- else
- return Null)
+ if stateParseRaw state then return (RawHtml (body ++ sp)) else return Null)
htmlComment = try (do
string "<!--"
@@ -266,10 +273,10 @@ headerLevel n = try (do
hrule = try (do
(tag, attribs) <- htmlTag "hr"
state <- getState
- if (not (null attribs)) && (stateParseRaw state) then
- unexpected "attributes in hr" -- in this case we want to parse it as raw html
- else
- return HorizontalRule)
+ if (not (null attribs)) && (stateParseRaw state)
+ then -- in this case we want to parse it as raw html
+ unexpected "attributes in hr"
+ else return HorizontalRule)
--
-- code blocks
@@ -352,29 +359,31 @@ inline = choice [ text, special ] <?> "inline"
text = choice [ entity, strong, emph, code, str, linebreak, whitespace ] <?> "text"
-special = choice [ link, image, rawHtmlInline ] <?> "link, inline html, or image"
+special = choice [ link, image, rawHtmlInline ] <?>
+ "link, inline html, or image"
entity = try (do
char '&'
- body <- choice [(many1 letter),
- (try (do{ char '#'; num <- many1 digit; return ("#" ++ num)}))]
+ body <- choice [(many1 letter), (try (do
+ char '#'
+ num <- many1 digit
+ return ("#" ++ num)))]
char ';'
return (Str [fromMaybe '?' (htmlEntityToChar ("&" ++ body ++ ";"))]))
code = try (do
htmlTag "code"
result <- manyTill anyChar (htmlEndTag "code")
- -- remove internal line breaks, leading and trailing space, and decode entities
- let result' = decodeEntities $ removeLeadingTrailingSpace $ joinWithSep " " $ lines result
+ -- remove internal line breaks, leading and trailing space,
+ -- and decode entities
+ let result' = decodeEntities $ removeLeadingTrailingSpace $
+ joinWithSep " " $ lines result
return (Code result'))
rawHtmlInline = do
result <- choice [htmlScript, anyHtmlInlineTag]
state <- getState
- if stateParseRaw state then
- return (HtmlInline result)
- else
- return (Str "")
+ if stateParseRaw state then return (HtmlInline result) else return (Str "")
betweenTags tag = try (do
htmlTag tag
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index a62ff7b94..81004b1f1 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1,4 +1,14 @@
--- | Converts LaTeX to 'Pandoc' document.
+{- |
+ Module : Text.Pandoc.Readers.LaTeX
+ Copyright : Copyright (C) 2006 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm at berkeley dot edu>
+ Stability : unstable
+ Portability : portable
+
+Conversion of LaTeX to 'Pandoc' document.
+-}
module Text.Pandoc.Readers.LaTeX (
readLaTeX,
rawLaTeXInline,
@@ -81,7 +91,8 @@ end name = try (do
spaces
return name)
--- | Returns a list of block elements containing the contents of an environment.
+-- | Returns a list of block elements containing the contents of an
+-- environment.
environment name = try (do
begin name
spaces
@@ -104,15 +115,16 @@ anyEnvironment = try (do
-- | Process LaTeX preamble, extracting metadata.
processLaTeXPreamble = do
- manyTill (choice [bibliographic, comment, unknownCommand, nullBlock]) (try (string "\\begin{document}"))
+ manyTill (choice [bibliographic, comment, unknownCommand, nullBlock])
+ (try (string "\\begin{document}"))
spaces
-- | Parse LaTeX and return 'Pandoc'.
parseLaTeX = do
- option () processLaTeXPreamble -- preamble might not be present, if a fragment
+ option () processLaTeXPreamble -- preamble might not be present (fragment)
blocks <- parseBlocks
spaces
- option "" (string "\\end{document}") -- if parsing a fragment, this might not be present
+ option "" (string "\\end{document}") -- might not be present (in fragment)
spaces
eof
state <- getState
@@ -122,7 +134,8 @@ parseLaTeX = do
let title' = stateTitle state
let authors' = stateAuthors state
let date' = stateDate state
- return (Pandoc (Meta title' authors' date') (blocks' ++ (reverse noteBlocks) ++ (reverse keyBlocks)))
+ return (Pandoc (Meta title' authors' date')
+ (blocks' ++ (reverse noteBlocks) ++ (reverse keyBlocks)))
--
-- parsing blocks
@@ -133,9 +146,10 @@ parseBlocks = do
result <- many block
return result
-block = choice [ hrule, codeBlock, header, list, blockQuote, mathBlock, comment,
- bibliographic, para, specialEnvironment, itemBlock, unknownEnvironment,
- unknownCommand ] <?> "block"
+block = choice [ hrule, codeBlock, header, list, blockQuote, mathBlock,
+ comment, bibliographic, para, specialEnvironment,
+ itemBlock, unknownEnvironment, unknownCommand ] <?>
+ "block"
--
-- header blocks
@@ -157,7 +171,8 @@ headerLevel n = try (do
--
hrule = try (do
- oneOfStrings [ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n", "\\newpage" ]
+ oneOfStrings [ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n",
+ "\\newpage" ]
spaces
return HorizontalRule)
@@ -166,8 +181,10 @@ hrule = try (do
--
codeBlock = try (do
- string "\\begin{verbatim}" -- don't use begin function because it gobbles whitespace
- option "" blanklines -- we want to gobble blank lines, but not leading space
+ string "\\begin{verbatim}" -- don't use begin function because it
+ -- gobbles whitespace
+ option "" blanklines -- we want to gobble blank lines, but not
+ -- leading space
contents <- manyTill anyChar (try (string "\\end{verbatim}"))
spaces
return (CodeBlock (stripTrailingNewlines contents)))
@@ -266,7 +283,8 @@ authors = try (do
string "\\author{"
authors <- manyTill anyChar (char '}')
spaces
- let authors' = map removeLeadingTrailingSpace $ lines $ gsub "\\\\\\\\" "\n" authors
+ let authors' = map removeLeadingTrailingSpace $ lines $
+ gsub "\\\\\\\\" "\n" authors
updateState (\state -> state { stateAuthors = authors' })
return Null)
@@ -286,21 +304,19 @@ date = try (do
itemBlock = try (do
("item", _, args) <- command
state <- getState
- if (stateParserContext state == ListItemState) then
- fail "item should be handled by list block"
- else
- if null args then
- return Null
- else
- return (Plain [Str (stripFirstAndLast (head args))]))
+ if (stateParserContext state == ListItemState)
+ then fail "item should be handled by list block"
+ else if null args
+ then return Null
+ else return (Plain [Str (stripFirstAndLast (head args))]))
--
-- raw LaTeX
--
specialEnvironment = do -- these are always parsed as raw
- followedBy' (choice (map (\name -> begin name) ["tabular", "figure", "tabbing", "eqnarry",
- "picture", "table", "verse", "theorem"]))
+ followedBy' (choice (map (\name -> begin name) ["tabular", "figure",
+ "tabbing", "eqnarry", "picture", "table", "verse", "theorem"]))
rawLaTeXEnvironment
-- | Parse any LaTeX environment and return a Para block containing
@@ -316,18 +332,20 @@ rawLaTeXEnvironment = try (do
args <- option [] commandArgs
let argStr = concat args
contents <- manyTill (choice [(many1 (noneOf "\\")),
- (do{ (Para [TeX str]) <- rawLaTeXEnvironment; return str }),
- string "\\"]) (end name')
+ (do
+ (Para [TeX str]) <- rawLaTeXEnvironment
+ return str),
+ string "\\" ])
+ (end name')
spaces
return (Para [TeX ("\\begin{" ++ name' ++ "}" ++ argStr ++
- (concat contents) ++ "\\end{" ++ name' ++ "}")]))
+ (concat contents) ++ "\\end{" ++ name' ++ "}")]))
unknownEnvironment = try (do
state <- getState
- result <- if stateParseRaw state then -- check to see whether we should include raw TeX
- rawLaTeXEnvironment -- if so, get the whole raw environment
- else
- anyEnvironment -- otherwise just the contents
+ result <- if stateParseRaw state -- check whether we should include raw TeX
+ then rawLaTeXEnvironment -- if so, get whole raw environment
+ else anyEnvironment -- otherwise just the contents
return result)
unknownCommand = try (do
@@ -338,14 +356,12 @@ unknownCommand = try (do
spaces
let argStr = concat args
state <- getState
- if (name == "item") && ((stateParserContext state) == ListItemState) then
- fail "should not be parsed as raw"
- else
- string ""
- if stateParseRaw state then
- return (Plain [TeX ("\\" ++ name ++ star ++ argStr)])
- else
- return (Plain [Str (joinWithSep " " args)]))
+ if (name == "item") && ((stateParserContext state) == ListItemState)
+ then fail "should not be parsed as raw"
+ else string ""
+ if stateParseRaw state
+ then return (Plain [TeX ("\\" ++ name ++ star ++ argStr)])
+ else return (Plain [Str (joinWithSep " " args)]))
-- latex comment
comment = try (do
@@ -358,9 +374,9 @@ comment = try (do
-- inline
--
-inline = choice [ strong, emph, ref, lab, code, linebreak, math, ldots, accentedChar,
- specialChar, specialInline, escapedChar, unescapedChar, str,
- endline, whitespace ] <?> "inline"
+inline = choice [ strong, emph, ref, lab, code, linebreak, math, ldots,
+ accentedChar, specialChar, specialInline, escapedChar,
+ unescapedChar, str, endline, whitespace ] <?> "inline"
specialInline = choice [ link, image, footnote, rawLaTeXInline ] <?>
"link, raw TeX, note, or image"
@@ -397,8 +413,8 @@ accentTable =
('o', [('`', 242), ('\'', 243), ('^', 244), ('~', 245), ('"', 246)]),
('u', [('`', 249), ('\'', 250), ('^', 251), ('"', 252)]) ]
-specialAccentedChar = choice [ ccedil, aring, iuml, szlig, aelig, oslash, pound,
- euro, copyright, sect ]
+specialAccentedChar = choice [ ccedil, aring, iuml, szlig, aelig,
+ oslash, pound, euro, copyright, sect ]
ccedil = try (do
char '\\'
@@ -563,15 +579,14 @@ image = try (do
footnote = try (do
(name, _, (contents:[])) <- command
- if ((name == "footnote") || (name == "thanks")) then
- string ""
- else
- fail "not a footnote or thanks command"
+ if ((name == "footnote") || (name == "thanks"))
+ then string ""
+ else fail "not a footnote or thanks command"
let contents' = stripFirstAndLast contents
state <- getState
let blocks = case runParser parseBlocks state "footnote" contents of
- Left err -> error $ "Input:\n" ++ show contents' ++
- "\nError:\n" ++ show err
+ Left err -> error $ "Input:\n" ++ show contents' ++
+ "\nError:\n" ++ show err
Right result -> result
let notes = stateNoteBlocks state
let nextRef = case notes of
@@ -586,8 +601,7 @@ rawLaTeXInline = try (do
(name, star, args) <- command
let argStr = concat args
state <- getState
- if ((name == "begin") || (name == "end") || (name == "item")) then
- fail "not an inline command"
- else
- string ""
+ if ((name == "begin") || (name == "end") || (name == "item"))
+ then fail "not an inline command"
+ else string ""
return (TeX ("\\" ++ name ++ star ++ argStr)))
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 034e5d8e4..9ca73dee5 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1,4 +1,14 @@
--- | Convert markdown to Pandoc document.
+{- |
+ Module : Text.Pandoc.Readers.Markdown
+ Copyright : Copyright (C) 2006 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm at berkeley dot edu>
+ Stability : unstable
+ Portability : portable
+
+Conversion of markdown-formatted plain text to 'Pandoc' document.
+-}
module Text.Pandoc.Readers.Markdown (
readMarkdown
) where
@@ -8,8 +18,8 @@ import Text.ParserCombinators.Pandoc
import Text.Pandoc.Definition
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment )
import Text.Pandoc.Shared
-import Text.Pandoc.Readers.HTML ( rawHtmlInline, rawHtmlBlock, anyHtmlBlockTag,
- anyHtmlInlineTag )
+import Text.Pandoc.Readers.HTML ( rawHtmlInline, rawHtmlBlock,
+ anyHtmlBlockTag, anyHtmlInlineTag )
import Text.Pandoc.HtmlEntities ( decodeEntities )
import Text.Regex ( matchRegex, mkRegex )
import Text.ParserCombinators.Parsec
@@ -57,9 +67,10 @@ blockQuoteChar = '>'
hyphenChar = '-'
-- treat these as potentially non-text when parsing inline:
-specialChars = [escapeChar, labelStart, labelEnd, emphStart, emphEnd, emphStartAlt,
- emphEndAlt, codeStart, codeEnd, autoLinkEnd, autoLinkStart, mathStart,
- mathEnd, imageStart, noteStart, hyphenChar]
+specialChars = [escapeChar, labelStart, labelEnd, emphStart, emphEnd,
+ emphStartAlt, emphEndAlt, codeStart, codeEnd, autoLinkEnd,
+ autoLinkStart, mathStart, mathEnd, imageStart, noteStart,
+ hyphenChar]
--
-- auxiliary functions
@@ -115,14 +126,16 @@ numberOfNote (Note ref _) = (read ref)
numberOfNote _ = 0
parseMarkdown = do
- updateState (\state -> state { stateParseRaw = True }) -- need to parse raw HTML
+ updateState (\state -> state { stateParseRaw = True })
+ -- need to parse raw HTML, since markdown allows it
(title, author, date) <- option ([],[],"") titleBlock
blocks <- parseBlocks
let blocks' = filter (/= Null) blocks
state <- getState
let keys = reverse $ stateKeyBlocks state
let notes = reverse $ stateNoteBlocks state
- let sortedNotes = sortBy (\x y -> compare (numberOfNote x) (numberOfNote y)) notes
+ let sortedNotes = sortBy (\x y -> compare (numberOfNote x)
+ (numberOfNote y)) notes
return (Pandoc (Meta title author date) (blocks' ++ sortedNotes ++ keys))
--
@@ -133,8 +146,9 @@ parseBlocks = do
result <- manyTill block eof
return result
-block = choice [ codeBlock, note, referenceKey, header, hrule, list, blockQuote, rawHtmlBlocks,
- rawLaTeXEnvironment, para, plain, blankBlock, nullBlock ] <?> "block"
+block = choice [ codeBlock, note, referenceKey, header, hrule, list,
+ blockQuote, rawHtmlBlocks, rawLaTeXEnvironment, para,
+ plain, blankBlock, nullBlock ] <?> "block"
--
-- header blocks
@@ -154,33 +168,33 @@ atxClosing = try (do
newline
option "" blanklines)
-setextHeader = choice (map (\x -> setextH x) (enumFromTo 1 (length setextHChars)))
+setextHeader = choice $
+ map (\x -> setextH x) (enumFromTo 1 (length setextHChars))
setextH n = try (do
- txt <- many1 (do {notFollowedBy newline; inline})
- endline
- many1 (char (setextHChars !! (n-1)))
- skipSpaces
- newline
- option "" blanklines
- return (Header n (normalizeSpaces txt)))
+ txt <- many1 (do {notFollowedBy newline; inline})
+ endline
+ many1 (char (setextHChars !! (n-1)))
+ skipSpaces
+ newline
+ option "" blanklines
+ return (Header n (normalizeSpaces txt)))
--
-- hrule block
--
-hruleWith chr =
- try (do
- skipSpaces
- char chr
- skipSpaces
- char chr
- skipSpaces
- char chr
- skipMany (oneOf (chr:spaceChars))
- newline
- option "" blanklines
- return HorizontalRule)
+hruleWith chr = try (do
+ skipSpaces
+ char chr
+ skipSpaces
+ char chr
+ skipSpaces
+ char chr
+ skipMany (oneOf (chr:spaceChars))
+ newline
+ option "" blanklines
+ return HorizontalRule)
hrule = choice (map hruleWith hruleChars) <?> "hrule"
@@ -189,9 +203,9 @@ hrule = choice (map hruleWith hruleChars) <?> "hrule"
--
indentedLine = try (do
- indentSpaces
- result <- manyTill anyChar newline
- return (result ++ "\n"))
+ indentSpaces
+ result <- manyTill anyChar newline
+ return (result ++ "\n"))
-- two or more indented lines, possibly separated by blank lines
indentedBlock = try (do
@@ -201,62 +215,66 @@ indentedBlock = try (do
return (res1 ++ blanks ++ res2))
codeBlock = do
- result <- choice [indentedBlock, indentedLine]
- option "" blanklines
- return (CodeBlock (stripTrailingNewlines result))
+ result <- choice [indentedBlock, indentedLine]
+ option "" blanklines
+ return (CodeBlock (stripTrailingNewlines result))
--
-- note block
--
rawLine = try (do
- notFollowedBy' blankline
- notFollowedBy' noteMarker
- contents <- many1 nonEndline
- end <- option "" (do
- newline
- option "" indentSpaces
- return "\n")
- return (contents ++ end))
+ notFollowedBy' blankline
+ notFollowedBy' noteMarker
+ contents <- many1 nonEndline
+ end <- option "" (do
+ newline
+ option "" indentSpaces
+ return "\n")
+ return (contents ++ end))
rawLines = do
lines <- many1 rawLine
return (concat lines)
note = try (do
- ref <- noteMarker
- char ':'
- skipSpaces
- skipEndline
- raw <- sepBy rawLines (try (do {blankline; indentSpaces}))
- option "" blanklines
- -- parse the extracted text, which may contain various block elements:
- state <- getState
- let parsed = case runParser parseBlocks (state {stateParserContext = BlockQuoteState}) "block" ((joinWithSep "\n" raw) ++ "\n\n") of
- Left err -> error $ "Raw block:\n" ++ show raw ++ "\nError:\n" ++ show err
- Right result -> result
- let identifiers = stateNoteIdentifiers state
- case (findIndex (== ref) identifiers) of
- Just n -> updateState (\s -> s {stateNoteBlocks =
- (Note (show (n+1)) parsed):(stateNoteBlocks s)})
- Nothing -> updateState id
- return Null)
+ ref <- noteMarker
+ char ':'
+ skipSpaces
+ skipEndline
+ raw <- sepBy rawLines (try (do {blankline; indentSpaces}))
+ option "" blanklines
+ -- parse the extracted text, which may contain various block elements:
+ state <- getState
+ let parsed = case runParser parseBlocks
+ (state {stateParserContext = BlockQuoteState}) "block"
+ ((joinWithSep "\n" raw) ++ "\n\n") of
+ Left err -> error $ "Raw block:\n" ++ show raw ++
+ "\nError:\n" ++ show err
+ Right result -> result
+ let identifiers = stateNoteIdentifiers state
+ case (findIndex (== ref) identifiers) of
+ Just n -> updateState (\s -> s {stateNoteBlocks =
+ (Note (show (n+1)) parsed):(stateNoteBlocks s)})
+ Nothing -> updateState id
+ return Null)
--
-- block quotes
--
emacsBoxQuote = try (do
- string ",----"
- manyTill anyChar newline
- raw <- manyTill (try (do{ char '|';
- option ' ' (char ' ');
- result <- manyTill anyChar newline;
- return result}))
- (string "`----")
- manyTill anyChar newline
- option "" blanklines
- return raw)
+ string ",----"
+ manyTill anyChar newline
+ raw <- manyTill (try (do
+ char '|'
+ option ' ' (char ' ')
+ result <- manyTill anyChar newline
+ return result))
+ (string "`----")
+ manyTill anyChar newline
+ option "" blanklines
+ return raw)
emailBlockQuoteStart = try (do
skipNonindentSpaces
@@ -265,24 +283,28 @@ emailBlockQuoteStart = try (do
return "> ")
emailBlockQuote = try (do
- emailBlockQuoteStart
- raw <- sepBy (many (choice [nonEndline,
- (try (do{ endline;
- notFollowedBy' emailBlockQuoteStart;
- return '\n'}))]))
- (try (do {newline; emailBlockQuoteStart}))
- newline <|> (do{ eof; return '\n'})
- option "" blanklines
- return raw)
+ emailBlockQuoteStart
+ raw <- sepBy (many (choice [nonEndline,
+ (try (do
+ endline
+ notFollowedBy' emailBlockQuoteStart
+ return '\n'))]))
+ (try (do {newline; emailBlockQuoteStart}))
+ newline <|> (do{ eof; return '\n' })
+ option "" blanklines
+ return raw)
blockQuote = do
- raw <- choice [ emailBlockQuote, emacsBoxQuote ]
- -- parse the extracted block, which may contain various block elements:
- state <- getState
- let parsed = case runParser parseBlocks (state {stateParserContext = BlockQuoteState}) "block" ((joinWithSep "\n" raw) ++ "\n\n") of
- Left err -> error $ "Raw block:\n" ++ show raw ++ "\nError:\n" ++ show err
- Right result -> result
- return (BlockQuote parsed)
+ raw <- choice [ emailBlockQuote, emacsBoxQuote ]
+ -- parse the extracted block, which may contain various block elements:
+ state <- getState
+ let parsed = case runParser parseBlocks
+ (state {stateParserContext = BlockQuoteState}) "block"
+ ((joinWithSep "\n" raw) ++ "\n\n") of
+ Left err -> error $ "Raw block:\n" ++ show raw ++
+ "\nError:\n" ++ show err
+ Right result -> result
+ return (BlockQuote parsed)
--
-- list blocks
@@ -290,85 +312,81 @@ blockQuote = do
list = choice [ bulletList, orderedList ] <?> "list"
-bulletListStart =
- try (do
- option ' ' newline -- if preceded by a Plain block in a list context
- skipNonindentSpaces
- notFollowedBy' hrule -- because hrules start out just like lists
- oneOf bulletListMarkers
- spaceChar
- skipSpaces)
-
-orderedListStart =
- try (do
- option ' ' newline -- if preceded by a Plain block in a list context
- skipNonindentSpaces
- many1 digit <|> count 1 letter
- oneOf orderedListDelimiters
- oneOf spaceChars
- skipSpaces)
+bulletListStart = try (do
+ option ' ' newline -- if preceded by a Plain block in a list context
+ skipNonindentSpaces
+ notFollowedBy' hrule -- because hrules start out just like lists
+ oneOf bulletListMarkers
+ spaceChar
+ skipSpaces)
+
+orderedListStart = try (do
+ option ' ' newline -- if preceded by a Plain block in a list context
+ skipNonindentSpaces
+ many1 digit <|> count 1 letter
+ oneOf orderedListDelimiters
+ oneOf spaceChars
+ skipSpaces)
-- parse a line of a list item (start = parser for beginning of list item)
listLine start = try (do
notFollowedBy' start
notFollowedBy blankline
- notFollowedBy' (do{ indentSpaces;
- many (spaceChar);
- choice [bulletListStart, orderedListStart]})
+ notFollowedBy' (do
+ indentSpaces
+ many (spaceChar)
+ choice [bulletListStart, orderedListStart])
line <- manyTill anyChar newline
return (line ++ "\n"))
-- parse raw text for one list item, excluding start marker and continuations
-rawListItem start =
- try (do
- start
- result <- many1 (listLine start)
- blanks <- many blankline
- return ((concat result) ++ blanks))
+rawListItem start = try (do
+ start
+ result <- many1 (listLine start)
+ blanks <- many blankline
+ return ((concat result) ++ blanks))
-- continuation of a list item - indented and separated by blankline
-- or (in compact lists) endline.
-- note: nested lists are parsed as continuations
-listContinuation start =
- try (do
- followedBy' indentSpaces
- result <- many1 (listContinuationLine start)
- blanks <- many blankline
- return ((concat result) ++ blanks))
+listContinuation start = try (do
+ followedBy' indentSpaces
+ result <- many1 (listContinuationLine start)
+ blanks <- many blankline
+ return ((concat result) ++ blanks))
listContinuationLine start = try (do
- notFollowedBy' blankline
- notFollowedBy' start
- option "" indentSpaces
- result <- manyTill anyChar newline
- return (result ++ "\n"))
-
-listItem start =
- try (do
- first <- rawListItem start
- rest <- many (listContinuation start)
- -- parsing with ListItemState forces markers at beginning of lines to
- -- count as list item markers, even if not separated by blank space.
- -- see definition of "endline"
- state <- getState
- let parsed = case runParser parseBlocks (state {stateParserContext = ListItemState})
- "block" raw of
- Left err -> error $ "Raw block:\n" ++ raw ++ "\nError:\n" ++ show err
- Right result -> result
- where raw = concat (first:rest)
- return parsed)
-
-orderedList =
- try (do
- items <- many1 (listItem orderedListStart)
- let items' = compactify items
- return (OrderedList items'))
-
-bulletList =
- try (do
- items <- many1 (listItem bulletListStart)
- let items' = compactify items
- return (BulletList items'))
+ notFollowedBy' blankline
+ notFollowedBy' start
+ option "" indentSpaces
+ result <- manyTill anyChar newline
+ return (result ++ "\n"))
+
+listItem start = try (do
+ first <- rawListItem start
+ rest <- many (listContinuation start)
+ -- parsing with ListItemState forces markers at beginning of lines to
+ -- count as list item markers, even if not separated by blank space.
+ -- see definition of "endline"
+ state <- getState
+ let parsed = case runParser parseBlocks
+ (state {stateParserContext = ListItemState})
+ "block" raw of
+ Left err -> error $ "Raw block:\n" ++ raw ++
+ "\nError:\n" ++ show err
+ Right result -> result
+ where raw = concat (first:rest)
+ return parsed)
+
+orderedList = try (do
+ items <- many1 (listItem orderedListStart)
+ let items' = compactify items
+ return (OrderedList items'))
+
+bulletList = try (do
+ items <- many1 (listItem bulletListStart)
+ let items' = compactify items
+ return (BulletList items'))
--
-- paragraph block
@@ -377,7 +395,10 @@ bulletList =
para = try (do
result <- many1 inline
newline
- choice [ (do{ followedBy' (oneOfStrings [">", ",----"]); return "" }), blanklines ]
+ choice [ (do
+ followedBy' (oneOfStrings [">", ",----"])
+ return "" ),
+ blanklines ]
let result' = normalizeSpaces result
return (Para result'))
@@ -391,30 +412,28 @@ plain = do
--
rawHtmlBlocks = try (do
- htmlBlocks <- many1 rawHtmlBlock
- let combined = concatMap (\(RawHtml str) -> str) htmlBlocks
- let combined' = if (last combined == '\n') then
- init combined -- strip extra newline
- else
- combined
- return (RawHtml combined'))
+ htmlBlocks <- many1 rawHtmlBlock
+ let combined = concatMap (\(RawHtml str) -> str) htmlBlocks
+ let combined' = if (last combined == '\n')
+ then init combined -- strip extra newline
+ else combined
+ return (RawHtml combined'))
--
-- reference key
--
-referenceKey =
- try (do
- skipSpaces
- label <- reference
- char labelSep
- skipSpaces
- option ' ' (char autoLinkStart)
- src <- many (noneOf (titleOpeners ++ [autoLinkEnd] ++ endLineChars))
- option ' ' (char autoLinkEnd)
- tit <- option "" title
- blanklines
- return (Key label (Src (removeTrailingSpace src) tit)))
+referenceKey = try (do
+ skipSpaces
+ label <- reference
+ char labelSep
+ skipSpaces
+ option ' ' (char autoLinkStart)
+ src <- many (noneOf (titleOpeners ++ [autoLinkEnd] ++ endLineChars))
+ option ' ' (char autoLinkEnd)
+ tit <- option "" title
+ blanklines
+ return (Key label (Src (removeTrailingSpace src) tit)))
--
-- inline
@@ -423,10 +442,11 @@ referenceKey =
text = choice [ math, strong, emph, code2, code1, str, linebreak, tabchar,
whitespace, endline ] <?> "text"
-inline = choice [ rawLaTeXInline, escapedChar, special, hyphens, text, ltSign, symbol ] <?> "inline"
+inline = choice [ rawLaTeXInline, escapedChar, special, hyphens, text,
+ ltSign, symbol ] <?> "inline"
-special = choice [ noteRef, inlineNote, link, referenceLink, rawHtmlInline, autoLink,
- image ] <?> "link, inline html, note, or image"
+special = choice [ noteRef, inlineNote, link, referenceLink, rawHtmlInline,
+ autoLink, image ] <?> "link, inline html, note, or image"
escapedChar = escaped anyChar
@@ -443,30 +463,33 @@ symbol = do
hyphens = try (do
result <- many1 (char '-')
- if (length result) == 1 then
- skipEndline -- don't want to treat endline after hyphen as a space
- else
- do{ string ""; return Space }
+ if (length result) == 1
+ then skipEndline -- don't want to treat endline after hyphen as a space
+ else do{ string ""; return Space }
return (Str result))
-- parses inline code, between codeStart and codeEnd
-code1 =
- try (do
- char codeStart
- result <- many (noneOf [codeEnd])
- char codeEnd
- let result' = removeLeadingTrailingSpace $ joinWithSep " " $ lines result -- get rid of any internal newlines
- return (Code result'))
+code1 = try (do
+ char codeStart
+ result <- many (noneOf [codeEnd])
+ char codeEnd
+ -- get rid of any internal newlines
+ let result' = removeLeadingTrailingSpace $ joinWithSep " " $ lines result
+ return (Code result'))
-- parses inline code, between 2 codeStarts and 2 codeEnds
-code2 =
- try (do
- string [codeStart, codeStart]
- result <- manyTill anyChar (try (string [codeEnd, codeEnd]))
- let result' = removeLeadingTrailingSpace $ joinWithSep " " $ lines result -- get rid of any internal newlines
- return (Code result'))
-
-mathWord = many1 (choice [(noneOf (" \t\n\\" ++ [mathEnd])), (try (do {c <- char '\\'; notFollowedBy (char mathEnd); return c}))])
+code2 = try (do
+ string [codeStart, codeStart]
+ result <- manyTill anyChar (try (string [codeEnd, codeEnd]))
+ let result' = removeLeadingTrailingSpace $ joinWithSep " " $ lines result
+ -- get rid of any internal newlines
+ return (Code result'))
+
+mathWord = many1 (choice [ (noneOf (" \t\n\\" ++ [mathEnd])),
+ (try (do
+ c <- char '\\'
+ notFollowedBy (char mathEnd)
+ return c))])
math = try (do
char mathStart
@@ -477,12 +500,14 @@ math = try (do
emph = do
result <- choice [ (enclosed (char emphStart) (char emphEnd) inline),
- (enclosed (char emphStartAlt) (char emphEndAlt) inline) ]
+ (enclosed (char emphStartAlt) (char emphEndAlt) inline) ]
return (Emph (normalizeSpaces result))
strong = do
- result <- choice [ (enclosed (count 2 (char emphStart)) (count 2 (char emphEnd)) inline),
- (enclosed (count 2 (char emphStartAlt)) (count 2 (char emphEndAlt)) inline)]
+ result <- choice [ (enclosed (count 2 (char emphStart))
+ (count 2 (char emphEnd)) inline),
+ (enclosed (count 2 (char emphStartAlt))
+ (count 2 (char emphEndAlt)) inline) ]
return (Strong (normalizeSpaces result))
whitespace = do
@@ -507,23 +532,21 @@ str = do
return (Str (decodeEntities result))
-- an endline character that can be treated as a space, not a structural break
-endline =
- try (do
- newline
- -- next line would allow block quotes without preceding blank line
- -- Markdown.pl does allow this, but there's a chance of a wrapped
- -- greater-than sign triggering a block quote by accident...
--- notFollowedBy' (choice [emailBlockQuoteStart, string ",----"])
- notFollowedBy blankline
- -- parse potential list starts at beginning of line differently if in a list:
- st <- getState
- if (stateParserContext st) == ListItemState then
- do
- notFollowedBy' orderedListStart
- notFollowedBy' bulletListStart
- else
- option () pzero
- return Space)
+endline = try (do
+ newline
+ -- next line would allow block quotes without preceding blank line
+ -- Markdown.pl does allow this, but there's a chance of a wrapped
+ -- greater-than sign triggering a block quote by accident...
+ -- notFollowedBy' (choice [emailBlockQuoteStart, string ",----"])
+ notFollowedBy blankline
+ -- parse potential list-starts differently if in a list:
+ st <- getState
+ if (stateParserContext st) == ListItemState
+ then do
+ notFollowedBy' orderedListStart
+ notFollowedBy' bulletListStart
+ else option () pzero
+ return Space)
--
-- links
@@ -537,92 +560,92 @@ reference = do
return (normalizeSpaces label)
-- source for a link, with optional title
-source =
- try (do
- char srcStart
- option ' ' (char autoLinkStart)
- src <- many (noneOf ([srcEnd, autoLinkEnd] ++ titleOpeners))
- option ' ' (char autoLinkEnd)
- tit <- option "" title
- skipSpaces
- char srcEnd
- return (Src (removeTrailingSpace src) tit))
-
-titleWith startChar endChar =
- try (do
- skipSpaces
- skipEndline -- a title can be on the next line from the source
- skipSpaces
- char startChar
- tit <- manyTill (choice [ try (do {char '\\'; char endChar}),
- (noneOf (endChar:endLineChars)) ]) (char endChar)
- let tit' = gsub "\"" "&quot;" tit
- return tit')
-
-title = choice [titleWith '(' ')', titleWith '"' '"', titleWith '\'' '\''] <?> "title"
+source = try (do
+ char srcStart
+ option ' ' (char autoLinkStart)
+ src <- many (noneOf ([srcEnd, autoLinkEnd] ++ titleOpeners))
+ option ' ' (char autoLinkEnd)
+ tit <- option "" title
+ skipSpaces
+ char srcEnd
+ return (Src (removeTrailingSpace src) tit))
+
+titleWith startChar endChar = try (do
+ skipSpaces
+ skipEndline -- a title can be on the next line from the source
+ skipSpaces
+ char startChar
+ tit <- manyTill (choice [ try (do {char '\\'; char endChar}),
+ (noneOf (endChar:endLineChars)) ]) (char endChar)
+ let tit' = gsub "\"" "&quot;" tit
+ return tit')
+
+title = choice [ titleWith '(' ')',
+ titleWith '"' '"',
+ titleWith '\'' '\''] <?> "title"
link = choice [explicitLink, referenceLink] <?> "link"
-explicitLink =
- try (do
- label <- reference
- src <- source
- return (Link label src))
+explicitLink = try (do
+ label <- reference
+ src <- source
+ return (Link label src))
referenceLink = choice [referenceLinkDouble, referenceLinkSingle]
-referenceLinkDouble = -- a link like [this][/url/]
- try (do
- label <- reference
- skipSpaces
- skipEndline
- skipSpaces
- ref <- reference
- return (Link label (Ref ref)))
-
-referenceLinkSingle = -- a link like [this]
- try (do
- label <- reference
- return (Link label (Ref [])))
-
-autoLink = -- a link <like.this.com>
- try (do
- notFollowedBy' anyHtmlBlockTag
- src <- between (char autoLinkStart) (char autoLinkEnd)
- (many (noneOf (spaceChars ++ endLineChars ++ [autoLinkEnd])))
- case (matchRegex emailAddress src) of
- Just _ -> return (Link [Str src] (Src ("mailto:" ++ src) ""))
- Nothing -> return (Link [Str src] (Src src "")))
-
-emailAddress = mkRegex "([^@:/]+)@(([^.]+[.]?)*([^.]+))" -- presupposes no whitespace
-
-image =
- try (do
- char imageStart
- (Link label src) <- link
- return (Image label src))
+-- a link like [this][/url/]
+referenceLinkDouble = try (do
+ label <- reference
+ skipSpaces
+ skipEndline
+ skipSpaces
+ ref <- reference
+ return (Link label (Ref ref)))
+
+-- a link like [this]
+referenceLinkSingle = try (do
+ label <- reference
+ return (Link label (Ref [])))
+
+-- a link <like.this.com>
+autoLink = try (do
+ notFollowedBy' anyHtmlBlockTag
+ src <- between (char autoLinkStart) (char autoLinkEnd)
+ (many (noneOf (spaceChars ++ endLineChars ++ [autoLinkEnd])))
+ case (matchRegex emailAddress src) of
+ Just _ -> return (Link [Str src] (Src ("mailto:" ++ src) ""))
+ Nothing -> return (Link [Str src] (Src src "")))
+
+emailAddress =
+ mkRegex "([^@:/]+)@(([^.]+[.]?)*([^.]+))" -- presupposes no whitespace
+
+image = try (do
+ char imageStart
+ (Link label src) <- link
+ return (Image label src))
noteMarker = try (do
- char labelStart
- char noteStart
- manyTill (noneOf " \t\n") (char labelEnd))
+ char labelStart
+ char noteStart
+ manyTill (noneOf " \t\n") (char labelEnd))
noteRef = try (do
- ref <- noteMarker
- state <- getState
- let identifiers = (stateNoteIdentifiers state) ++ [ref]
- updateState (\st -> st {stateNoteIdentifiers = identifiers})
- return (NoteRef (show (length identifiers))))
+ ref <- noteMarker
+ state <- getState
+ let identifiers = (stateNoteIdentifiers state) ++ [ref]
+ updateState (\st -> st {stateNoteIdentifiers = identifiers})
+ return (NoteRef (show (length identifiers))))
inlineNote = try (do
- char noteStart
- char labelStart
- contents <- manyTill inline (char labelEnd)
- state <- getState
- let identifiers = stateNoteIdentifiers state
- let ref = show $ (length identifiers) + 1
- let noteBlocks = stateNoteBlocks state
- updateState (\st -> st {stateNoteIdentifiers = (identifiers ++ [ref]),
- stateNoteBlocks = (Note ref [Para contents]):noteBlocks})
- return (NoteRef ref))
+ char noteStart
+ char labelStart
+ contents <- manyTill inline (char labelEnd)
+ state <- getState
+ let identifiers = stateNoteIdentifiers state
+ let ref = show $ (length identifiers) + 1
+ let noteBlocks = stateNoteBlocks state
+ updateState (\st -> st {stateNoteIdentifiers = (identifiers ++ [ref]),
+ stateNoteBlocks =
+ (Note ref [Para contents]):noteBlocks})
+ return (NoteRef ref))
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 69c7d9baa..1672e06dc 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -1,4 +1,14 @@
--- | Parse reStructuredText and return Pandoc document.
+{- |
+ Module : Text.Pandoc.Readers.RST
+ Copyright : Copyright (C) 2006 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm at berkeley dot edu>
+ Stability : unstable
+ Portability : portable
+
+Conversion from reStructuredText to 'Pandoc' document.
+-}
module Text.Pandoc.Readers.RST (
readRST
) where
@@ -61,16 +71,14 @@ promoteHeaders num [] = []
-- promote all the other headers.
titleTransform :: [Block] -- ^ list of blocks
-> ([Block], [Inline]) -- ^ modified list of blocks, title
-titleTransform ((Header 1 head1):(Header 2 head2):rest) = -- title and subtitle
- if (any isHeader1 rest) || (any isHeader2 rest) then
- ((Header 1 head1):(Header 2 head2):rest, [])
- else
- ((promoteHeaders 2 rest), head1 ++ [Str ":", Space] ++ head2)
+titleTransform ((Header 1 head1):(Header 2 head2):rest) = -- title subtitle
+ if (any isHeader1 rest) || (any isHeader2 rest)
+ then ((Header 1 head1):(Header 2 head2):rest, [])
+ else ((promoteHeaders 2 rest), head1 ++ [Str ":", Space] ++ head2)
titleTransform ((Header 1 head1):rest) = -- title, no subtitle
- if (any isHeader1 rest) then
- ((Header 1 head1):rest, [])
- else
- ((promoteHeaders 1 rest), head1)
+ if (any isHeader1 rest)
+ then ((Header 1 head1):rest, [])
+ else ((promoteHeaders 1 rest), head1)
titleTransform blocks = (blocks, [])
parseRST = do
@@ -78,17 +86,18 @@ parseRST = do
input <- getInput
blocks <- parseBlocks -- first pass
let anonymousKeys = filter isAnonKeyBlock blocks
- let blocks' = if (null anonymousKeys) then
- blocks
- else -- run parser again to fill in anonymous links...
- case runParser parseBlocks (state { stateKeyBlocks = anonymousKeys })
+ let blocks' = if (null anonymousKeys)
+ then blocks
+ else -- run parser again to fill in anonymous links...
+ case runParser parseBlocks
+ (state { stateKeyBlocks = anonymousKeys })
"RST source, second pass" input of
- Left err -> error $ "\nError:\n" ++ show err
- Right result -> (filter isNotAnonKeyBlock result)
- let (blocks'', title) = if stateStandalone state then
- titleTransform blocks'
- else
- (blocks', [])
+ Left err -> error $ "\nError:\n" ++ show err
+ Right result ->
+ (filter isNotAnonKeyBlock result)
+ let (blocks'', title) = if stateStandalone state
+ then titleTransform blocks'
+ else (blocks', [])
state <- getState
let authors = stateAuthors state
let date = stateDate state
@@ -103,9 +112,10 @@ parseBlocks = do
result <- manyTill block eof
return result
-block = choice [ codeBlock, rawHtmlBlock, rawLaTeXBlock, blockQuote, referenceKey,
- imageBlock, unknownDirective, header, hrule, list, fieldList, lineBlock,
- para, plain, blankBlock, nullBlock ] <?> "block"
+block = choice [ codeBlock, rawHtmlBlock, rawLaTeXBlock, blockQuote,
+ referenceKey, imageBlock, unknownDirective, header,
+ hrule, list, fieldList, lineBlock, para, plain,
+ blankBlock, nullBlock ] <?> "block"
--
-- field list
@@ -117,28 +127,32 @@ fieldListItem = try (do
string ": "
skipSpaces
first <- manyTill anyChar newline
- rest <- many (do{ notFollowedBy (char ':');
- notFollowedBy blankline;
- skipSpaces;
- manyTill anyChar newline })
+ rest <- many (do
+ notFollowedBy (char ':')
+ notFollowedBy blankline
+ skipSpaces
+ manyTill anyChar newline )
return (name, (joinWithSep " " (first:rest))))
fieldList = try (do
items <- many1 fieldListItem
blanklines
let authors = case (lookup "Authors" items) of
- Just auth -> [auth]
- Nothing -> map snd (filter (\(x,y) -> x == "Author") items)
+ Just auth -> [auth]
+ Nothing -> map snd (filter (\(x,y) -> x == "Author") items)
let date = case (lookup "Date" items) of
- Just dat -> dat
- Nothing -> ""
+ Just dat -> dat
+ Nothing -> ""
let title = case (lookup "Title" items) of
- Just tit -> [Str tit]
- Nothing -> []
- let remaining = filter (\(x,y) -> (x /= "Authors") && (x /= "Author") && (x /= "Date") &&
- (x /= "Title")) items
- let result = map (\(x,y) -> Para [Strong [Str x], Str ":", Space, Str y]) remaining
- updateState (\st -> st { stateAuthors = authors, stateDate = date, stateTitle = title })
+ Just tit -> [Str tit]
+ Nothing -> []
+ let remaining = filter (\(x,y) -> (x /= "Authors") && (x /= "Author") &&
+ (x /= "Date") && (x /= "Title")) items
+ let result = map (\(x,y) ->
+ Para [Strong [Str x], Str ":", Space, Str y]) remaining
+ updateState (\st -> st { stateAuthors = authors,
+ stateDate = date,
+ stateTitle = title })
return (BlockQuote result))
--
@@ -164,18 +178,17 @@ lineBlock = try (do
para = choice [ paraBeforeCodeBlock, paraNormal ] <?> "paragraph"
codeBlockStart = try (do
- string "::"
- blankline
- blankline)
+ string "::"
+ blankline
+ blankline)
-- paragraph that ends in a :: starting a code block
paraBeforeCodeBlock = try (do
result <- many1 (do {notFollowedBy' codeBlockStart; inline})
followedBy' (string "::")
- return (Para (if (last result == Space) then
- normalizeSpaces result
- else
- (normalizeSpaces result) ++ [Str ":"])))
+ return (Para (if (last result == Space)
+ then normalizeSpaces result
+ else (normalizeSpaces result) ++ [Str ":"])))
-- regular paragraph
paraNormal = try (do
@@ -195,9 +208,9 @@ plain = do
--
imageBlock = try (do
- string ".. image:: "
- src <- manyTill anyChar newline
- return (Plain [Image [Str "image"] (Src src "")]))
+ string ".. image:: "
+ src <- manyTill anyChar newline
+ return (Plain [Image [Str "image"] (Src src "")]))
--
-- header blocks
@@ -207,59 +220,58 @@ header = choice [ doubleHeader, singleHeader ] <?> "header"
-- a header with lines on top and bottom
doubleHeader = try (do
- c <- oneOf underlineChars
- rest <- many (char c) -- the top line
- let lenTop = length (c:rest)
- skipSpaces
- newline
- txt <- many1 (do {notFollowedBy blankline; inline})
- pos <- getPosition
- let len = (sourceColumn pos) - 1
- if (len > lenTop) then fail "title longer than border" else (do {return ()})
- blankline -- spaces and newline
- count lenTop (char c) -- the bottom line
- blanklines
- -- check to see if we've had this kind of header before.
- -- if so, get appropriate level. if not, add to list.
- state <- getState
- let headerTable = stateHeaderTable state
- let (headerTable', level) = case findIndex (== DoubleHeader c) headerTable of
- Just ind -> (headerTable, ind + 1)
- Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1)
- setState (state { stateHeaderTable = headerTable' })
- return (Header level (normalizeSpaces txt)))
+ c <- oneOf underlineChars
+ rest <- many (char c) -- the top line
+ let lenTop = length (c:rest)
+ skipSpaces
+ newline
+ txt <- many1 (do {notFollowedBy blankline; inline})
+ pos <- getPosition
+ let len = (sourceColumn pos) - 1
+ if (len > lenTop) then fail "title longer than border" else (do {return ()})
+ blankline -- spaces and newline
+ count lenTop (char c) -- the bottom line
+ blanklines
+ -- check to see if we've had this kind of header before.
+ -- if so, get appropriate level. if not, add to list.
+ state <- getState
+ let headerTable = stateHeaderTable state
+ let (headerTable',level) = case findIndex (== DoubleHeader c) headerTable of
+ Just ind -> (headerTable, ind + 1)
+ Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1)
+ setState (state { stateHeaderTable = headerTable' })
+ return (Header level (normalizeSpaces txt)))
-- a header with line on the bottom only
singleHeader = try (do
- notFollowedBy' whitespace
- txt <- many1 (do {notFollowedBy blankline; inline})
- pos <- getPosition
- let len = (sourceColumn pos) - 1
- blankline
- c <- oneOf underlineChars
- rest <- count (len - 1) (char c)
- many (char c)
- blanklines
- state <- getState
- let headerTable = stateHeaderTable state
- let (headerTable', level) = case findIndex (== SingleHeader c) headerTable of
- Just ind -> (headerTable, ind + 1)
- Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1)
- setState (state { stateHeaderTable = headerTable' })
- return (Header level (normalizeSpaces txt)))
+ notFollowedBy' whitespace
+ txt <- many1 (do {notFollowedBy blankline; inline})
+ pos <- getPosition
+ let len = (sourceColumn pos) - 1
+ blankline
+ c <- oneOf underlineChars
+ rest <- count (len - 1) (char c)
+ many (char c)
+ blanklines
+ state <- getState
+ let headerTable = stateHeaderTable state
+ let (headerTable',level) = case findIndex (== SingleHeader c) headerTable of
+ Just ind -> (headerTable, ind + 1)
+ Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1)
+ setState (state { stateHeaderTable = headerTable' })
+ return (Header level (normalizeSpaces txt)))
--
-- hrule block
--
-hruleWith chr =
- try (do
- count 4 (char chr)
- skipMany (char chr)
- skipSpaces
- newline
- blanklines
- return HorizontalRule)
+hruleWith chr = try (do
+ count 4 (char chr)
+ skipMany (char chr)
+ skipSpaces
+ newline
+ blanklines
+ return HorizontalRule)
hrule = choice (map hruleWith underlineChars) <?> "hrule"
@@ -269,9 +281,9 @@ hrule = choice (map hruleWith underlineChars) <?> "hrule"
-- read a line indented by a given string
indentedLine indents = try (do
- string indents
- result <- manyTill anyChar newline
- return (result ++ "\n"))
+ string indents
+ result <- manyTill anyChar newline
+ return (result ++ "\n"))
-- two or more indented lines, possibly separated by blank lines
-- if variable = True, then any indent will work, but it must be consistent through the block
@@ -279,54 +291,59 @@ indentedLine indents = try (do
indentedBlock variable = try (do
state <- getState
let tabStop = stateTabStop state
- indents <- if variable then
- many1 (oneOf " \t")
- else
- oneOfStrings ["\t", (replicate tabStop ' ')]
+ indents <- if variable
+ then many1 (oneOf " \t")
+ else oneOfStrings ["\t", (replicate tabStop ' ')]
firstline <- manyTill anyChar newline
rest <- many (choice [ indentedLine indents,
- try (do {b <- blanklines; l <- indentedLine indents; return (b ++ l)})])
+ try (do
+ b <- blanklines
+ l <- indentedLine indents
+ return (b ++ l))])
option "" blanklines
return (firstline ++ "\n" ++ (concat rest)))
codeBlock = try (do
- codeBlockStart
- result <- indentedBlock False -- the False means we want one tab stop indent on each line
- return (CodeBlock (stripTrailingNewlines result)))
+ codeBlockStart
+ result <- indentedBlock False
+ -- the False means we want one tab stop indent on each line
+ return (CodeBlock (stripTrailingNewlines result)))
--
-- raw html
--
rawHtmlBlock = try (do
- string ".. raw:: html"
- blanklines
- result <- indentedBlock True
- return (RawHtml result))
+ string ".. raw:: html"
+ blanklines
+ result <- indentedBlock True
+ return (RawHtml result))
--
-- raw latex
--
rawLaTeXBlock = try (do
- string ".. raw:: latex"
- blanklines
- result <- indentedBlock True
- return (Para [(TeX result)]))
+ string ".. raw:: latex"
+ blanklines
+ result <- indentedBlock True
+ return (Para [(TeX result)]))
--
-- block quotes
--
blockQuote = try (do
- block <- indentedBlock True
- -- parse the extracted block, which may contain various block elements:
- state <- getState
- let parsed = case runParser parseBlocks (state {stateParserContext = BlockQuoteState})
- "block" (block ++ "\n\n") of
- Left err -> error $ "Raw block:\n" ++ show block ++ "\nError:\n" ++ show err
- Right result -> result
- return (BlockQuote parsed))
+ block <- indentedBlock True
+ -- parse the extracted block, which may contain various block elements:
+ state <- getState
+ let parsed = case runParser parseBlocks
+ (state {stateParserContext = BlockQuoteState})
+ "block" (block ++ "\n\n") of
+ Left err -> error $ "Raw block:\n" ++ show block ++
+ "\nError:\n" ++ show err
+ Right result -> result
+ return (BlockQuote parsed))
--
-- list blocks
@@ -335,34 +352,36 @@ blockQuote = try (do
list = choice [ bulletList, orderedList ] <?> "list"
-- parses bullet list start and returns its length (inc. following whitespace)
-bulletListStart =
- try (do
- notFollowedBy' hrule -- because hrules start out just like lists
- marker <- oneOf bulletListMarkers
- white <- many1 spaceChar
- let len = length (marker:white)
- return len)
+bulletListStart = try (do
+ notFollowedBy' hrule -- because hrules start out just like lists
+ marker <- oneOf bulletListMarkers
+ white <- many1 spaceChar
+ let len = length (marker:white)
+ return len)
withPeriodSuffix parser = try (do
- a <- parser
- b <- char '.'
- return (a ++ [b]))
+ a <- parser
+ b <- char '.'
+ return (a ++ [b]))
withParentheses parser = try (do
- a <- char '('
- b <- parser
- c <- char ')'
- return ([a] ++ b ++ [c]))
+ a <- char '('
+ b <- parser
+ c <- char ')'
+ return ([a] ++ b ++ [c]))
withRightParen parser = try (do
- a <- parser
- b <- char ')'
- return (a ++ [b]))
+ a <- parser
+ b <- char ')'
+ return (a ++ [b]))
upcaseWord = map toUpper
romanNumeral = do
- let lowerNumerals = ["i", "ii", "iii", "iiii", "iv", "v", "vi", "vii", "viii", "ix", "x", "xi", "xii", "xiii", "xiv", "xv", "xvi", "xvii", "xviii", "xix", "xx", "xxi", "xxii", "xxiii", "xxiv" ]
+ let lowerNumerals = ["i", "ii", "iii", "iiii", "iv", "v", "vi",
+ "vii", "viii", "ix", "x", "xi", "xii", "xiii",
+ "xiv", "xv", "xvi", "xvii", "xviii", "xix", "xx",
+ "xxi", "xxii", "xxiii", "xxiv" ]
let upperNumerals = map upcaseWord lowerNumerals
result <- choice $ map string (lowerNumerals ++ upperNumerals)
return result
@@ -372,15 +391,14 @@ orderedListEnumerator = choice [ many1 digit,
count 1 letter,
romanNumeral ]
--- parses ordered list start and returns its length (inc. following whitespace)
-orderedListStart =
- try (do
- marker <- choice [ withPeriodSuffix orderedListEnumerator,
- withParentheses orderedListEnumerator,
- withRightParen orderedListEnumerator ]
- white <- many1 spaceChar
- let len = length (marker ++ white)
- return len)
+-- parses ordered list start and returns its length (inc following whitespace)
+orderedListStart = try (do
+ marker <- choice [ withPeriodSuffix orderedListEnumerator,
+ withParentheses orderedListEnumerator,
+ withRightParen orderedListEnumerator ]
+ white <- many1 spaceChar
+ let len = length (marker ++ white)
+ return len)
-- parse a line of a list item
listLine markerLength = try (do
@@ -393,72 +411,73 @@ listLine markerLength = try (do
indentWith num = do
state <- getState
let tabStop = stateTabStop state
- if (num < tabStop) then
- count num (char ' ')
- else
- choice [ try (count num (char ' ')),
- (try (do {char '\t'; count (num - tabStop) (char ' ')})) ]
+ if (num < tabStop)
+ then count num (char ' ')
+ else choice [ try (count num (char ' ')),
+ (try (do {char '\t'; count (num - tabStop) (char ' ')})) ]
-- parse raw text for one list item, excluding start marker and continuations
-rawListItem start =
- try (do
- markerLength <- start
- firstLine <- manyTill anyChar newline
- restLines <- many (listLine markerLength)
- return (markerLength, (firstLine ++ "\n" ++ (concat restLines))))
-
--- continuation of a list item - indented and separated by blankline or (in compact lists)
--- endline. Note: nested lists are parsed as continuations.
-listContinuation markerLength =
- try (do
- blanks <- many1 blankline
- result <- many1 (listLine markerLength)
- return (blanks ++ (concat result)))
-
-listItem start =
- try (do
- (markerLength, first) <- rawListItem start
- rest <- many (listContinuation markerLength)
- blanks <- choice [ try (do {b <- many blankline; followedBy' start; return b}),
- many1 blankline ] -- whole list must end with blank
- -- parsing with ListItemState forces markers at beginning of lines to
- -- count as list item markers, even if not separated by blank space.
- -- see definition of "endline"
- state <- getState
- let parsed = case runParser parseBlocks (state {stateParserContext = ListItemState})
- "list item" raw of
- Left err -> error $ "Raw:\n" ++ raw ++ "\nError:\n" ++ show err
- Right result -> result
- where raw = concat (first:rest) ++ blanks
- return parsed)
-
-orderedList =
- try (do
- items <- many1 (listItem orderedListStart)
- let items' = compactify items
- return (OrderedList items'))
-
-bulletList =
- try (do
- items <- many1 (listItem bulletListStart)
- let items' = compactify items
- return (BulletList items'))
+rawListItem start = try (do
+ markerLength <- start
+ firstLine <- manyTill anyChar newline
+ restLines <- many (listLine markerLength)
+ return (markerLength, (firstLine ++ "\n" ++ (concat restLines))))
+
+-- continuation of a list item - indented and separated by blankline or
+-- (in compact lists) endline.
+-- Note: nested lists are parsed as continuations.
+listContinuation markerLength = try (do
+ blanks <- many1 blankline
+ result <- many1 (listLine markerLength)
+ return (blanks ++ (concat result)))
+
+listItem start = try (do
+ (markerLength, first) <- rawListItem start
+ rest <- many (listContinuation markerLength)
+ blanks <- choice [ try (do
+ b <- many blankline
+ followedBy' start
+ return b),
+ many1 blankline ] -- whole list must end with blank
+ -- parsing with ListItemState forces markers at beginning of lines to
+ -- count as list item markers, even if not separated by blank space.
+ -- see definition of "endline"
+ state <- getState
+ let parsed = case runParser parseBlocks
+ (state {stateParserContext = ListItemState}) "list item"
+ raw of
+ Left err -> error $ "Raw:\n" ++ raw ++
+ "\nError:\n" ++ show err
+ Right result -> result
+ where raw = concat (first:rest) ++ blanks
+ return parsed)
+
+orderedList = try (do
+ items <- many1 (listItem orderedListStart)
+ let items' = compactify items
+ return (OrderedList items'))
+
+bulletList = try (do
+ items <- many1 (listItem bulletListStart)
+ let items' = compactify items
+ return (BulletList items'))
--
-- unknown directive (e.g. comment)
--
unknownDirective = try (do
- string ".. "
- manyTill anyChar newline
- many (do {string " ";
- char ':';
- many1 (noneOf "\n:");
- char ':';
- many1 (noneOf "\n");
- newline})
- option "" blanklines
- return Null)
+ string ".. "
+ manyTill anyChar newline
+ many (do
+ string " "
+ char ':'
+ many1 (noneOf "\n:")
+ char ':'
+ many1 (noneOf "\n")
+ newline)
+ option "" blanklines
+ return Null)
--
-- reference key
@@ -467,39 +486,43 @@ unknownDirective = try (do
referenceKey = choice [imageKey, anonymousKey, regularKeyQuoted, regularKey]
imageKey = try (do
- string ".. |"
- ref <- manyTill inline (char '|')
- skipSpaces
- string "image::"
- src <- manyTill anyChar newline
- return (Key (normalizeSpaces ref) (Src (removeLeadingTrailingSpace src) "")))
+ string ".. |"
+ ref <- manyTill inline (char '|')
+ skipSpaces
+ string "image::"
+ src <- manyTill anyChar newline
+ return (Key (normalizeSpaces ref)
+ (Src (removeLeadingTrailingSpace src) "")))
anonymousKey = try (do
- choice [string ".. __:", string "__"]
- skipSpaces
- src <- manyTill anyChar newline
- state <- getState
- return (Key [Str "_"] (Src (removeLeadingTrailingSpace src) "")))
+ choice [string ".. __:", string "__"]
+ skipSpaces
+ src <- manyTill anyChar newline
+ state <- getState
+ return (Key [Str "_"] (Src (removeLeadingTrailingSpace src) "")))
regularKeyQuoted = try (do
- string ".. _`"
- ref <- manyTill inline (string "`:")
- skipSpaces
- src <- manyTill anyChar newline
- return (Key (normalizeSpaces ref) (Src (removeLeadingTrailingSpace src) "")))
+ string ".. _`"
+ ref <- manyTill inline (string "`:")
+ skipSpaces
+ src <- manyTill anyChar newline
+ return (Key (normalizeSpaces ref)
+ (Src (removeLeadingTrailingSpace src) "")))
regularKey = try (do
- string ".. _"
- ref <- manyTill inline (char ':')
- skipSpaces
- src <- manyTill anyChar newline
- return (Key (normalizeSpaces ref) (Src (removeLeadingTrailingSpace src) "")))
+ string ".. _"
+ ref <- manyTill inline (char ':')
+ skipSpaces
+ src <- manyTill anyChar newline
+ return (Key (normalizeSpaces ref)
+ (Src (removeLeadingTrailingSpace src) "")))
--
-- inline
--
-text = choice [ strong, emph, code, str, tabchar, whitespace, endline ] <?> "text"
+text = choice [ strong, emph, code, str, tabchar, whitespace,
+ endline ] <?> "text"
inline = choice [ escapedChar, special, hyphens, text, symbol ] <?> "inline"
@@ -507,7 +530,8 @@ special = choice [ link, image ] <?> "link, inline html, or image"
hyphens = try (do
result <- many1 (char '-')
- option Space endline -- don't want to treat endline after hyphen or dash as a space
+ option Space endline
+ -- don't want to treat endline after hyphen or dash as a space
return (Str result))
escapedChar = escaped anyChar
@@ -517,12 +541,11 @@ symbol = do
return (Str [result])
-- parses inline code, between codeStart and codeEnd
-code =
- try (do
- string "``"
- result <- manyTill anyChar (string "``")
- let result' = removeLeadingTrailingSpace $ joinWithSep " " $ lines result
- return (Code result'))
+code = try (do
+ string "``"
+ result <- manyTill anyChar (string "``")
+ let result' = removeLeadingTrailingSpace $ joinWithSep " " $ lines result
+ return (Code result'))
emph = do
result <- enclosed (char '*') (char '*') inline
@@ -546,99 +569,95 @@ str = do
return (Str result)
-- an endline character that can be treated as a space, not a structural break
-endline =
- try (do
- newline
- notFollowedBy blankline
- -- parse potential list starts at beginning of line differently if in a list:
- st <- getState
- if ((stateParserContext st) == ListItemState) then
- notFollowedBy' (choice [orderedListStart, bulletListStart])
- else
- option () pzero
- return Space)
+endline = try (do
+ newline
+ notFollowedBy blankline
+ -- parse potential list-starts at beginning of line differently in a list:
+ st <- getState
+ if ((stateParserContext st) == ListItemState)
+ then notFollowedBy' (choice [orderedListStart, bulletListStart])
+ else option () pzero
+ return Space)
--
-- links
--
-link = choice [explicitLink, referenceLink, autoLink, oneWordReferenceLink] <?> "link"
-
-explicitLink =
- try (do
- char '`'
- label <- manyTill inline (try (do {spaces; char '<'}))
- src <- manyTill (noneOf ">\n ") (char '>')
- skipSpaces
- string "`_"
- return (Link (normalizeSpaces label) (Src (removeLeadingTrailingSpace src) "")))
-
-anonymousLinkEnding =
- try (do
- char '_'
- state <- getState
- let anonKeys = stateKeyBlocks state
- -- if there's a list of anon key refs (from previous pass), pop one off.
- -- otherwise return an anon key ref for the next pass to take care of...
- case anonKeys of
- (Key [Str "_"] src):rest ->
- do{ setState (state { stateKeyBlocks = rest });
- return src }
- otherwise -> return (Ref [Str "_"]))
-
-referenceLink =
- try (do
- char '`'
- label <- manyTill inline (string "`_")
- src <- option (Ref []) anonymousLinkEnding
- return (Link (normalizeSpaces label) src))
-
-oneWordReferenceLink =
- try (do
- label <- many1 alphaNum
- char '_'
- src <- option (Ref []) anonymousLinkEnding
- notFollowedBy alphaNum -- because this_is_not a link
- return (Link [Str label] src))
-
-uriScheme = oneOfStrings [ "http://", "https://", "ftp://", "file://", "mailto:",
- "news:", "telnet:" ]
+link = choice [explicitLink, referenceLink, autoLink,
+ oneWordReferenceLink] <?> "link"
+
+explicitLink = try (do
+ char '`'
+ label <- manyTill inline (try (do {spaces; char '<'}))
+ src <- manyTill (noneOf ">\n ") (char '>')
+ skipSpaces
+ string "`_"
+ return (Link (normalizeSpaces label)
+ (Src (removeLeadingTrailingSpace src) "")))
+
+anonymousLinkEnding = try (do
+ char '_'
+ state <- getState
+ let anonKeys = stateKeyBlocks state
+ -- if there's a list of anon key refs (from previous pass), pop one off.
+ -- otherwise return an anon key ref for the next pass to take care of...
+ case anonKeys of
+ (Key [Str "_"] src):rest ->
+ do
+ setState (state { stateKeyBlocks = rest })
+ return src
+ otherwise -> return (Ref [Str "_"]))
+
+referenceLink = try (do
+ char '`'
+ label <- manyTill inline (string "`_")
+ src <- option (Ref []) anonymousLinkEnding
+ return (Link (normalizeSpaces label) src))
+
+oneWordReferenceLink = try (do
+ label <- many1 alphaNum
+ char '_'
+ src <- option (Ref []) anonymousLinkEnding
+ notFollowedBy alphaNum -- because this_is_not a link
+ return (Link [Str label] src))
+
+uriScheme = oneOfStrings [ "http://", "https://", "ftp://", "file://",
+ "mailto:", "news:", "telnet:" ]
uri = try (do
- scheme <- uriScheme
- identifier <- many1 (noneOf " \t\n")
- return (scheme ++ identifier))
+ scheme <- uriScheme
+ identifier <- many1 (noneOf " \t\n")
+ return (scheme ++ identifier))
autoURI = try (do
- src <- uri
- return (Link [Str src] (Src src "")))
+ src <- uri
+ return (Link [Str src] (Src src "")))
emailChar = alphaNum <|> oneOf "-+_."
emailAddress = try (do
- firstLetter <- alphaNum
- restAddr <- many emailChar
- let addr = firstLetter:restAddr
- char '@'
- dom <- domain
- return (addr ++ '@':dom))
+ firstLetter <- alphaNum
+ restAddr <- many emailChar
+ let addr = firstLetter:restAddr
+ char '@'
+ dom <- domain
+ return (addr ++ '@':dom))
domainChar = alphaNum <|> char '-'
domain = try (do
- first <- many1 domainChar
- dom <- many1 (try (do{ char '.'; many1 domainChar }))
- return (joinWithSep "." (first:dom)))
+ first <- many1 domainChar
+ dom <- many1 (try (do{ char '.'; many1 domainChar }))
+ return (joinWithSep "." (first:dom)))
autoEmail = try (do
- src <- emailAddress
- return (Link [Str src] (Src ("mailto:" ++ src) "")))
+ src <- emailAddress
+ return (Link [Str src] (Src ("mailto:" ++ src) "")))
autoLink = autoURI <|> autoEmail
-- For now, we assume that all substitution references are for images.
-image =
- try (do
- char '|'
- ref <- manyTill inline (char '|')
- return (Image (normalizeSpaces ref) (Ref ref)))
+image = try (do
+ char '|'
+ ref <- manyTill inline (char '|')
+ return (Image (normalizeSpaces ref) (Ref ref)))