aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers
diff options
context:
space:
mode:
authorfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2007-08-15 06:00:58 +0000
committerfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2007-08-15 06:00:58 +0000
commita8e2199034679c07411c76c42ab1ffb52b170029 (patch)
tree2ce7c3be138e30936210196fa828816298139ec6 /src/Text/Pandoc/Readers
parente814a3f6d23f640b1aed5b7cb949459d514a3e33 (diff)
downloadpandoc-a8e2199034679c07411c76c42ab1ffb52b170029.tar.gz
Major code cleanup in all modules. (Removed unneeded imports,
reformatted, etc.) More major changes are documented below: + Removed Text.Pandoc.ParserCombinators and moved all its definitions to Text.Pandoc.Shared. + In Text.Pandoc.Shared: - Removed unneeded 'try' in blanklines. - Removed endsWith function and rewrote functions to use isSuffixOf instead. - Added >>~ combinator. - Rewrote stripTrailingNewlines, removeLeadingSpaces. + Moved Text.Pandoc.Entities -> Text.Pandoc.CharacterReferences. - Removed unneeded functions charToEntity, charToNumericalEntity. - Renamed functions using proper terminology (character references, not entities). decodeEntities -> decodeCharacterReferences, characterEntity -> characterReference. - Moved escapeStringToXML to Docbook writer, which is the only thing that uses it. - Removed old entity parser in HTML and Markdown readers; replaced with new charRef parser in Text.Pandoc.Shared. + Fixed accent bug in Text.Pandoc.Readers.LaTeX: \^{} now correctly parses as a '^' character. + Text.Pandoc.ASCIIMathML is no longer an exported module. git-svn-id: https://pandoc.googlecode.com/svn/trunk@835 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs362
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs536
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs662
-rw-r--r--src/Text/Pandoc/Readers/RST.hs321
4 files changed, 811 insertions, 1070 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 1742667b8..1eb5d7b4a 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -41,12 +41,12 @@ module Text.Pandoc.Readers.HTML (
) where
import Text.ParserCombinators.Parsec
-import Text.Pandoc.ParserCombinators
import Text.Pandoc.Definition
import Text.Pandoc.Shared
-import Text.Pandoc.Entities ( characterEntity, decodeEntities )
+import Text.Pandoc.CharacterReferences ( characterReference,
+ decodeCharacterReferences )
import Data.Maybe ( fromMaybe )
-import Data.List ( intersect, takeWhile, dropWhile )
+import Data.List ( takeWhile, dropWhile )
import Data.Char ( toUpper, toLower, isAlphaNum )
-- | Convert HTML-formatted string to 'Pandoc' document.
@@ -55,10 +55,6 @@ readHtml :: ParserState -- ^ Parser state
-> Pandoc
readHtml = readWith parseHtml
--- for testing
-testString :: String -> IO ()
-testString = testStringWith parseHtml
-
--
-- Constants
--
@@ -74,26 +70,18 @@ inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big",
--
-- | Read blocks until end tag.
-blocksTilEnd tag = try (do
- blocks <- manyTill (do {b <- block; spaces; return b}) (htmlEndTag tag)
- return $ filter (/= Null) blocks)
+blocksTilEnd tag = do
+ blocks <- manyTill (block >>~ spaces) (htmlEndTag tag)
+ return $ filter (/= Null) blocks
-- | Read inlines until end tag.
-inlinesTilEnd tag = try (do
- inlines <- manyTill inline (htmlEndTag tag)
- return inlines)
+inlinesTilEnd tag = manyTill inline (htmlEndTag tag)
-- | Parse blocks between open and close tag.
-blocksIn tag = try $ do
- htmlTag tag
- spaces
- blocksTilEnd tag
+blocksIn tag = try $ htmlTag tag >> spaces >> blocksTilEnd tag
-- | Parse inlines between open and close tag.
-inlinesIn tag = try $ do
- htmlTag tag
- spaces
- inlinesTilEnd tag
+inlinesIn tag = try $ htmlTag tag >> spaces >> inlinesTilEnd tag
-- | Extract type from a tag: e.g. @br@ from @\<br\>@
extractTagType :: String -> String
@@ -103,19 +91,19 @@ extractTagType ('<':rest) =
extractTagType _ = ""
-- | Parse any HTML tag (closing or opening) and return text of tag
-anyHtmlTag = try (do
+anyHtmlTag = try $ do
char '<'
spaces
tag <- many1 alphaNum
attribs <- htmlAttributes
spaces
ender <- option "" (string "/")
- let ender' = if (null ender) then "" else " /"
+ let ender' = if null ender then "" else " /"
spaces
char '>'
- return ("<" ++ tag ++ attribs ++ ender' ++ ">"))
+ return $ "<" ++ tag ++ attribs ++ ender' ++ ">"
-anyHtmlEndTag = try (do
+anyHtmlEndTag = try $ do
char '<'
spaces
char '/'
@@ -123,19 +111,19 @@ anyHtmlEndTag = try (do
tagType <- many1 alphaNum
spaces
char '>'
- return ("</" ++ tagType ++ ">"))
+ return $ "</" ++ tagType ++ ">"
htmlTag :: String -> GenParser Char st (String, [(String, String)])
-htmlTag tag = try (do
+htmlTag tag = try $ do
char '<'
spaces
stringAnyCase tag
attribs <- many htmlAttribute
spaces
- option "" (string "/")
+ optional (string "/")
spaces
char '>'
- return (tag, (map (\(name, content, raw) -> (name, content)) attribs)))
+ return (tag, (map (\(name, content, raw) -> (name, content)) attribs))
-- parses a quoted html attribute value
quoted quoteChar = do
@@ -145,20 +133,20 @@ quoted quoteChar = do
htmlAttributes = do
attrList <- many htmlAttribute
- return (concatMap (\(name, content, raw) -> raw) attrList)
+ return $ concatMap (\(name, content, raw) -> raw) attrList
htmlAttribute = htmlRegularAttribute <|> htmlMinimizedAttribute
--- minimized boolean attribute (no = and value)
-htmlMinimizedAttribute = try (do
+-- minimized boolean attribute
+htmlMinimizedAttribute = try $ do
many1 space
name <- many1 (choice [letter, oneOf ".-_:"])
spaces
notFollowedBy (char '=')
let content = name
- return (name, content, (" " ++ name)))
+ return (name, content, (" " ++ name))
-htmlRegularAttribute = try (do
+htmlRegularAttribute = try $ do
many1 space
name <- many1 (choice [letter, oneOf ".-_:"])
spaces
@@ -170,10 +158,10 @@ htmlRegularAttribute = try (do
a <- many (alphaNum <|> (oneOf "-._:"))
return (a,"")) ]
return (name, content,
- (" " ++ name ++ "=" ++ quoteStr ++ content ++ quoteStr)))
+ (" " ++ name ++ "=" ++ quoteStr ++ content ++ quoteStr))
-- | Parse an end tag of type 'tag'
-htmlEndTag tag = try (do
+htmlEndTag tag = try $ do
char '<'
spaces
char '/'
@@ -181,87 +169,83 @@ htmlEndTag tag = try (do
stringAnyCase tag
spaces
char '>'
- return ("</" ++ tag ++ ">"))
+ return $ "</" ++ tag ++ ">"
-- | Returns @True@ if the tag is an inline tag.
isInline tag = (extractTagType tag) `elem` inlineHtmlTags
-anyHtmlBlockTag = try (do
- tag <- choice [anyHtmlTag, anyHtmlEndTag]
- if isInline tag then fail "inline tag" else return tag)
+anyHtmlBlockTag = try $ do
+ tag <- anyHtmlTag <|> anyHtmlEndTag
+ if isInline tag then fail "inline tag" else return tag
-anyHtmlInlineTag = try (do
- tag <- choice [ anyHtmlTag, anyHtmlEndTag ]
- if isInline tag then return tag else fail "not an inline tag")
+anyHtmlInlineTag = try $ do
+ tag <- anyHtmlTag <|> anyHtmlEndTag
+ if isInline tag then return tag else fail "not an inline tag"
-- | Parses material between script tags.
-- Scripts must be treated differently, because they can contain '<>' etc.
-htmlScript = try (do
+htmlScript = try $ do
open <- string "<script"
rest <- manyTill anyChar (htmlEndTag "script")
- return (open ++ rest ++ "</script>"))
+ return $ open ++ rest ++ "</script>"
htmlBlockElement = choice [ htmlScript, htmlComment, xmlDec, definition ]
-rawHtmlBlock = try (do
- notFollowedBy' (choice [htmlTag "/body", htmlTag "/html"])
+rawHtmlBlock = try $ do
+ notFollowedBy' (htmlTag "/body" <|> htmlTag "/html")
body <- htmlBlockElement <|> anyHtmlBlockTag
- sp <- (many space)
+ sp <- many space
state <- getState
- if stateParseRaw state then return (RawHtml (body ++ sp)) else return Null)
+ if stateParseRaw state then return (RawHtml (body ++ sp)) else return Null
-- | Parses an HTML comment.
-htmlComment = try (do
+htmlComment = try $ do
string "<!--"
comment <- manyTill anyChar (try (string "-->"))
- return ("<!--" ++ comment ++ "-->"))
+ return $ "<!--" ++ comment ++ "-->"
--
-- parsing documents
--
-xmlDec = try (do
+xmlDec = try $ do
string "<?"
rest <- manyTill anyChar (char '>')
- return ("<?" ++ rest ++ ">"))
+ return $ "<?" ++ rest ++ ">"
-definition = try (do
+definition = try $ do
string "<!"
rest <- manyTill anyChar (char '>')
- return ("<!" ++ rest ++ ">"))
+ return $ "<!" ++ rest ++ ">"
-nonTitleNonHead = try (do
- notFollowedBy' (htmlTag "title")
- notFollowedBy' (htmlTag "/head")
- result <- choice [do {rawHtmlBlock; return ' '}, anyChar]
- return result)
+nonTitleNonHead = try $ notFollowedBy' (htmlTag "title" <|> htmlTag "/head") >>
+ ((rawHtmlBlock >> return ' ') <|> anyChar)
-parseTitle = try (do
- (tag, attribs) <- htmlTag "title"
+parseTitle = try $ do
+ (tag, _) <- htmlTag "title"
contents <- inlinesTilEnd tag
spaces
- return contents)
+ return contents
-- parse header and return meta-information (for now, just title)
-parseHead = try (do
+parseHead = try $ do
htmlTag "head"
spaces
skipMany nonTitleNonHead
contents <- option [] parseTitle
skipMany nonTitleNonHead
htmlTag "/head"
- return (contents, [], ""))
+ return (contents, [], "")
-skipHtmlTag tag = option ("",[]) (htmlTag tag)
+skipHtmlTag tag = optional (htmlTag tag)
-- h1 class="title" representation of title in body
-bodyTitle = try (do
+bodyTitle = try $ do
(tag, attribs) <- htmlTag "h1"
cl <- case (extractAttribute "class" attribs) of
- Just "title" -> do {return ""}
+ Just "title" -> return ""
otherwise -> fail "not title"
inlinesTilEnd "h1"
- return "")
parseHtml = do
sepEndBy (choice [xmlDec, definition, htmlComment]) spaces
@@ -271,27 +255,30 @@ parseHtml = do
spaces
skipHtmlTag "body"
spaces
- option "" bodyTitle -- skip title in body, because it's represented in meta
+ optional bodyTitle -- skip title in body, because it's represented in meta
blocks <- parseBlocks
spaces
- option "" (htmlEndTag "body")
+ optional (htmlEndTag "body")
spaces
- option "" (htmlEndTag "html")
+ optional (htmlEndTag "html")
many anyChar -- ignore anything after </html>
eof
- return (Pandoc (Meta title authors date) blocks)
+ return $ Pandoc (Meta title authors date) blocks
--
-- parsing blocks
--
-parseBlocks = do
- spaces
- result <- sepEndBy block spaces
- return $ filter (/= Null) result
+parseBlocks = spaces >> sepEndBy block spaces >>= (return . filter (/= Null))
-block = choice [ codeBlock, header, hrule, list, blockQuote, para, plain,
- rawHtmlBlock ] <?> "block"
+block = choice [ codeBlock
+ , header
+ , hrule
+ , list
+ , blockQuote
+ , para
+ , plain
+ , rawHtmlBlock ] <?> "block"
--
-- header blocks
@@ -299,53 +286,49 @@ block = choice [ codeBlock, header, hrule, list, blockQuote, para, plain,
header = choice (map headerLevel (enumFromTo 1 5)) <?> "header"
-headerLevel n = try (do
+headerLevel n = try $ do
let level = "h" ++ show n
(tag, attribs) <- htmlTag level
contents <- inlinesTilEnd level
- return (Header n (normalizeSpaces contents)))
+ return $ Header n (normalizeSpaces contents)
--
-- hrule block
--
-hrule = try (do
+hrule = try $ do
(tag, attribs) <- htmlTag "hr"
state <- getState
- if (not (null attribs)) && (stateParseRaw state)
- then -- in this case we want to parse it as raw html
- unexpected "attributes in hr"
- else return HorizontalRule)
+ if not (null attribs) && stateParseRaw state
+ then unexpected "attributes in hr" -- parse as raw in this case
+ else return HorizontalRule
--
-- code blocks
--
-codeBlock = choice [ preCodeBlock, bareCodeBlock ] <?> "code block"
+codeBlock = preCodeBlock <|> bareCodeBlock <?> "code block"
-preCodeBlock = try (do
+preCodeBlock = try $ do
htmlTag "pre"
spaces
- htmlTag "code"
- result <- manyTill anyChar (htmlEndTag "code")
+ result <- bareCodeBlock
spaces
htmlEndTag "pre"
- return (CodeBlock (stripTrailingNewlines (decodeEntities result))))
+ return result
-bareCodeBlock = try (do
+bareCodeBlock = try $ do
htmlTag "code"
result <- manyTill anyChar (htmlEndTag "code")
- return (CodeBlock (stripTrailingNewlines (decodeEntities result))))
+ return $ CodeBlock $ stripTrailingNewlines $
+ decodeCharacterReferences result
--
-- block quotes
--
-blockQuote = try (do
- tag <- htmlTag "blockquote"
- spaces
- blocks <- blocksTilEnd "blockquote"
- return (BlockQuote blocks))
+blockQuote = try $ htmlTag "blockquote" >> spaces >>
+ blocksTilEnd "blockquote" >>= (return . BlockQuote)
--
-- list blocks
@@ -354,119 +337,105 @@ blockQuote = try (do
list = choice [ bulletList, orderedList, definitionList ] <?> "list"
orderedList = try $ do
- (_, attribs) <- htmlTag "ol"
- (start, style) <- option (1, DefaultStyle) $
- do failIfStrict
- let sta = fromMaybe "1" $
- lookup "start" attribs
- let sty = fromMaybe (fromMaybe "" $
- lookup "style" attribs) $
- lookup "class" attribs
- let sty' = case sty of
- "lower-roman" -> LowerRoman
- "upper-roman" -> UpperRoman
- "lower-alpha" -> LowerAlpha
- "upper-alpha" -> UpperAlpha
- "decimal" -> Decimal
- _ -> DefaultStyle
- return (read sta, sty')
- spaces
- items <- sepEndBy1 (blocksIn "li") spaces
- htmlEndTag "ol"
- return (OrderedList (start, style, DefaultDelim) items)
+ (_, attribs) <- htmlTag "ol"
+ (start, style) <- option (1, DefaultStyle) $
+ do failIfStrict
+ let sta = fromMaybe "1" $
+ lookup "start" attribs
+ let sty = fromMaybe (fromMaybe "" $
+ lookup "style" attribs) $
+ lookup "class" attribs
+ let sty' = case sty of
+ "lower-roman" -> LowerRoman
+ "upper-roman" -> UpperRoman
+ "lower-alpha" -> LowerAlpha
+ "upper-alpha" -> UpperAlpha
+ "decimal" -> Decimal
+ _ -> DefaultStyle
+ return (read sta, sty')
+ spaces
+ items <- sepEndBy1 (blocksIn "li") spaces
+ htmlEndTag "ol"
+ return $ OrderedList (start, style, DefaultDelim) items
bulletList = try $ do
- htmlTag "ul"
- spaces
- items <- sepEndBy1 (blocksIn "li") spaces
- htmlEndTag "ul"
- return (BulletList items)
+ htmlTag "ul"
+ spaces
+ items <- sepEndBy1 (blocksIn "li") spaces
+ htmlEndTag "ul"
+ return $ BulletList items
definitionList = try $ do
- failIfStrict -- def lists not part of standard markdown
- tag <- htmlTag "dl"
- spaces
- items <- sepEndBy1 definitionListItem spaces
- htmlEndTag "dl"
- return (DefinitionList items)
+ failIfStrict -- def lists not part of standard markdown
+ tag <- htmlTag "dl"
+ spaces
+ items <- sepEndBy1 definitionListItem spaces
+ htmlEndTag "dl"
+ return $ DefinitionList items
definitionListItem = try $ do
- terms <- sepEndBy1 (inlinesIn "dt") spaces
- defs <- sepEndBy1 (blocksIn "dd") spaces
- let term = joinWithSep [LineBreak] terms
- return (term, concat defs)
+ terms <- sepEndBy1 (inlinesIn "dt") spaces
+ defs <- sepEndBy1 (blocksIn "dd") spaces
+ let term = joinWithSep [LineBreak] terms
+ return (term, concat defs)
--
-- paragraph block
--
-para = try (do
- tag <- htmlTag "p"
- result <- inlinesTilEnd "p"
- return (Para (normalizeSpaces result)))
+para = htmlTag "p" >> inlinesTilEnd "p" >>= return . Para . normalizeSpaces
--
-- plain block
--
-plain = do
- result <- many1 inline
- return (Plain (normalizeSpaces result))
+plain = many1 inline >>= return . Plain . normalizeSpaces
--
-- inline
--
-inline = choice [ text, special ] <?> "inline"
-
-text = choice [ entity, strong, emph, superscript, subscript,
- strikeout, spanStrikeout, code, str,
- linebreak, whitespace ] <?> "text"
-
-special = choice [ link, image, rawHtmlInline ] <?>
- "link, inline html, or image"
-
-entity = do
- ent <- characterEntity
- return $ Str [ent]
-
-code = try (do
+inline = choice [ charRef
+ , strong
+ , emph
+ , superscript
+ , subscript
+ , strikeout
+ , spanStrikeout
+ , code
+ , str
+ , linebreak
+ , whitespace
+ , link
+ , image
+ , rawHtmlInline ] <?> "inline"
+
+code = try $ do
htmlTag "code"
result <- manyTill anyChar (htmlEndTag "code")
-- remove internal line breaks, leading and trailing space,
- -- and decode entities
- let result' = decodeEntities $ removeLeadingTrailingSpace $
- joinWithSep " " $ lines result
- return (Code result'))
+ -- and decode character references
+ return $ Code $ decodeCharacterReferences $ removeLeadingTrailingSpace $
+ joinWithSep " " $ lines result
rawHtmlInline = do
- result <- choice [htmlScript, anyHtmlInlineTag]
+ result <- htmlScript <|> anyHtmlInlineTag
state <- getState
if stateParseRaw state then return (HtmlInline result) else return (Str "")
-betweenTags tag = try (do
- htmlTag tag
- result <- inlinesTilEnd tag
- return (normalizeSpaces result))
+betweenTags tag = try $ htmlTag tag >> inlinesTilEnd tag >>=
+ return . normalizeSpaces
-emph = try (do
- result <- choice [betweenTags "em", betweenTags "it"]
- return (Emph result))
+emph = (betweenTags "em" <|> betweenTags "it") >>= return . Emph
-superscript = try $ do
- failIfStrict -- strict markdown has no superscript, so treat as raw HTML
- result <- betweenTags "sup"
- return (Superscript result)
+strong = (betweenTags "b" <|> betweenTags "strong") >>= return . Strong
-subscript = try $ do
- failIfStrict -- strict markdown has no subscript, so treat as raw HTML
- result <- betweenTags "sub"
- return (Subscript result)
+superscript = failIfStrict >> betweenTags "sup" >>= return . Superscript
-strikeout = try $ do
- failIfStrict -- strict markdown has no strikeout, so treat as raw HTML
- result <- choice [betweenTags "s", betweenTags "strike"]
- return (Strikeout result)
+subscript = failIfStrict >> betweenTags "sub" >>= return . Subscript
+
+strikeout = failIfStrict >> (betweenTags "s" <|> betweenTags "strike") >>=
+ return . Strikeout
spanStrikeout = try $ do
failIfStrict -- strict markdown has no strikeout, so treat as raw HTML
@@ -474,25 +443,14 @@ spanStrikeout = try $ do
result <- case (extractAttribute "class" attributes) of
Just "strikeout" -> inlinesTilEnd "span"
_ -> fail "not a strikeout"
- return (Strikeout result)
+ return $ Strikeout result
-strong = try (do
- result <- choice [betweenTags "b", betweenTags "strong"]
- return (Strong result))
-
-whitespace = do
- many1 space
- return Space
+whitespace = many1 space >> return Space
-- hard line break
-linebreak = do
- htmlTag "br"
- option ' ' newline
- return LineBreak
+linebreak = htmlTag "br" >> optional newline >> return LineBreak
-str = do
- result <- many1 (noneOf "<& \t\n")
- return (Str result)
+str = many1 (noneOf "<& \t\n") >>= return . Str
--
-- links and images
@@ -501,27 +459,27 @@ str = do
-- extract contents of attribute (attribute names are case-insensitive)
extractAttribute name [] = Nothing
extractAttribute name ((attrName, contents):rest) =
- let name' = map toLower name
- attrName' = map toLower attrName in
- if (attrName' == name')
- then Just (decodeEntities contents)
- else extractAttribute name rest
+ let name' = map toLower name
+ attrName' = map toLower attrName
+ in if attrName' == name'
+ then Just (decodeCharacterReferences contents)
+ else extractAttribute name rest
link = try $ do
(tag, attributes) <- htmlTag "a"
url <- case (extractAttribute "href" attributes) of
- Just url -> do {return url}
+ Just url -> return url
Nothing -> fail "no href"
- let title = fromMaybe "" (extractAttribute "title" attributes)
+ let title = fromMaybe "" $ extractAttribute "title" attributes
label <- inlinesTilEnd "a"
return $ Link (normalizeSpaces label) (url, title)
image = try $ do
(tag, attributes) <- htmlTag "img"
url <- case (extractAttribute "src" attributes) of
- Just url -> do {return url}
+ Just url -> return url
Nothing -> fail "no src"
- let title = fromMaybe "" (extractAttribute "title" attributes)
+ let title = fromMaybe "" $ extractAttribute "title" attributes
let alt = fromMaybe "" (extractAttribute "alt" attributes)
return $ Image [Str alt] (url, title)
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 73a3e4a8f..4b91b528c 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -34,7 +34,6 @@ module Text.Pandoc.Readers.LaTeX (
) where
import Text.ParserCombinators.Parsec
-import Text.Pandoc.ParserCombinators
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Data.Maybe ( fromMaybe )
@@ -47,9 +46,6 @@ readLaTeX :: ParserState -- ^ Parser state, including options for parser
-> Pandoc
readLaTeX = readWith parseLaTeX
--- for testing
-testString = testStringWith parseLaTeX
-
-- characters with special meaning
specialChars = "\\$%&^&_~#{}\n \t|<>'\"-"
@@ -58,12 +54,12 @@ specialChars = "\\$%&^&_~#{}\n \t|<>'\"-"
--
-- | Returns text between brackets and its matching pair.
-bracketedText openB closeB = try (do
+bracketedText openB closeB = do
result <- charsInBalanced' openB closeB
- return ([openB] ++ result ++ [closeB]))
+ return $ [openB] ++ result ++ [closeB]
-- | Returns an option or argument of a LaTeX command.
-optOrArg = choice [ (bracketedText '{' '}'), (bracketedText '[' ']') ]
+optOrArg = bracketedText '{' '}' <|> bracketedText '[' ']'
-- | True if the string begins with '{'.
isArg ('{':rest) = True
@@ -73,62 +69,55 @@ isArg other = False
commandArgs = many optOrArg
-- | Parses LaTeX command, returns (name, star, list of options or arguments).
-command = try (do
+command = try $ do
char '\\'
name <- many1 alphaNum
star <- option "" (string "*") -- some commands have starred versions
args <- commandArgs
- return (name, star, args))
+ return (name, star, args)
-begin name = try (do
- string "\\begin{"
- string name
- char '}'
- option [] commandArgs
+begin name = try $ do
+ string $ "\\begin{" ++ name ++ "}"
+ optional commandArgs
spaces
- return name)
+ return name
-end name = try (do
- string "\\end{"
- string name
- char '}'
+end name = try $ do
+ string $ "\\end{" ++ name ++ "}"
spaces
- return name)
+ return name
-- | Returns a list of block elements containing the contents of an
-- environment.
-environment name = try (do
- begin name
- spaces
- contents <- manyTill block (end name)
- return contents)
+environment name = try $ begin name >> spaces >> manyTill block (end name)
-anyEnvironment = try (do
+anyEnvironment = try $ do
string "\\begin{"
name <- many alphaNum
star <- option "" (string "*") -- some environments have starred variants
char '}'
- option [] commandArgs
+ optional commandArgs
spaces
contents <- manyTill block (end (name ++ star))
- return (BlockQuote contents))
+ return $ BlockQuote contents
--
-- parsing documents
--
-- | Process LaTeX preamble, extracting metadata.
-processLaTeXPreamble = try (do
- manyTill (choice [bibliographic, comment, unknownCommand, nullBlock])
- (try (string "\\begin{document}"))
- spaces)
+processLaTeXPreamble = try $ manyTill
+ (choice [bibliographic, comment, unknownCommand, nullBlock])
+ (try (string "\\begin{document}")) >>
+ spaces
-- | Parse LaTeX and return 'Pandoc'.
parseLaTeX = do
- option () processLaTeXPreamble -- preamble might not be present (fragment)
+ optional processLaTeXPreamble -- preamble might not be present (fragment)
+ spaces
blocks <- parseBlocks
spaces
- option "" (try (string "\\end{document}")) -- might not be present (in fragment)
+ optional $ try (string "\\end{document}") -- might not be present (fragment)
spaces
eof
state <- getState
@@ -136,21 +125,27 @@ parseLaTeX = do
let title' = stateTitle state
let authors' = stateAuthors state
let date' = stateDate state
- return (Pandoc (Meta title' authors' date') blocks')
+ return $ Pandoc (Meta title' authors' date') blocks'
--
-- parsing blocks
--
-parseBlocks = do
- spaces
- result <- many block
- return result
-
-block = choice [ hrule, codeBlock, header, list, blockQuote, mathBlock,
- comment, bibliographic, para, specialEnvironment,
- itemBlock, unknownEnvironment, unknownCommand ] <?>
- "block"
+parseBlocks = spaces >> many block
+
+block = choice [ hrule
+ , codeBlock
+ , header
+ , list
+ , blockQuote
+ , mathBlock
+ , comment
+ , bibliographic
+ , para
+ , specialEnvironment
+ , itemBlock
+ , unknownEnvironment
+ , unknownCommand ] <?> "block"
--
-- header blocks
@@ -158,24 +153,21 @@ block = choice [ hrule, codeBlock, header, list, blockQuote, mathBlock,
header = choice (map headerLevel (enumFromTo 1 5)) <?> "header"
-headerLevel n = try (do
+headerLevel n = try $ do
let subs = concat $ replicate (n - 1) "sub"
string ("\\" ++ subs ++ "section")
- option ' ' (char '*')
+ optional (char '*')
char '{'
title <- manyTill inline (char '}')
spaces
- return (Header n (normalizeSpaces title)))
+ return $ Header n (normalizeSpaces title)
--
-- hrule block
--
-hrule = try (do
- oneOfStrings [ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n",
- "\\newpage" ]
- spaces
- return HorizontalRule)
+hrule = oneOfStrings [ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n",
+ "\\newpage" ] >> spaces >> return HorizontalRule
--
-- code blocks
@@ -183,37 +175,28 @@ hrule = try (do
codeBlock = codeBlock1 <|> codeBlock2
-codeBlock1 = try (do
+codeBlock1 = try $ do
string "\\begin{verbatim}" -- don't use begin function because it
-- gobbles whitespace
- option "" blanklines -- we want to gobble blank lines, but not
+ optional blanklines -- we want to gobble blank lines, but not
-- leading space
contents <- manyTill anyChar (try (string "\\end{verbatim}"))
spaces
- return (CodeBlock (stripTrailingNewlines contents)))
+ return $ CodeBlock (stripTrailingNewlines contents)
-codeBlock2 = try (do
- string "\\begin{Verbatim}" -- used by fancyverb package
+codeBlock2 = try $ do
+ string "\\begin{Verbatim}" -- used by fancyvrb package
option "" blanklines
contents <- manyTill anyChar (try (string "\\end{Verbatim}"))
spaces
- return (CodeBlock (stripTrailingNewlines contents)))
+ return $ CodeBlock (stripTrailingNewlines contents)
--
-- block quotes
--
-blockQuote = choice [ blockQuote1, blockQuote2 ] <?> "blockquote"
-
-blockQuote1 = try (do
- blocks <- environment "quote"
- spaces
- return (BlockQuote blocks))
-
-blockQuote2 = try (do
- blocks <- environment "quotation"
- spaces
- return (BlockQuote blocks))
+blockQuote = (environment "quote" <|> environment "quotation") >>~ spaces >>=
+ return . BlockQuote
--
-- math block
@@ -223,12 +206,12 @@ mathBlock = mathBlockWith (begin "equation") (end "equation") <|>
mathBlockWith (begin "displaymath") (end "displaymath") <|>
mathBlockWith (string "\\[") (string "\\]") <?> "math block"
-mathBlockWith start end = try (do
+mathBlockWith start end = try $ do
start
spaces
result <- manyTill anyChar end
spaces
- return (BlockQuote [Para [TeX ("$" ++ result ++ "$")]]))
+ return $ BlockQuote [Para [TeX ("$" ++ result ++ "$")]]
--
-- list blocks
@@ -237,69 +220,66 @@ mathBlockWith start end = try (do
list = bulletList <|> orderedList <|> definitionList <?> "list"
listItem = try $ do
- ("item", _, args) <- command
- spaces
- state <- getState
- let oldParserContext = stateParserContext state
- updateState (\state -> state {stateParserContext = ListItemState})
- blocks <- many block
- updateState (\state -> state {stateParserContext = oldParserContext})
- opt <- case args of
- ([x]) | "[" `isPrefixOf` x && "]" `isSuffixOf` x ->
- parseFromString (many inline) $ tail $ init x
- _ -> return []
- return (opt, blocks)
+ ("item", _, args) <- command
+ spaces
+ state <- getState
+ let oldParserContext = stateParserContext state
+ updateState (\state -> state {stateParserContext = ListItemState})
+ blocks <- many block
+ updateState (\state -> state {stateParserContext = oldParserContext})
+ opt <- case args of
+ ([x]) | "[" `isPrefixOf` x && "]" `isSuffixOf` x ->
+ parseFromString (many inline) $ tail $ init x
+ _ -> return []
+ return (opt, blocks)
orderedList = try $ do
- string "\\begin{enumerate}"
- (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $
- try $ do failIfStrict
- char '['
- res <- anyOrderedListMarker
- char ']'
- return res
- spaces
- option "" $ try $ do string "\\setlength{\\itemindent}"
- char '{'
- manyTill anyChar (char '}')
- spaces
- start <- option 1 $ try $ do failIfStrict
- string "\\setcounter{enum"
- many1 (char 'i')
- string "}{"
- num <- many1 digit
- char '}'
- spaces
- return $ (read num) + 1
- items <- many listItem
- end "enumerate"
- spaces
- return $ OrderedList (start, style, delim) $ map snd items
+ string "\\begin{enumerate}"
+ (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $
+ try $ do failIfStrict
+ char '['
+ res <- anyOrderedListMarker
+ char ']'
+ return res
+ spaces
+ option "" $ try $ do string "\\setlength{\\itemindent}"
+ char '{'
+ manyTill anyChar (char '}')
+ spaces
+ start <- option 1 $ try $ do failIfStrict
+ string "\\setcounter{enum"
+ many1 (char 'i')
+ string "}{"
+ num <- many1 digit
+ char '}'
+ spaces
+ return $ (read num) + 1
+ items <- many listItem
+ end "enumerate"
+ spaces
+ return $ OrderedList (start, style, delim) $ map snd items
bulletList = try $ do
- begin "itemize"
- spaces
- items <- many listItem
- end "itemize"
- spaces
- return (BulletList $ map snd items)
+ begin "itemize"
+ spaces
+ items <- many listItem
+ end "itemize"
+ spaces
+ return (BulletList $ map snd items)
definitionList = try $ do
- begin "description"
- spaces
- items <- many listItem
- end "description"
- spaces
- return (DefinitionList items)
+ begin "description"
+ spaces
+ items <- many listItem
+ end "description"
+ spaces
+ return (DefinitionList items)
--
-- paragraph block
--
-para = try (do
- result <- many1 inline
- spaces
- return (Para (normalizeSpaces result)))
+para = many1 inline >>~ spaces >>= return . Para . normalizeSpaces
--
-- title authors date
@@ -307,33 +287,30 @@ para = try (do
bibliographic = choice [ maketitle, title, authors, date ]
-maketitle = try (do
- string "\\maketitle"
- spaces
- return Null)
+maketitle = try (string "\\maketitle") >> spaces >> return Null
-title = try (do
+title = try $ do
string "\\title{"
tit <- manyTill inline (char '}')
spaces
updateState (\state -> state { stateTitle = tit })
- return Null)
+ return Null
-authors = try (do
+authors = try $ do
string "\\author{"
authors <- manyTill anyChar (char '}')
spaces
let authors' = map removeLeadingTrailingSpace $ lines $
substitute "\\\\" "\n" authors
updateState (\state -> state { stateAuthors = authors' })
- return Null)
+ return Null
-date = try (do
+date = try $ do
string "\\date{"
date' <- manyTill anyChar (char '}')
spaces
updateState (\state -> state { stateDate = date' })
- return Null)
+ return Null
--
-- item block
@@ -341,14 +318,14 @@ date = try (do
--
-- this forces items to be parsed in different blocks
-itemBlock = try (do
+itemBlock = try $ do
("item", _, args) <- command
state <- getState
if (stateParserContext state == ListItemState)
then fail "item should be handled by list block"
else if null args
then return Null
- else return (Plain [Str (stripFirstAndLast (head args))]))
+ else return $ Plain [Str (stripFirstAndLast (head args))]
--
-- raw LaTeX
@@ -362,77 +339,93 @@ specialEnvironment = do -- these are always parsed as raw
-- | Parse any LaTeX environment and return a Para block containing
-- the whole literal environment as raw TeX.
rawLaTeXEnvironment :: GenParser Char st Block
-rawLaTeXEnvironment = try (do
- string "\\begin"
- char '{'
+rawLaTeXEnvironment = try $ do
+ string "\\begin{"
name <- many1 alphaNum
star <- option "" (string "*") -- for starred variants
let name' = name ++ star
char '}'
args <- option [] commandArgs
let argStr = concat args
- contents <- manyTill (choice [(many1 (noneOf "\\")),
+ contents <- manyTill (choice [ (many1 (noneOf "\\")),
(do
(Para [TeX str]) <- rawLaTeXEnvironment
return str),
string "\\" ])
(end name')
spaces
- return (Para [TeX ("\\begin{" ++ name' ++ "}" ++ argStr ++
- (concat contents) ++ "\\end{" ++ name' ++ "}")]))
+ return $ Para [TeX $ "\\begin{" ++ name' ++ "}" ++ argStr ++
+ concat contents ++ "\\end{" ++ name' ++ "}"]
-unknownEnvironment = try (do
+unknownEnvironment = try $ do
state <- getState
result <- if stateParseRaw state -- check whether we should include raw TeX
then rawLaTeXEnvironment -- if so, get whole raw environment
else anyEnvironment -- otherwise just the contents
- return result)
+ return result
-unknownCommand = try (do
- notFollowedBy' $ choice $ map end
- ["itemize", "enumerate", "description", "document"]
+unknownCommand = try $ do
+ notFollowedBy' $ choice $ map end ["itemize", "enumerate", "description",
+ "document"]
(name, star, args) <- command
spaces
let argStr = concat args
state <- getState
- if (name == "item") && ((stateParserContext state) == ListItemState)
+ if name == "item" && (stateParserContext state) == ListItemState
then fail "should not be parsed as raw"
else string ""
if stateParseRaw state
- then return (Plain [TeX ("\\" ++ name ++ star ++ argStr)])
- else return (Plain [Str (joinWithSep " " args)]))
+ then return $ Plain [TeX ("\\" ++ name ++ star ++ argStr)]
+ else return $ Plain [Str (joinWithSep " " args)]
-- latex comment
-comment = try (do
- char '%'
- result <- manyTill anyChar newline
- spaces
- return Null)
+comment = try $ char '%' >> manyTill anyChar newline >> spaces >> return Null
--
-- inline
--
-inline = choice [ strong, emph, strikeout, superscript, subscript,
- ref, lab, code, linebreak, spacer,
- math, ellipses, emDash, enDash, hyphen, quoted, apostrophe,
- accentedChar, specialChar, specialInline, escapedChar,
- unescapedChar, str, endline, whitespace ] <?> "inline"
-
-specialInline = choice [ url, link, image, footnote, rawLaTeXInline ]
- <?> "link, raw TeX, note, or image"
+inline = choice [ strong
+ , emph
+ , strikeout
+ , superscript
+ , subscript
+ , ref
+ , lab
+ , code
+ , linebreak
+ , spacer
+ , math
+ , ellipses
+ , emDash
+ , enDash
+ , hyphen
+ , quoted
+ , apostrophe
+ , accentedChar
+ , specialChar
+ , url
+ , link
+ , image
+ , footnote
+ , rawLaTeXInline
+ , escapedChar
+ , unescapedChar
+ , str
+ , endline
+ , whitespace ] <?> "inline"
accentedChar = normalAccentedChar <|> specialAccentedChar
-normalAccentedChar = try (do
+normalAccentedChar = try $ do
char '\\'
accent <- oneOf "'`^\"~"
- character <- choice [ between (char '{') (char '}') anyChar, anyChar ]
+ character <- (try $ char '{' >> alphaNum >>~ char '}') <|> alphaNum
let table = fromMaybe [] $ lookup character accentTable
let result = case lookup accent table of
Just num -> chr num
Nothing -> '?'
- return (Str [result]))
+ return $ Str [result]
-- an association list of letters and association list of accents
-- and decimal character numbers.
@@ -451,245 +444,179 @@ accentTable =
('u', [('`', 249), ('\'', 250), ('^', 251), ('"', 252)]) ]
specialAccentedChar = choice [ ccedil, aring, iuml, szlig, aelig,
- oslash, pound, euro, copyright, sect ]
+ oslash, pound, euro, copyright, sect ]
-ccedil = try (do
+ccedil = try $ do
char '\\'
letter <- oneOfStrings ["cc", "cC"]
let num = if letter == "cc" then 231 else 199
- return (Str [chr num]))
+ return $ Str [chr num]
-aring = try (do
+aring = try $ do
char '\\'
letter <- oneOfStrings ["aa", "AA"]
let num = if letter == "aa" then 229 else 197
- return (Str [chr num]))
+ return $ Str [chr num]
-iuml = try (do
- string "\\\""
- oneOfStrings ["\\i", "{\\i}"]
- return (Str [chr 239]))
+iuml = try (string "\\\"") >> oneOfStrings ["\\i", "{\\i}"] >>
+ return (Str [chr 239])
-icirc = try (do
- string "\\^"
- oneOfStrings ["\\i", "{\\i}"]
- return (Str [chr 238]))
+icirc = try (string "\\^") >> oneOfStrings ["\\i", "{\\i}"] >>
+ return (Str [chr 238])
-szlig = try (do
- string "\\ss"
- return (Str [chr 223]))
+szlig = try (string "\\ss") >> return (Str [chr 223])
-oslash = try (do
+oslash = try $ do
char '\\'
letter <- choice [char 'o', char 'O']
let num = if letter == 'o' then 248 else 216
- return (Str [chr num]))
+ return $ Str [chr num]
-aelig = try (do
+aelig = try $ do
char '\\'
letter <- oneOfStrings ["ae", "AE"]
let num = if letter == "ae" then 230 else 198
- return (Str [chr num]))
+ return $ Str [chr num]
-pound = try (do
- string "\\pounds"
- return (Str [chr 163]))
+pound = try (string "\\pounds") >> return (Str [chr 163])
-euro = try (do
- string "\\euro"
- return (Str [chr 8364]))
+euro = try (string "\\euro") >> return (Str [chr 8364])
-copyright = try (do
- string "\\copyright"
- return (Str [chr 169]))
+copyright = try (string "\\copyright") >> return (Str [chr 169])
-sect = try (do
- string "\\S"
- return (Str [chr 167]))
+sect = try (string "\\S") >> return (Str [chr 167])
escapedChar = do
result <- escaped (oneOf " $%&_#{}\n")
- return (if result == Str "\n" then Str " " else result)
+ return $ if result == Str "\n" then Str " " else result
-unescapedChar = do -- ignore standalone, nonescaped special characters
- oneOf "$^&_#{}|<>"
- return (Str "")
+-- ignore standalone, nonescaped special characters
+unescapedChar = oneOf "$^&_#{}|<>" >> return (Str "")
specialChar = choice [ backslash, tilde, caret, bar, lt, gt ]
-backslash = try (do
- string "\\textbackslash"
- return (Str "\\"))
+backslash = try (string "\\textbackslash") >> return (Str "\\")
-tilde = try (do
- string "\\ensuremath{\\sim}"
- return (Str "~"))
+tilde = try (string "\\ensuremath{\\sim}") >> return (Str "~")
-caret = try (do
- string "\\^{}"
- return (Str "^"))
+caret = try (string "\\^{}") >> return (Str "^")
-bar = try (do
- string "\\textbar"
- return (Str "\\"))
+bar = try (string "\\textbar") >> return (Str "\\")
-lt = try (do
- string "\\textless"
- return (Str "<"))
+lt = try (string "\\textless") >> return (Str "<")
-gt = try (do
- string "\\textgreater"
- return (Str ">"))
+gt = try (string "\\textgreater") >> return (Str ">")
-code = try (do
+code = try $ do
string "\\verb"
marker <- anyChar
result <- manyTill anyChar (char marker)
- let result' = removeLeadingTrailingSpace result
- return (Code result'))
+ return $ Code $ removeLeadingTrailingSpace result
-emph = try (do
- oneOfStrings [ "\\emph{", "\\textit{" ]
- result <- manyTill inline (char '}')
- return (Emph result))
+emph = try $ oneOfStrings [ "\\emph{", "\\textit{" ] >>
+ manyTill inline (char '}') >>= return . Emph
-strikeout = try $ do
- string "\\sout{"
- result <- manyTill inline (char '}')
- return (Strikeout result)
+strikeout = try $ string "\\sout{" >> manyTill inline (char '}') >>=
+ return . Strikeout
-superscript = try $ do
- string "\\textsuperscript{"
- result <- manyTill inline (char '}')
- return (Superscript result)
+superscript = try $ string "\\textsuperscript{" >>
+ manyTill inline (char '}') >>= return . Superscript
-- note: \textsubscript isn't a standard latex command, but we use
-- a defined version in pandoc.
-subscript = try $ do
- string "\\textsubscript{"
- result <- manyTill inline (char '}')
- return (Subscript result)
+subscript = try $ string "\\textsubscript{" >> manyTill inline (char '}') >>=
+ return . Subscript
-apostrophe = do
- char '\''
- return Apostrophe
+apostrophe = char '\'' >> return Apostrophe
-quoted = do
- doubleQuoted <|> singleQuoted
+quoted = doubleQuoted <|> singleQuoted
-singleQuoted = try (do
- result <- enclosed singleQuoteStart singleQuoteEnd inline
- return $ Quoted SingleQuote $ normalizeSpaces result)
+singleQuoted = enclosed singleQuoteStart singleQuoteEnd inline >>=
+ return . Quoted SingleQuote . normalizeSpaces
-doubleQuoted = try (do
- result <- enclosed doubleQuoteStart doubleQuoteEnd inline
- return $ Quoted DoubleQuote $ normalizeSpaces result)
+doubleQuoted = enclosed doubleQuoteStart doubleQuoteEnd inline >>=
+ return . Quoted DoubleQuote . normalizeSpaces
singleQuoteStart = char '`'
-singleQuoteEnd = char '\'' >> notFollowedBy alphaNum
+singleQuoteEnd = try $ char '\'' >> notFollowedBy alphaNum
doubleQuoteStart = string "``"
doubleQuoteEnd = string "''"
-ellipses = try (do
- string "\\ldots"
- option "" (try (string "{}"))
- return Ellipses)
+ellipses = try $ string "\\ldots" >> optional (try (string "{}")) >>
+ return Ellipses
-enDash = try (do
- string "--"
- notFollowedBy (char '-')
- return EnDash)
+enDash = try (string "--") >> return EnDash
-emDash = try (do
- string "---"
- return EmDash)
+emDash = try (string "---") >> return EmDash
-hyphen = do
- char '-'
- return (Str "-")
+hyphen = char '-' >> return (Str "-")
-lab = try (do
+lab = try $ do
string "\\label{"
result <- manyTill anyChar (char '}')
- return (Str ("(" ++ result ++ ")")))
+ return $ Str $ "(" ++ result ++ ")"
-ref = try (do
- string "\\ref{"
- result <- manyTill anyChar (char '}')
- return (Str (result)))
+ref = try (string "\\ref{") >> manyTill anyChar (char '}') >>= return . Str
-strong = try (do
- string "\\textbf{"
- result <- manyTill inline (char '}')
- return (Strong result))
+strong = try (string "\\textbf{") >> manyTill inline (char '}') >>=
+ return . Strong
-whitespace = do
- many1 (oneOf "~ \t")
- return Space
+whitespace = many1 (oneOf "~ \t") >> return Space
-- hard line break
-linebreak = try (do
- string "\\\\"
- return LineBreak)
+linebreak = try (string "\\\\") >> return LineBreak
-spacer = try $ do
- string "\\,"
- return (Str "")
+spacer = try (string "\\,") >> return (Str "")
-str = do
- result <- many1 (noneOf specialChars)
- return (Str result)
+str = many1 (noneOf specialChars) >>= return . Str
-- endline internal to paragraph
-endline = try (do
- newline
- notFollowedBy blankline
- return Space)
+endline = try $ newline >> notFollowedBy blankline >> return Space
-- math
math = math1 <|> math2 <?> "math"
-math1 = try (do
+math1 = try $ do
char '$'
result <- many (noneOf "$")
char '$'
- return (TeX ("$" ++ result ++ "$")))
+ return $ TeX ("$" ++ result ++ "$")
-math2 = try (do
+math2 = try $ do
string "\\("
result <- many (noneOf "$")
string "\\)"
- return (TeX ("$" ++ result ++ "$")))
+ return $ TeX ("$" ++ result ++ "$")
--
-- links and images
--
-url = try (do
+url = try $ do
string "\\url"
url <- charsInBalanced '{' '}'
- return (Link [Code url] (url, "")))
+ return $ Link [Code url] (url, "")
-link = try (do
+link = try $ do
string "\\href{"
url <- manyTill anyChar (char '}')
char '{'
label <- manyTill inline (char '}')
- return (Link (normalizeSpaces label) (url, "")))
+ return $ Link (normalizeSpaces label) (url, "")
-image = try (do
+image = try $ do
("includegraphics", _, args) <- command
let args' = filter isArg args -- filter out options
let src = if null args' then
("", "")
else
(stripFirstAndLast (head args'), "")
- return (Image [Str "image"] src))
+ return $ Image [Str "image"] src
-footnote = try (do
+footnote = try $ do
(name, _, (contents:[])) <- command
if ((name == "footnote") || (name == "thanks"))
then string ""
@@ -700,16 +627,15 @@ footnote = try (do
setInput $ contents'
blocks <- parseBlocks
setInput rest
- return (Note blocks))
+ return $ Note blocks
-- | Parse any LaTeX command and return it in a raw TeX inline element.
rawLaTeXInline :: GenParser Char ParserState Inline
-rawLaTeXInline = try (do
+rawLaTeXInline = try $ do
(name, star, args) <- command
- let argStr = concat args
state <- getState
if ((name == "begin") || (name == "end") || (name == "item"))
then fail "not an inline command"
else string ""
- return (TeX ("\\" ++ name ++ star ++ argStr)))
+ return $ TeX ("\\" ++ name ++ star ++ concat args)
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 3ccb74ba7..80a8507b4 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -31,28 +31,24 @@ module Text.Pandoc.Readers.Markdown (
readMarkdown
) where
-import Data.List ( findIndex, sortBy, transpose, isSuffixOf, intersect, lookup )
+import Data.List ( transpose, isSuffixOf, lookup, sortBy )
+import Data.Ord ( comparing )
import Data.Char ( isAlphaNum )
-import Text.Pandoc.ParserCombinators
import Text.Pandoc.Definition
-import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment )
import Text.Pandoc.Shared
+import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment )
import Text.Pandoc.Readers.HTML ( rawHtmlBlock,
anyHtmlBlockTag, anyHtmlInlineTag,
anyHtmlTag, anyHtmlEndTag,
htmlEndTag, extractTagType,
htmlBlockElement )
-import Text.Pandoc.Entities ( characterEntity, decodeEntities )
+import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
import Text.ParserCombinators.Parsec
-- | Read markdown from an input string and return a Pandoc document.
readMarkdown :: ParserState -> String -> Pandoc
readMarkdown state str = (readWith parseMarkdown) state (str ++ "\n\n")
--- | Parse markdown string with default options and print result (for testing).
-testString :: String -> IO ()
-testString = testStringWith parseMarkdown
-
--
-- Constants and data structure definitions
--
@@ -70,19 +66,16 @@ specialChars = "\\[]*_~`<>$!^-.&'\""
-- auxiliary functions
--
--- | Skip a single endline if there is one.
-skipEndline = option Space endline
-
indentSpaces = try $ do
state <- getState
let tabStop = stateTabStop state
try (count tabStop (char ' ')) <|>
- (do{many (char ' '); string "\t"}) <?> "indentation"
+ (many (char ' ') >> string "\t") <?> "indentation"
nonindentSpaces = do
state <- getState
let tabStop = stateTabStop state
- choice (map (\n -> (try (count n (char ' ')))) (reverse [0..(tabStop - 1)]))
+ choice $ map (\n -> (try (count n (char ' ')))) $ reverse [0..(tabStop - 1)]
-- | Fail unless we're at beginning of a line.
failUnlessBeginningOfLine = do
@@ -94,20 +87,21 @@ failUnlessSmart = do
state <- getState
if stateSmart state then return () else fail "Smart typography feature"
+-- | Parse an inline Str element with a given content.
+inlineString str = try $ do
+ (Str res) <- inline
+ if res == str then return res else fail $ "unexpected Str content"
+
-- | Parse a sequence of inline elements between a string
-- @opener@ and a string @closer@, including inlines
-- between balanced pairs of @opener@ and a @closer@.
inlinesInBalanced :: String -> String -> GenParser Char ParserState [Inline]
inlinesInBalanced opener closer = try $ do
- let openerSymbol = try $ do
- res <- inline
- if res == Str opener
- then return res
- else pzero
- try (string opener)
- result <- manyTill ( (do lookAhead openerSymbol
- bal <- inlinesInBalanced opener closer
- return $ [Str opener] ++ bal ++ [Str closer])
+ string opener
+ result <- manyTill ( (do lookAhead (inlineString opener)
+ -- because it might be a link...
+ bal <- inlinesInBalanced opener closer
+ return $ [Str opener] ++ bal ++ [Str closer])
<|> (count 1 inline))
(try (string closer))
return $ concat result
@@ -116,59 +110,55 @@ inlinesInBalanced opener closer = try $ do
-- document structure
--
-titleLine = try (do
- char '%'
- skipSpaces
- line <- manyTill inline newline
- return line)
+titleLine = try $ char '%' >> skipSpaces >> manyTill inline newline
-authorsLine = try (do
+authorsLine = try $ do
char '%'
skipSpaces
authors <- sepEndBy (many1 (noneOf ",;\n")) (oneOf ",;")
newline
- return (map (decodeEntities . removeLeadingTrailingSpace) authors))
+ return $ map (decodeCharacterReferences . removeLeadingTrailingSpace) authors
-dateLine = try (do
+dateLine = try $ do
char '%'
skipSpaces
date <- many (noneOf "\n")
newline
- return (decodeEntities $ removeTrailingSpace date))
+ return $ decodeCharacterReferences $ removeTrailingSpace date
-titleBlock = try (do
+titleBlock = try $ do
failIfStrict
title <- option [] titleLine
author <- option [] authorsLine
date <- option "" dateLine
- option "" blanklines
- return (title, author, date))
+ optional blanklines
+ return (title, author, date)
parseMarkdown = do
- updateState (\state -> state { stateParseRaw = True }) -- markdown allows raw HTML
+ -- markdown allows raw HTML
+ updateState (\state -> state { stateParseRaw = True })
(title, author, date) <- option ([],[],"") titleBlock
-- go through once just to get list of reference keys
- refs <- manyTill (referenceKey <|> (do l <- lineClump
- return (LineClump l))) eof
+ refs <- manyTill (referenceKey <|> (lineClump >>= return . LineClump)) eof
let keys = map (\(KeyBlock label target) -> (label, target)) $
filter isKeyBlock refs
let rawlines = map (\(LineClump ln) -> ln) $ filter isLineClump refs
setInput $ concat rawlines -- with keys stripped out
updateState (\state -> state { stateKeys = keys })
- -- now go through for notes
- refs <- manyTill (noteBlock <|> (do l <- lineClump
- return (LineClump l))) eof
+ -- now go through for notes (which may contain references - hence 2nd pass)
+ refs <- manyTill (noteBlock <|> (lineClump >>= return . LineClump)) eof
let notes = map (\(NoteBlock label blocks) -> (label, blocks)) $
filter isNoteBlock refs
let rawlines = map (\(LineClump ln) -> ln) $ filter isLineClump refs
- setInput $ concat rawlines -- with note blocks and keys stripped out
+ -- go through a 3rd time, with note blocks and keys stripped out
+ setInput $ concat rawlines
updateState (\state -> state { stateNotes = notes })
- blocks <- parseBlocks -- go through again, for real
+ blocks <- parseBlocks
let blocks' = filter (/= Null) blocks
- return (Pandoc (Meta title author date) blocks')
+ return $ Pandoc (Meta title author date) blocks'
--
--- initial pass for references
+-- initial pass for references and notes
--
referenceKey = try $ do
@@ -176,9 +166,9 @@ referenceKey = try $ do
label <- reference
char ':'
skipSpaces
- option ' ' (char '<')
+ optional (char '<')
src <- many (noneOf "> \n\t")
- option ' ' (char '>')
+ optional (char '>')
tit <- option "" title
blanklines
return $ KeyBlock label (removeTrailingSpace src, tit)
@@ -189,33 +179,28 @@ noteMarker = try $ do
manyTill (noneOf " \t\n") (char ']')
rawLine = try $ do
- notFollowedBy' blankline
+ notFollowedBy blankline
notFollowedBy' noteMarker
contents <- many1 nonEndline
- end <- option "" (do
- newline
- option "" indentSpaces
- return "\n")
- return (contents ++ end)
+ end <- option "" (newline >> optional indentSpaces >> return "\n")
+ return $ contents ++ end
-rawLines = do
- lines <- many1 rawLine
- return (concat lines)
+rawLines = many1 rawLine >>= return . concat
noteBlock = try $ do
failIfStrict
ref <- noteMarker
char ':'
- option ' ' blankline
- option "" indentSpaces
- raw <- sepBy rawLines (try (do {blankline; indentSpaces}))
- option "" blanklines
+ optional blankline
+ optional indentSpaces
+ raw <- sepBy rawLines (try (blankline >> indentSpaces))
+ optional blanklines
-- parse the extracted text, which may contain various block elements:
rest <- getInput
setInput $ (joinWithSep "\n" raw) ++ "\n\n"
contents <- parseBlocks
setInput rest
- return (NoteBlock ref contents)
+ return $ NoteBlock ref contents
--
-- parsing blocks
@@ -239,48 +224,39 @@ block = choice [ header
-- header blocks
--
-header = choice [ setextHeader, atxHeader ] <?> "header"
+header = setextHeader <|> atxHeader <?> "header"
-atxHeader = try (do
+atxHeader = try $ do
lead <- many1 (char '#')
- notFollowedBy (char '.') -- this would be a list
- notFollowedBy (char ')')
+ notFollowedBy (char '.' <|> char ')') -- this would be a list
skipSpaces
txt <- manyTill inline atxClosing
- return (Header (length lead) (normalizeSpaces txt)))
+ return $ Header (length lead) (normalizeSpaces txt)
-atxClosing = try (do
- skipMany (char '#')
- skipSpaces
- newline
- option "" blanklines)
+atxClosing = try $ skipMany (char '#') >> skipSpaces >> newline >>
+ option "" blanklines
setextHeader = choice $
- map (\x -> setextH x) (enumFromTo 1 (length setextHChars))
+ map (\x -> setextH x) $ enumFromTo 1 (length setextHChars)
-setextH n = try (do
+setextH n = try $ do
txt <- many1Till inline newline
many1 (char (setextHChars !! (n-1)))
skipSpaces
newline
- option "" blanklines
- return (Header n (normalizeSpaces txt)))
+ optional blanklines
+ return $ Header n (normalizeSpaces txt)
--
-- hrule block
--
-hruleWith chr = try (do
- skipSpaces
- char chr
- skipSpaces
- char chr
- skipSpaces
- char chr
- skipMany (oneOf (chr:spaceChars))
+hruleWith chr = try $ do
+ count 3 (skipSpaces >> char chr)
+ skipMany (skipSpaces >> char chr)
newline
- option "" blanklines
- return HorizontalRule)
+ optional blanklines
+ return HorizontalRule
hrule = choice (map hruleWith hruleChars) <?> "hrule"
@@ -288,67 +264,55 @@ hrule = choice (map hruleWith hruleChars) <?> "hrule"
-- code blocks
--
-indentedLine = try (do
+indentedLine = try $ do
indentSpaces
result <- manyTill anyChar newline
- return (result ++ "\n"))
+ return $ result ++ "\n"
-- two or more indented lines, possibly separated by blank lines
-indentedBlock = try (do
+indentedBlock = try $ do
res1 <- indentedLine
blanks <- many blankline
- res2 <- choice [indentedBlock, indentedLine]
- return (res1 ++ blanks ++ res2))
+ res2 <- indentedBlock <|> indentedLine
+ return $ res1 ++ blanks ++ res2
-codeBlock = do
- result <- choice [indentedBlock, indentedLine]
- option "" blanklines
- return (CodeBlock (stripTrailingNewlines result))
+codeBlock = (indentedBlock <|> indentedLine) >>~ optional blanklines >>=
+ return . CodeBlock . stripTrailingNewlines
--
-- block quotes
--
-emacsBoxQuote = try (do
+emacsBoxQuote = try $ do
failIfStrict
string ",----"
manyTill anyChar newline
- raw <- manyTill (try (do
- char '|'
- option ' ' (char ' ')
- result <- manyTill anyChar newline
- return result))
- (string "`----")
- manyTill anyChar newline
- option "" blanklines
- return raw)
+ raw <- manyTill
+ (try (char '|' >> optional (char ' ') >> manyTill anyChar newline))
+ (try (string "`----"))
+ blanklines
+ return raw
-emailBlockQuoteStart = try (do
- nonindentSpaces
- char '>'
- option ' ' (char ' ')
- return "> ")
+emailBlockQuoteStart = try $ nonindentSpaces >> char '>' >>~ optional (char ' ')
-emailBlockQuote = try (do
+emailBlockQuote = try $ do
emailBlockQuoteStart
- raw <- sepBy (many (choice [nonEndline,
- (try (do
- endline
- notFollowedBy' emailBlockQuoteStart
- return '\n'))]))
- (try (do {newline; emailBlockQuoteStart}))
- newline <|> (do{ eof; return '\n' })
- option "" blanklines
- return raw)
+ raw <- sepBy (many (nonEndline <|>
+ (try (endline >> notFollowedBy emailBlockQuoteStart >>
+ return '\n'))))
+ (try (newline >> emailBlockQuoteStart))
+ newline <|> (eof >> return '\n')
+ optional blanklines
+ return raw
blockQuote = do
- raw <- choice [ emailBlockQuote, emacsBoxQuote ]
+ raw <- emailBlockQuote <|> emacsBoxQuote
-- parse the extracted block, which may contain various block elements:
rest <- getInput
setInput $ (joinWithSep "\n" raw) ++ "\n\n"
contents <- parseBlocks
setInput rest
- return (BlockQuote contents)
+ return $ BlockQuote contents
--
-- list blocks
@@ -357,7 +321,7 @@ blockQuote = do
list = choice [ bulletList, orderedList, definitionList ] <?> "list"
bulletListStart = try $ do
- option ' ' newline -- if preceded by a Plain block in a list context
+ optional newline -- if preceded by a Plain block in a list context
nonindentSpaces
notFollowedBy' hrule -- because hrules start out just like lists
oneOf bulletListMarkers
@@ -365,7 +329,7 @@ bulletListStart = try $ do
skipSpaces
anyOrderedListStart = try $ do
- option ' ' newline -- if preceded by a Plain block in a list context
+ optional newline -- if preceded by a Plain block in a list context
nonindentSpaces
state <- getState
if stateStrict state
@@ -375,7 +339,7 @@ anyOrderedListStart = try $ do
else anyOrderedListMarker
orderedListStart style delim = try $ do
- option ' ' newline -- if preceded by a Plain block in a list context
+ optional newline -- if preceded by a Plain block in a list context
nonindentSpaces
state <- getState
if stateStrict state
@@ -387,40 +351,39 @@ orderedListStart style delim = try $ do
skipSpaces
-- parse a line of a list item (start = parser for beginning of list item)
-listLine start = try (do
+listLine start = try $ do
notFollowedBy' start
notFollowedBy blankline
- notFollowedBy' (do
- indentSpaces
- many (spaceChar)
- choice [bulletListStart, anyOrderedListStart >> return ()])
+ notFollowedBy' (do indentSpaces
+ many (spaceChar)
+ bulletListStart <|> (anyOrderedListStart >> return ()))
line <- manyTill anyChar newline
- return (line ++ "\n"))
+ return $ line ++ "\n"
-- parse raw text for one list item, excluding start marker and continuations
-rawListItem start = try (do
+rawListItem start = try $ do
start
result <- many1 (listLine start)
blanks <- many blankline
- return ((concat result) ++ blanks))
+ return $ concat result ++ blanks
-- continuation of a list item - indented and separated by blankline
-- or (in compact lists) endline.
-- note: nested lists are parsed as continuations
-listContinuation start = try (do
+listContinuation start = try $ do
lookAhead indentSpaces
result <- many1 (listContinuationLine start)
blanks <- many blankline
- return ((concat result) ++ blanks))
+ return $ concat result ++ blanks
-listContinuationLine start = try (do
- notFollowedBy' blankline
+listContinuationLine start = try $ do
+ notFollowedBy blankline
notFollowedBy' start
- option "" indentSpaces
+ optional indentSpaces
result <- manyTill anyChar newline
- return (result ++ "\n"))
+ return $ result ++ "\n"
-listItem start = try (do
+listItem start = try $ do
first <- rawListItem start
continuations <- many (listContinuation start)
-- parsing with ListItemState forces markers at beginning of lines to
@@ -436,18 +399,15 @@ listItem start = try (do
contents <- parseBlocks
setInput rest
updateState (\st -> st {stateParserContext = oldContext})
- return contents)
+ return contents
-orderedList = try (do
+orderedList = do
(start, style, delim) <- lookAhead anyOrderedListStart
items <- many1 (listItem (orderedListStart style delim))
- let items' = compactify items
- return (OrderedList (start, style, delim) items'))
+ return $ OrderedList (start, style, delim) $ compactify items
-bulletList = try (do
- items <- many1 (listItem bulletListStart)
- let items' = compactify items
- return (BulletList items'))
+bulletList = many1 (listItem bulletListStart) >>=
+ return . BulletList . compactify
-- definition lists
@@ -470,9 +430,9 @@ defRawBlock = try $ do
char ':'
state <- getState
let tabStop = stateTabStop state
- try (count (tabStop - 1) (char ' ')) <|> (do{many (char ' '); string "\t"})
+ try (count (tabStop - 1) (char ' ')) <|> (many (char ' ') >> string "\t")
firstline <- anyLine
- rawlines <- many (do {notFollowedBy' blankline; indentSpaces; anyLine})
+ rawlines <- many (notFollowedBy blankline >> indentSpaces >> anyLine)
trailing <- option "" blanklines
return $ firstline ++ "\n" ++ unlines rawlines ++ trailing
@@ -488,71 +448,62 @@ definitionList = do
-- paragraph block
--
-para = try (do
+para = try $ do
result <- many1 inline
newline
st <- getState
if stateStrict st
- then choice [lookAhead blockQuote, lookAhead header,
- (do{blanklines; return Null})]
- else choice [(do{lookAhead emacsBoxQuote; return Null}),
- (do{blanklines; return Null})]
- let result' = normalizeSpaces result
- return (Para result'))
-
-plain = do
- result <- many1 inline
- let result' = normalizeSpaces result
- return (Plain result')
+ then choice [ lookAhead blockQuote, lookAhead header,
+ (blanklines >> return Null) ]
+ else choice [ lookAhead emacsBoxQuote >> return Null,
+ (blanklines >> return Null) ]
+ return $ Para $ normalizeSpaces result
+
+plain = many1 inline >>= return . Plain . normalizeSpaces
--
-- raw html
--
-htmlElement = choice [strictHtmlBlock,
- htmlBlockElement] <?> "html element"
+htmlElement = strictHtmlBlock <|> htmlBlockElement <?> "html element"
htmlBlock = do
st <- getState
if stateStrict st
- then do
- failUnlessBeginningOfLine
- first <- htmlElement
- finalSpace <- many (oneOf spaceChars)
- finalNewlines <- many newline
- return (RawHtml (first ++ finalSpace ++ finalNewlines))
+ then try $ do failUnlessBeginningOfLine
+ first <- htmlElement
+ finalSpace <- many (oneOf spaceChars)
+ finalNewlines <- many newline
+ return $ RawHtml $ first ++ finalSpace ++ finalNewlines
else rawHtmlBlocks
-- True if tag is self-closing
isSelfClosing tag =
isSuffixOf "/>" $ filter (\c -> (not (c `elem` " \n\t"))) tag
-strictHtmlBlock = try (do
+strictHtmlBlock = try $ do
tag <- anyHtmlBlockTag
let tag' = extractTagType tag
if isSelfClosing tag || tag' == "hr"
then return tag
- else do
- contents <- many (do{notFollowedBy' (htmlEndTag tag');
- htmlElement <|> (count 1 anyChar)})
- end <- htmlEndTag tag'
- return $ tag ++ (concat contents) ++ end)
+ else do contents <- many (notFollowedBy' (htmlEndTag tag') >>
+ (htmlElement <|> (count 1 anyChar)))
+ end <- htmlEndTag tag'
+ return $ tag ++ concat contents ++ end
-rawHtmlBlocks = try (do
+rawHtmlBlocks = try $ do
htmlBlocks <- many1 rawHtmlBlock
let combined = concatMap (\(RawHtml str) -> str) htmlBlocks
- let combined' = if (last combined == '\n')
+ let combined' = if not (null combined) && last combined == '\n'
then init combined -- strip extra newline
else combined
- return (RawHtml combined'))
+ return $ RawHtml combined'
--
-- LaTeX
--
-rawLaTeXEnvironment' = do
- failIfStrict
- rawLaTeXEnvironment
+rawLaTeXEnvironment' = failIfStrict >> rawLaTeXEnvironment
--
-- Tables
@@ -560,54 +511,46 @@ rawLaTeXEnvironment' = do
-- Parse a dashed line with optional trailing spaces; return its length
-- and the length including trailing space.
-dashedLine ch = do
- dashes <- many1 (char ch)
- sp <- many spaceChar
- return $ (length dashes, length $ dashes ++ sp)
+dashedLine ch = try $ do
+ dashes <- many1 (char ch)
+ sp <- many spaceChar
+ return $ (length dashes, length $ dashes ++ sp)
-- Parse a table header with dashed lines of '-' preceded by
-- one line of text.
-simpleTableHeader = do
- rawContent <- anyLine
- initSp <- nonindentSpaces
- dashes <- many1 (dashedLine '-')
- newline
- let (lengths, lines) = unzip dashes
- let indices = scanl (+) (length initSp) lines
- let rawHeads = tail $ splitByIndices (init indices) rawContent
- let aligns = zipWith alignType (map (\a -> [a]) rawHeads) lengths
- return $ (rawHeads, aligns, indices)
+simpleTableHeader = try $ do
+ rawContent <- anyLine
+ initSp <- nonindentSpaces
+ dashes <- many1 (dashedLine '-')
+ newline
+ let (lengths, lines) = unzip dashes
+ let indices = scanl (+) (length initSp) lines
+ let rawHeads = tail $ splitByIndices (init indices) rawContent
+ let aligns = zipWith alignType (map (\a -> [a]) rawHeads) lengths
+ return (rawHeads, aligns, indices)
-- Parse a table footer - dashed lines followed by blank line.
-tableFooter = try $ do
- nonindentSpaces
- many1 (dashedLine '-')
- blanklines
+tableFooter = try $ nonindentSpaces >> many1 (dashedLine '-') >> blanklines
-- Parse a table separator - dashed line.
-tableSep = try $ do
- nonindentSpaces
- many1 (dashedLine '-')
- string "\n"
+tableSep = try $ nonindentSpaces >> many1 (dashedLine '-') >> string "\n"
-- Parse a raw line and split it into chunks by indices.
rawTableLine indices = do
- notFollowedBy' (blanklines <|> tableFooter)
- line <- many1Till anyChar newline
- return $ map removeLeadingTrailingSpace $ tail $
- splitByIndices (init indices) line
+ notFollowedBy' (blanklines <|> tableFooter)
+ line <- many1Till anyChar newline
+ return $ map removeLeadingTrailingSpace $ tail $
+ splitByIndices (init indices) line
-- Parse a table line and return a list of lists of blocks (columns).
-tableLine indices = try $ do
- rawline <- rawTableLine indices
- mapM (parseFromString (many plain)) rawline
+tableLine indices = rawTableLine indices >>= mapM (parseFromString (many plain))
-- Parse a multiline table row and return a list of blocks (columns).
multilineRow indices = try $ do
- colLines <- many1 (rawTableLine indices)
- option "" blanklines
- let cols = map unlines $ transpose colLines
- mapM (parseFromString (many plain)) cols
+ colLines <- many1 (rawTableLine indices)
+ optional blanklines
+ let cols = map unlines $ transpose colLines
+ mapM (parseFromString (many plain)) cols
-- Calculate relative widths of table columns, based on indices
widthsFromIndices :: Int -- Number of columns on terminal
@@ -615,22 +558,22 @@ widthsFromIndices :: Int -- Number of columns on terminal
-> [Float] -- Fractional relative sizes of columns
widthsFromIndices _ [] = []
widthsFromIndices numColumns indices =
- let lengths = zipWith (-) indices (0:indices)
- totLength = sum lengths
- quotient = if totLength > numColumns
- then fromIntegral totLength
- else fromIntegral numColumns
- fracs = map (\l -> (fromIntegral l) / quotient) lengths in
- tail fracs
+ let lengths = zipWith (-) indices (0:indices)
+ totLength = sum lengths
+ quotient = if totLength > numColumns
+ then fromIntegral totLength
+ else fromIntegral numColumns
+ fracs = map (\l -> (fromIntegral l) / quotient) lengths in
+ tail fracs
-- Parses a table caption: inlines beginning with 'Table:'
-- and followed by blank lines.
tableCaption = try $ do
- nonindentSpaces
- string "Table:"
- result <- many1 inline
- blanklines
- return $ normalizeSpaces result
+ nonindentSpaces
+ string "Table:"
+ result <- many1 inline
+ blanklines
+ return $ normalizeSpaces result
-- Parse a table using 'headerParser', 'lineParser', and 'footerParser'.
tableWith headerParser lineParser footerParser = try $ do
@@ -653,30 +596,19 @@ simpleTable = tableWith simpleTableHeader tableLine blanklines
multilineTable = tableWith multilineTableHeader multilineRow tableFooter
multilineTableHeader = try $ do
- tableSep
- rawContent <- many1 (do{notFollowedBy' tableSep;
- many1Till anyChar newline})
- initSp <- nonindentSpaces
- dashes <- many1 (dashedLine '-')
- newline
- let (lengths, lines) = unzip dashes
- let indices = scanl (+) (length initSp) lines
- let rawHeadsList = transpose $ map
- (\ln -> tail $ splitByIndices (init indices) ln)
- rawContent
- let rawHeads = map (joinWithSep " ") rawHeadsList
- let aligns = zipWith alignType rawHeadsList lengths
- return $ ((map removeLeadingTrailingSpace rawHeads),
- aligns, indices)
-
--- Returns the longest of a list of strings.
-longest :: [String] -> String
-longest [] = ""
-longest [x] = x
-longest (x:xs) =
- if (length x) >= (maximum $ map length xs)
- then x
- else longest xs
+ tableSep
+ rawContent <- many1 (notFollowedBy' tableSep >> many1Till anyChar newline)
+ initSp <- nonindentSpaces
+ dashes <- many1 (dashedLine '-')
+ newline
+ let (lengths, lines) = unzip dashes
+ let indices = scanl (+) (length initSp) lines
+ let rawHeadsList = transpose $ map
+ (\ln -> tail $ splitByIndices (init indices) ln)
+ rawContent
+ let rawHeads = map (joinWithSep " ") rawHeadsList
+ let aligns = zipWith alignType rawHeadsList lengths
+ return ((map removeLeadingTrailingSpace rawHeads), aligns, indices)
-- Returns an alignment type for a table, based on a list of strings
-- (the rows of the column header) and a number (the length of the
@@ -684,19 +616,17 @@ longest (x:xs) =
alignType :: [String] -> Int -> Alignment
alignType [] len = AlignDefault
alignType strLst len =
- let str = longest $ map removeTrailingSpace strLst
- leftSpace = if null str then False else ((str !! 0) `elem` " \t")
- rightSpace = (length str < len || (str !! (len - 1)) `elem` " \t") in
- case (leftSpace, rightSpace) of
+ let str = head $ sortBy (comparing length) $
+ map removeTrailingSpace strLst
+ leftSpace = if null str then False else (str !! 0) `elem` " \t"
+ rightSpace = length str < len || (str !! (len - 1)) `elem` " \t"
+ in case (leftSpace, rightSpace) of
(True, False) -> AlignRight
(False, True) -> AlignLeft
- (True, True) -> AlignCenter
+ (True, True) -> AlignCenter
(False, False) -> AlignDefault
-table = do
- failIfStrict
- result <- simpleTable <|> multilineTable <?> "table"
- return result
+table = failIfStrict >> (simpleTable <|> multilineTable) <?> "table"
--
-- inline
@@ -704,7 +634,7 @@ table = do
inline = choice [ rawLaTeXInline'
, escapedChar
- , entity
+ , charRef
, note
, inlineNote
, link
@@ -734,80 +664,64 @@ escapedChar = try $ do
result <- if stateStrict state
then oneOf "\\`*_{}[]()>#+-.!~"
else satisfy (not . isAlphaNum)
- return (Str [result])
+ return $ Str [result]
-ltSign = try (do
+ltSign = try $ do
notFollowedBy (noneOf "<") -- continue only if it's a <
notFollowedBy' rawHtmlBlocks -- don't return < if it starts html
char '<'
- return (Str ['<']))
+ return $ Str ['<']
specialCharsMinusLt = filter (/= '<') specialChars
symbol = do
result <- oneOf specialCharsMinusLt
- return (Str [result])
+ return $ Str [result]
-- parses inline code, between n `s and n `s
-code = try (do
+code = try $ do
starts <- many1 (char '`')
let num = length starts
result <- many1Till anyChar (try (count num (char '`')))
-- get rid of any internal newlines
- let result' = removeLeadingTrailingSpace $ joinWithSep " " $ lines result
- return (Code result'))
+ return $ Code $ removeLeadingTrailingSpace $ joinWithSep " " $ lines result
-mathWord = many1 (choice [ (noneOf " \t\n\\$"),
- (try (do
- c <- char '\\'
- notFollowedBy (char '$')
- return c))])
+mathWord = many1 ((noneOf " \t\n\\$") <|>
+ (try (char '\\') >>~ notFollowedBy (char '$')))
-math = try (do
+math = try $ do
failIfStrict
char '$'
notFollowedBy space
words <- sepBy1 mathWord (many1 space)
char '$'
- return (TeX ("$" ++ (joinWithSep " " words) ++ "$")))
+ return $ TeX ("$" ++ (joinWithSep " " words) ++ "$")
-emph = do
- result <- choice [ (enclosed (char '*') (char '*') inline),
- (enclosed (char '_') (char '_') inline) ]
- return $ Emph (normalizeSpaces result)
+emph = ((enclosed (char '*') (char '*') inline) <|>
+ (enclosed (char '_') (char '_') inline)) >>=
+ return . Emph . normalizeSpaces
-strong = do
- result <- (enclosed (string "**") (string "**") inline) <|>
- (enclosed (string "__") (string "__") inline)
- return $ Strong (normalizeSpaces result)
+strong = ((enclosed (string "**") (string "**") inline) <|>
+ (enclosed (string "__") (string "__") inline)) >>=
+ return . Strong . normalizeSpaces
-strikeout = do
- failIfStrict
- result <- enclosed (string "~~") (string "~~") inline
- return $ Strikeout (normalizeSpaces result)
+strikeout = failIfStrict >> enclosed (string "~~") (string "~~") inline >>=
+ return . Strikeout . normalizeSpaces
-superscript = do
- failIfStrict
- result <- enclosed (char '^') (char '^')
- (notFollowedBy' whitespace >> inline) -- may not contain Space
- return $ Superscript result
+superscript = failIfStrict >> enclosed (char '^') (char '^')
+ (notFollowedBy' whitespace >> inline) >>= -- may not contain Space
+ return . Superscript
-subscript = do
- failIfStrict
- result <- enclosed (char '~') (char '~')
- (notFollowedBy' whitespace >> inline) -- may not contain Space
- return $ Subscript result
+subscript = failIfStrict >> enclosed (char '~') (char '~')
+ (notFollowedBy' whitespace >> inline) >>= -- may not contain Space
+ return . Subscript
-smartPunctuation = do
- failUnlessSmart
- choice [ quoted, apostrophe, dash, ellipses ]
+smartPunctuation = failUnlessSmart >>
+ choice [ quoted, apostrophe, dash, ellipses ]
-apostrophe = do
- char '\'' <|> char '\8217'
- return Apostrophe
+apostrophe = (char '\'' <|> char '\8217') >> return Apostrophe
-quoted = do
- doubleQuoted <|> singleQuoted
+quoted = doubleQuoted <|> singleQuoted
withQuoteContext context parser = do
oldState <- getState
@@ -820,15 +734,13 @@ withQuoteContext context parser = do
singleQuoted = try $ do
singleQuoteStart
- withQuoteContext InSingleQuote $ do
- result <- many1Till inline singleQuoteEnd
- return $ Quoted SingleQuote $ normalizeSpaces result
+ withQuoteContext InSingleQuote $ many1Till inline singleQuoteEnd >>=
+ return . Quoted SingleQuote . normalizeSpaces
doubleQuoted = try $ do
doubleQuoteStart
- withQuoteContext InDoubleQuote $ do
- result <- many1Till inline doubleQuoteEnd
- return $ Quoted DoubleQuote $ normalizeSpaces result
+ withQuoteContext InDoubleQuote $ many1Till inline doubleQuoteEnd >>=
+ return . Quoted DoubleQuote . normalizeSpaces
failIfInQuoteContext context = do
st <- getState
@@ -836,88 +748,65 @@ failIfInQuoteContext context = do
then fail "already inside quotes"
else return ()
-singleQuoteStart = try $ do
+singleQuoteStart = do
failIfInQuoteContext InSingleQuote
- char '\8216' <|> do
- char '\''
- notFollowedBy (oneOf ")!],.;:-? \t\n")
- notFollowedBy (try (do -- possessive or contraction
- oneOfStrings ["s","t","m","ve","ll","re"]
- satisfy (not . isAlphaNum)))
- return '\''
-
-singleQuoteEnd = try $ do
- char '\'' <|> char '\8217'
- notFollowedBy alphaNum
-
-doubleQuoteStart = try $ do
- failIfInQuoteContext InDoubleQuote
- char '"' <|> char '\8220'
- notFollowedBy (oneOf " \t\n")
+ char '\8216' <|>
+ do char '\''
+ notFollowedBy (oneOf ")!],.;:-? \t\n")
+ notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >>
+ satisfy (not . isAlphaNum))) -- possess/contraction
+ return '\''
+
+singleQuoteEnd = (char '\'' <|> char '\8217') >> notFollowedBy alphaNum
+
+doubleQuoteStart = failIfInQuoteContext InDoubleQuote >>
+ (char '"' <|> char '\8220') >>
+ notFollowedBy (oneOf " \t\n")
doubleQuoteEnd = char '"' <|> char '\8221'
-ellipses = try (do
- oneOfStrings ["...", " . . . ", ". . .", " . . ."]
- return Ellipses)
+ellipses = try $ oneOfStrings ["...", " . . . ", ". . .", " . . ."] >>
+ return Ellipses
dash = enDash <|> emDash
-enDash = try (do
- char '-'
- notFollowedBy (noneOf "0123456789")
- return EnDash)
+enDash = try $ char '-' >> notFollowedBy (noneOf "0123456789") >> return EnDash
-emDash = try (do
- skipSpaces
- oneOfStrings ["---", "--"]
- skipSpaces
- return EmDash)
+emDash = try $ skipSpaces >> oneOfStrings ["---", "--"] >>
+ skipSpaces >> return EmDash
-whitespace = do
- many1 (oneOf spaceChars) <?> "whitespace"
- return Space
+whitespace = (many1 (oneOf spaceChars) >> return Space) <?> "whitespace"
-tabchar = do
- tab
- return (Str "\t")
+tabchar = tab >> return (Str "\t")
-- hard line break
-linebreak = try (do
- oneOf spaceChars
- many1 (oneOf spaceChars)
- endline
- return LineBreak )
+linebreak = try $ oneOf spaceChars >> many1 (oneOf spaceChars) >>
+ endline >> return LineBreak
nonEndline = satisfy (/='\n')
-entity = do
- ent <- characterEntity
- return $ Str [ent]
-
strChar = noneOf (specialChars ++ spaceChars ++ "\n")
-str = do
- result <- many1 strChar
- return (Str result)
+str = many1 strChar >>= return . Str
-- an endline character that can be treated as a space, not a structural break
-endline = try (do
+endline = try $ do
newline
notFollowedBy blankline
st <- getState
if stateStrict st
then do
- notFollowedBy' emailBlockQuoteStart
+ notFollowedBy emailBlockQuoteStart
notFollowedBy (char '#') -- atx header
- notFollowedBy (try (do{manyTill anyChar newline;
- oneOf setextHChars})) -- setext header
+ notFollowedBy (manyTill anyChar newline >> oneOf setextHChars)
+ -- setext header
else return ()
-- parse potential list-starts differently if in a list:
- if (stateParserContext st) == ListItemState
- then notFollowedBy' $ choice [bulletListStart, anyOrderedListStart >> return ()]
+ if stateParserContext st == ListItemState
+ then notFollowedBy' (bulletListStart <|>
+ (anyOrderedListStart >> return ()))
else return ()
- return Space)
+ return Space
--
-- links
@@ -930,24 +819,23 @@ reference = notFollowedBy' (string "[^") >> -- footnote reference
-- source for a link, with optional title
source = try $ do
char '('
- option ' ' (char '<')
+ optional (char '<')
src <- many (noneOf ")> \t\n")
- option ' ' (char '>')
+ optional (char '>')
tit <- option "" title
skipSpaces
char ')'
return (removeTrailingSpace src, tit)
-titleWith startChar endChar = try (do
+titleWith startChar endChar = try $ do
leadingSpace <- many1 (oneOf " \t\n")
if length (filter (=='\n') leadingSpace) > 1
then fail "title must be separated by space and on same or next line"
else return ()
char startChar
- tit <- manyTill anyChar (try (do char endChar
- skipSpaces
- notFollowedBy (noneOf ")\n")))
- return $ decodeEntities tit)
+ tit <- manyTill anyChar (try (char endChar >> skipSpaces >>
+ notFollowedBy (noneOf ")\n")))
+ return $ decodeCharacterReferences tit
title = choice [ titleWith '(' ')',
titleWith '"' '"',
@@ -955,22 +843,20 @@ title = choice [ titleWith '(' ')',
link = choice [explicitLink, referenceLink] <?> "link"
-explicitLink = try (do
+explicitLink = try $ do
label <- reference
src <- source
- return (Link label src))
+ return $ Link label src
-- a link like [this][ref] or [this][] or [this]
referenceLink = try $ do
label <- reference
- ref <- option [] (try (do skipSpaces
- option ' ' newline
- skipSpaces
- reference))
+ ref <- option [] (try (skipSpaces >> optional newline >>
+ skipSpaces >> reference))
let ref' = if null ref then label else ref
state <- getState
case lookupKeySrc (stateKeys state) ref' of
- Nothing -> fail "no corresponding key"
+ Nothing -> fail "no corresponding key"
Just target -> return (Link label target)
autoLink = autoLinkEmail <|> autoLinkRegular
@@ -992,10 +878,10 @@ autoLinkRegular = try $ do
let src = prot ++ rest
return $ Link [Code src] (src, "")
-image = try (do
+image = try $ do
char '!'
(Link label src) <- link
- return (Image label src))
+ return $ Image label src
note = try $ do
failIfStrict
@@ -1003,23 +889,21 @@ note = try $ do
state <- getState
let notes = stateNotes state
case lookup ref notes of
- Nothing -> fail "note not found"
- Just contents -> return (Note contents)
+ Nothing -> fail "note not found"
+ Just contents -> return $ Note contents
inlineNote = try $ do
failIfStrict
char '^'
contents <- inlinesInBalanced "[" "]"
- return (Note [Para contents])
+ return $ Note [Para contents]
-rawLaTeXInline' = do
- failIfStrict
- rawLaTeXInline
+rawLaTeXInline' = failIfStrict >> rawLaTeXInline
rawHtmlInline' = do
st <- getState
- result <- if stateStrict st
- then choice [htmlBlockElement, anyHtmlTag, anyHtmlEndTag]
- else choice [htmlBlockElement, anyHtmlInlineTag]
- return (HtmlInline result)
+ result <- choice $ if stateStrict st
+ then [htmlBlockElement, anyHtmlTag, anyHtmlEndTag]
+ else [htmlBlockElement, anyHtmlInlineTag]
+ return $ HtmlInline result
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index a36c33d92..ce8fedf02 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -31,23 +31,14 @@ module Text.Pandoc.Readers.RST (
readRST
) where
import Text.Pandoc.Definition
-import Text.Pandoc.ParserCombinators
import Text.Pandoc.Shared
-import Text.Pandoc.Readers.HTML ( anyHtmlBlockTag, anyHtmlInlineTag )
-import Text.Regex ( matchRegex, mkRegex )
import Text.ParserCombinators.Parsec
-import Data.Maybe ( fromMaybe )
import Data.List ( findIndex, delete )
-import Data.Char ( toUpper )
-- | Parse reStructuredText string and return Pandoc document.
readRST :: ParserState -> String -> Pandoc
readRST state str = (readWith parseRST) state (str ++ "\n\n")
--- | Parse a string and print result (for testing).
-testString :: String -> IO ()
-testString = testStringWith parseRST
-
--
-- Constants and data structure definitions
---
@@ -62,15 +53,11 @@ specialChars = "\\`|*_<>$:[-"
-- parsing documents
--
-isAnonKey (ref, src) = (ref == [Str "_"])
-
-isHeader1 :: Block -> Bool
-isHeader1 (Header 1 _) = True
-isHeader1 _ = False
+isAnonKey (ref, src) = ref == [Str "_"]
-isHeader2 :: Block -> Bool
-isHeader2 (Header 2 _) = True
-isHeader2 _ = False
+isHeader :: Int -> Block -> Bool
+isHeader n (Header x _) = x == n
+isHeader _ _ = False
-- | Promote all headers in a list of blocks. (Part of
-- title transformation for RST.)
@@ -86,23 +73,23 @@ promoteHeaders num [] = []
titleTransform :: [Block] -- ^ list of blocks
-> ([Block], [Inline]) -- ^ modified list of blocks, title
titleTransform ((Header 1 head1):(Header 2 head2):rest) = -- title subtitle
- if (any isHeader1 rest) || (any isHeader2 rest)
+ if (any (isHeader 1) rest) || (any (isHeader 2) rest)
then ((Header 1 head1):(Header 2 head2):rest, [])
else ((promoteHeaders 2 rest), head1 ++ [Str ":", Space] ++ head2)
titleTransform ((Header 1 head1):rest) = -- title, no subtitle
- if (any isHeader1 rest)
+ if (any (isHeader 1) rest)
then ((Header 1 head1):rest, [])
else ((promoteHeaders 1 rest), head1)
titleTransform blocks = (blocks, [])
parseRST = do
- -- first pass: get anonymous keys
- refs <- manyTill (referenceKey <|> (do l <- lineClump
- return (LineClump l))) eof
+ -- first pass: get keys
+ refs <- manyTill (referenceKey <|> (lineClump >>= return . LineClump)) eof
let keys = map (\(KeyBlock label target) -> (label, target)) $
filter isKeyBlock refs
+ -- second pass, with keys stripped out
let rawlines = map (\(LineClump ln) -> ln) $ filter isLineClump refs
- setInput $ concat rawlines -- with keys stripped out
+ setInput $ concat rawlines
updateState (\state -> state { stateKeys = keys })
blocks <- parseBlocks
let blocks' = filter (/= Null) blocks
@@ -113,7 +100,7 @@ parseRST = do
let authors = stateAuthors state
let date = stateDate state
let title' = if (null title) then (stateTitle state) else title
- return (Pandoc (Meta title' authors date) blocks'')
+ return $ Pandoc (Meta title' authors date) blocks''
--
-- parsing blocks
@@ -121,32 +108,39 @@ parseRST = do
parseBlocks = manyTill block eof
-block = choice [ codeBlock, rawHtmlBlock, rawLaTeXBlock, blockQuote,
- imageBlock, unknownDirective, header,
- hrule, list, fieldList, lineBlock, para, plain,
- nullBlock ] <?> "block"
+block = choice [ codeBlock
+ , rawHtmlBlock
+ , rawLaTeXBlock
+ , blockQuote
+ , imageBlock
+ , unknownDirective
+ , header
+ , hrule
+ , list
+ , fieldList
+ , lineBlock
+ , para
+ , plain
+ , nullBlock ] <?> "block"
--
-- field list
--
-fieldListItem = try (do
+fieldListItem = try $ do
char ':'
name <- many1 alphaNum
string ": "
skipSpaces
first <- manyTill anyChar newline
- rest <- many (do
- notFollowedBy (char ':')
- notFollowedBy blankline
- skipSpaces
- manyTill anyChar newline )
- return (name, (joinWithSep " " (first:rest))))
-
-fieldList = try (do
+ rest <- many (notFollowedBy ((char ':') <|> blankline) >>
+ skipSpaces >> manyTill anyChar newline)
+ return $ (name, (joinWithSep " " (first:rest)))
+
+fieldList = try $ do
items <- many1 fieldListItem
blanklines
- let authors = case (lookup "Authors" items) of
+ let authors = case lookup "Authors" items of
Just auth -> [auth]
Nothing -> map snd (filter (\(x,y) -> x == "Author") items)
let date = case (lookup "Date" items) of
@@ -162,82 +156,74 @@ fieldList = try (do
updateState (\st -> st { stateAuthors = authors,
stateDate = date,
stateTitle = title })
- return (BlockQuote result))
+ return $ BlockQuote result
--
-- line block
--
-lineBlockLine = try (do
+lineBlockLine = try $ do
string "| "
white <- many (oneOf " \t")
line <- manyTill inline newline
- let line' = (if null white then [] else [Str white]) ++ line ++ [LineBreak]
- return line')
+ return $ (if null white then [] else [Str white]) ++ line ++ [LineBreak]
-lineBlock = try (do
+lineBlock = try $ do
lines <- many1 lineBlockLine
blanklines
- return $ Para (concat lines))
+ return $ Para (concat lines)
--
-- paragraph block
--
-para = choice [ paraBeforeCodeBlock, paraNormal ] <?> "paragraph"
+para = paraBeforeCodeBlock <|> paraNormal <?> "paragraph"
-codeBlockStart = try (do
- string "::"
- blankline
- blankline)
+codeBlockStart = try $ string "::" >> blankline >> blankline
-- paragraph that ends in a :: starting a code block
-paraBeforeCodeBlock = try (do
- result <- many1 (do {notFollowedBy' codeBlockStart; inline})
+paraBeforeCodeBlock = try $ do
+ result <- many1 (notFollowedBy' codeBlockStart >> inline)
lookAhead (string "::")
- return (Para (if (last result == Space)
- then normalizeSpaces result
- else (normalizeSpaces result) ++ [Str ":"])))
+ return $ Para $ if last result == Space
+ then normalizeSpaces result
+ else (normalizeSpaces result) ++ [Str ":"]
-- regular paragraph
-paraNormal = try (do
+paraNormal = try $ do
result <- many1 inline
newline
blanklines
- let result' = normalizeSpaces result
- return (Para result'))
+ return $ Para $ normalizeSpaces result
-plain = do
- result <- many1 inline
- let result' = normalizeSpaces result
- return (Plain result')
+plain = many1 inline >>= return . Plain . normalizeSpaces
--
-- image block
--
-imageBlock = try (do
+imageBlock = try $ do
string ".. image:: "
src <- manyTill anyChar newline
- return (Plain [Image [Str "image"] (src, "")]))
+ return $ Plain [Image [Str "image"] (src, "")]
--
-- header blocks
--
-header = choice [ doubleHeader, singleHeader ] <?> "header"
+header = doubleHeader <|> singleHeader <?> "header"
-- a header with lines on top and bottom
-doubleHeader = try (do
+doubleHeader = try $ do
c <- oneOf underlineChars
rest <- many (char c) -- the top line
let lenTop = length (c:rest)
skipSpaces
newline
- txt <- many1 (do {notFollowedBy blankline; inline})
- pos <- getPosition
+ txt <- many1 (notFollowedBy blankline >> inline)
+ pos <- getPosition
let len = (sourceColumn pos) - 1
- if (len > lenTop) then fail "title longer than border" else (do {return ()})
+ if (len > lenTop) then fail "title longer than border" else return ()
blankline -- spaces and newline
count lenTop (char c) -- the bottom line
blanklines
@@ -249,10 +235,10 @@ doubleHeader = try (do
Just ind -> (headerTable, ind + 1)
Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1)
setState (state { stateHeaderTable = headerTable' })
- return (Header level (normalizeSpaces txt)))
+ return $ Header level (normalizeSpaces txt)
-- a header with line on the bottom only
-singleHeader = try (do
+singleHeader = try $ do
notFollowedBy' whitespace
txt <- many1 (do {notFollowedBy blankline; inline})
pos <- getPosition
@@ -268,19 +254,19 @@ singleHeader = try (do
Just ind -> (headerTable, ind + 1)
Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1)
setState (state { stateHeaderTable = headerTable' })
- return (Header level (normalizeSpaces txt)))
+ return $ Header level (normalizeSpaces txt)
--
-- hrule block
--
-hruleWith chr = try (do
+hruleWith chr = try $ do
count 4 (char chr)
skipMany (char chr)
skipSpaces
newline
blanklines
- return HorizontalRule)
+ return HorizontalRule
hrule = choice (map hruleWith underlineChars) <?> "hrule"
@@ -289,15 +275,16 @@ hrule = choice (map hruleWith underlineChars) <?> "hrule"
--
-- read a line indented by a given string
-indentedLine indents = try (do
+indentedLine indents = try $ do
string indents
result <- manyTill anyChar newline
- return (result ++ "\n"))
+ return $ result ++ "\n"
--- two or more indented lines, possibly separated by blank lines
--- if variable = True, then any indent will work, but it must be consistent through the block
--- if variable = False, indent should be one tab or equivalent in spaces
-indentedBlock variable = try (do
+-- two or more indented lines, possibly separated by blank lines.
+-- if variable = True, then any indent will work, but it must be
+-- consistent through the block.
+-- if variable = False, indent should be one tab or equivalent in spaces.
+indentedBlock variable = try $ do
state <- getState
let tabStop = stateTabStop state
indents <- if variable
@@ -305,51 +292,47 @@ indentedBlock variable = try (do
else oneOfStrings ["\t", (replicate tabStop ' ')]
firstline <- manyTill anyChar newline
rest <- many (choice [ indentedLine indents,
- try (do
- b <- blanklines
- l <- indentedLine indents
- return (b ++ l))])
- option "" blanklines
- return (firstline ++ "\n" ++ (concat rest)))
-
-codeBlock = try (do
+ try (do b <- blanklines
+ l <- indentedLine indents
+ return (b ++ l))])
+ optional blanklines
+ return $ firstline ++ "\n" ++ concat rest
+
+codeBlock = try $ do
codeBlockStart
result <- indentedBlock False
-- the False means we want one tab stop indent on each line
- return (CodeBlock (stripTrailingNewlines result)))
+ return $ CodeBlock $ stripTrailingNewlines result
--
-- raw html
--
-rawHtmlBlock = try (do
- string ".. raw:: html"
- blanklines
- result <- indentedBlock True
- return (RawHtml result))
+rawHtmlBlock = try $ string ".. raw:: html" >> blanklines >>
+ indentedBlock True >>= return . RawHtml
--
-- raw latex
--
-rawLaTeXBlock = try (do
+rawLaTeXBlock = try $ do
string ".. raw:: latex"
blanklines
result <- indentedBlock True
- return (Para [(TeX result)]))
+ return $ Para [(TeX result)]
--
-- block quotes
--
-blockQuote = try (do
+blockQuote = try $ do
raw <- indentedBlock True
-- parse the extracted block, which may contain various block elements:
rest <- getInput
setInput $ raw ++ "\n\n"
contents <- parseBlocks
setInput rest
- return (BlockQuote contents))
+ return $ BlockQuote contents
--
-- list blocks
@@ -369,15 +352,14 @@ definitionListItem = try $ do
definitionList = try $ do
items <- many1 definitionListItem
- return (DefinitionList items)
+ return $ DefinitionList items
-- parses bullet list start and returns its length (inc. following whitespace)
-bulletListStart = try (do
+bulletListStart = try $ do
notFollowedBy' hrule -- because hrules start out just like lists
marker <- oneOf bulletListMarkers
white <- many1 spaceChar
- let len = length (marker:white)
- return len)
+ return $ length (marker:white)
-- parses ordered list start and returns its length (inc following whitespace)
orderedListStart style delim = try $ do
@@ -386,11 +368,11 @@ orderedListStart style delim = try $ do
return $ markerLen + length white
-- parse a line of a list item
-listLine markerLength = try (do
+listLine markerLength = try $ do
notFollowedBy blankline
indentWith markerLength
line <- manyTill anyChar newline
- return (line ++ "\n"))
+ return $ line ++ "\n"
-- indent by specified number of spaces (or equiv. tabs)
indentWith num = do
@@ -399,7 +381,7 @@ indentWith num = do
if (num < tabStop)
then count num (char ' ')
else choice [ try (count num (char ' ')),
- (try (do {char '\t'; count (num - tabStop) (char ' ')})) ]
+ (try (char '\t' >> count (num - tabStop) (char ' '))) ]
-- parse raw text for one list item, excluding start marker and continuations
rawListItem start = try $ do
@@ -411,19 +393,16 @@ rawListItem start = try $ do
-- continuation of a list item - indented and separated by blankline or
-- (in compact lists) endline.
-- Note: nested lists are parsed as continuations.
-listContinuation markerLength = try (do
+listContinuation markerLength = try $ do
blanks <- many1 blankline
result <- many1 (listLine markerLength)
- return (blanks ++ (concat result)))
+ return $ blanks ++ concat result
-listItem start = try (do
+listItem start = try $ do
(markerLength, first) <- rawListItem start
rest <- many (listContinuation markerLength)
- blanks <- choice [ try (do
- b <- many blankline
- lookAhead start
- return b),
- many1 blankline ] -- whole list must end with blank
+ blanks <- choice [ try (many blankline >>~ lookAhead start),
+ many1 blankline ] -- whole list must end with blank.
-- parsing with ListItemState forces markers at beginning of lines to
-- count as list item markers, even if not separated by blank space.
-- see definition of "endline"
@@ -436,52 +415,44 @@ listItem start = try (do
parsed <- parseBlocks
setInput remaining
updateState (\st -> st {stateParserContext = oldContext})
- return parsed)
+ return parsed
orderedList = try $ do
(start, style, delim) <- lookAhead anyOrderedListMarker
items <- many1 (listItem (orderedListStart style delim))
let items' = compactify items
- return (OrderedList (start, style, delim) items')
+ return $ OrderedList (start, style, delim) items'
-bulletList = try (do
+bulletList = try $ do
items <- many1 (listItem bulletListStart)
let items' = compactify items
- return (BulletList items'))
+ return $ BulletList items'
--
-- unknown directive (e.g. comment)
--
-unknownDirective = try (do
+unknownDirective = try $ do
string ".. "
manyTill anyChar newline
- many (do
- string " "
- char ':'
- many1 (noneOf "\n:")
- char ':'
- many1 (noneOf "\n")
- newline)
- option "" blanklines
- return Null)
+ many (string " :" >> many1 (noneOf "\n:") >> char ':' >>
+ many1 (noneOf "\n") >> newline)
+ optional blanklines
+ return Null
--
-- reference key
--
-referenceKey = do
- result <- choice [imageKey, anonymousKey, regularKeyQuoted, regularKey]
- option "" blanklines
- return result
+referenceKey =
+ choice [imageKey, anonymousKey, regularKeyQuoted, regularKey] >>~
+ optional blanklines
targetURI = try $ do
skipSpaces
- option ' ' newline
- contents <- many1 (try (do many spaceChar
- newline
- many1 spaceChar
- noneOf " \t\n") <|> noneOf "\n")
+ optional newline
+ contents <- many1 (try (many spaceChar >> newline >>
+ many1 spaceChar >> noneOf " \t\n") <|> noneOf "\n")
blanklines
return contents
@@ -516,71 +487,73 @@ regularKey = try $ do
-- inline
--
-inline = choice [ superscript, subscript,
- escapedChar, link, image, hyphens, strong, emph, code,
- str, tabchar, whitespace, endline, symbol ] <?> "inline"
-
-hyphens = try (do
+inline = choice [ superscript
+ , subscript
+ , escapedChar
+ , link
+ , image
+ , hyphens
+ , strong
+ , emph
+ , code
+ , str
+ , tabchar
+ , whitespace
+ , endline
+ , symbol ] <?> "inline"
+
+hyphens = try $ do
result <- many1 (char '-')
option Space endline
-- don't want to treat endline after hyphen or dash as a space
- return (Str result))
+ return $ Str result
escapedChar = escaped anyChar
symbol = do
result <- oneOf specialChars
- return (Str [result])
+ return $ Str [result]
-- parses inline code, between codeStart and codeEnd
-code = try (do
+code = try $ do
string "``"
result <- manyTill anyChar (try (string "``"))
- let result' = removeLeadingTrailingSpace $ joinWithSep " " $ lines result
- return (Code result'))
+ return $ Code $ removeLeadingTrailingSpace $ joinWithSep " " $ lines result
-emph = do
- result <- enclosed (char '*') (char '*') inline
- return (Emph (normalizeSpaces result))
+emph = enclosed (char '*') (char '*') inline >>=
+ return . Emph . normalizeSpaces
-strong = do
- result <- enclosed (string "**") (string "**") inline
- return (Strong (normalizeSpaces result))
+strong = enclosed (string "**") (string "**") inline >>=
+ return . Strong . normalizeSpaces
interpreted role = try $ do
- option "" (try $ string "\\ ")
+ optional $ try $ string "\\ "
result <- enclosed (string $ ":" ++ role ++ ":`") (char '`') anyChar
nextChar <- lookAhead anyChar
try (string "\\ ") <|> lookAhead (count 1 $ oneOf " \t\n") <|> (eof >> return "")
return [Str result]
superscript = interpreted "sup" >>= (return . Superscript)
+
subscript = interpreted "sub" >>= (return . Subscript)
-whitespace = do
- many1 spaceChar <?> "whitespace"
- return Space
+whitespace = many1 spaceChar >> return Space <?> "whitespace"
-tabchar = do
- tab
- return (Str "\t")
+tabchar = tab >> return (Str "\t")
-str = do
- notFollowedBy' oneWordReference
- result <- many1 (noneOf (specialChars ++ "\t\n "))
- return (Str result)
+str = notFollowedBy' oneWordReference >>
+ many1 (noneOf (specialChars ++ "\t\n ")) >>= return . Str
-- an endline character that can be treated as a space, not a structural break
-endline = try (do
+endline = try $ do
newline
notFollowedBy blankline
-- parse potential list-starts at beginning of line differently in a list:
st <- getState
if ((stateParserContext st) == ListItemState)
- then do notFollowedBy' anyOrderedListMarker
- notFollowedBy' bulletListStart
- else option () pzero
- return Space)
+ then notFollowedBy' anyOrderedListMarker >> notFollowedBy' bulletListStart
+ else return ()
+ return Space
--
-- links
@@ -628,10 +601,10 @@ referenceLink = try $ do
uriScheme = oneOfStrings [ "http://", "https://", "ftp://", "file://",
"mailto:", "news:", "telnet:" ]
-uri = try (do
+uri = try $ do
scheme <- uriScheme
identifier <- many1 (noneOf " \t\n")
- return (scheme ++ identifier))
+ return $ scheme ++ identifier
autoURI = try $ do
src <- uri
@@ -639,20 +612,20 @@ autoURI = try $ do
emailChar = alphaNum <|> oneOf "-+_."
-emailAddress = try (do
+emailAddress = try $ do
firstLetter <- alphaNum
restAddr <- many emailChar
let addr = firstLetter:restAddr
char '@'
dom <- domain
- return (addr ++ '@':dom))
+ return $ addr ++ '@':dom
domainChar = alphaNum <|> char '-'
-domain = try (do
+domain = try $ do
first <- many1 domainChar
dom <- many1 (try (do{ char '.'; many1 domainChar }))
- return (joinWithSep "." (first:dom)))
+ return $ joinWithSep "." (first:dom)
autoEmail = try $ do
src <- emailAddress
@@ -669,5 +642,5 @@ image = try $ do
src <- case lookupKeySrc keyTable ref of
Nothing -> fail "no corresponding key"
Just target -> return target
- return (Image (normalizeSpaces ref) src)
+ return $ Image (normalizeSpaces ref) src