diff options
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 362 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/LaTeX.hs | 536 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 662 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/RST.hs | 321 |
4 files changed, 811 insertions, 1070 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 1742667b8..1eb5d7b4a 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -41,12 +41,12 @@ module Text.Pandoc.Readers.HTML ( ) where import Text.ParserCombinators.Parsec -import Text.Pandoc.ParserCombinators import Text.Pandoc.Definition import Text.Pandoc.Shared -import Text.Pandoc.Entities ( characterEntity, decodeEntities ) +import Text.Pandoc.CharacterReferences ( characterReference, + decodeCharacterReferences ) import Data.Maybe ( fromMaybe ) -import Data.List ( intersect, takeWhile, dropWhile ) +import Data.List ( takeWhile, dropWhile ) import Data.Char ( toUpper, toLower, isAlphaNum ) -- | Convert HTML-formatted string to 'Pandoc' document. @@ -55,10 +55,6 @@ readHtml :: ParserState -- ^ Parser state -> Pandoc readHtml = readWith parseHtml --- for testing -testString :: String -> IO () -testString = testStringWith parseHtml - -- -- Constants -- @@ -74,26 +70,18 @@ inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big", -- -- | Read blocks until end tag. -blocksTilEnd tag = try (do - blocks <- manyTill (do {b <- block; spaces; return b}) (htmlEndTag tag) - return $ filter (/= Null) blocks) +blocksTilEnd tag = do + blocks <- manyTill (block >>~ spaces) (htmlEndTag tag) + return $ filter (/= Null) blocks -- | Read inlines until end tag. -inlinesTilEnd tag = try (do - inlines <- manyTill inline (htmlEndTag tag) - return inlines) +inlinesTilEnd tag = manyTill inline (htmlEndTag tag) -- | Parse blocks between open and close tag. -blocksIn tag = try $ do - htmlTag tag - spaces - blocksTilEnd tag +blocksIn tag = try $ htmlTag tag >> spaces >> blocksTilEnd tag -- | Parse inlines between open and close tag. -inlinesIn tag = try $ do - htmlTag tag - spaces - inlinesTilEnd tag +inlinesIn tag = try $ htmlTag tag >> spaces >> inlinesTilEnd tag -- | Extract type from a tag: e.g. @br@ from @\<br\>@ extractTagType :: String -> String @@ -103,19 +91,19 @@ extractTagType ('<':rest) = extractTagType _ = "" -- | Parse any HTML tag (closing or opening) and return text of tag -anyHtmlTag = try (do +anyHtmlTag = try $ do char '<' spaces tag <- many1 alphaNum attribs <- htmlAttributes spaces ender <- option "" (string "/") - let ender' = if (null ender) then "" else " /" + let ender' = if null ender then "" else " /" spaces char '>' - return ("<" ++ tag ++ attribs ++ ender' ++ ">")) + return $ "<" ++ tag ++ attribs ++ ender' ++ ">" -anyHtmlEndTag = try (do +anyHtmlEndTag = try $ do char '<' spaces char '/' @@ -123,19 +111,19 @@ anyHtmlEndTag = try (do tagType <- many1 alphaNum spaces char '>' - return ("</" ++ tagType ++ ">")) + return $ "</" ++ tagType ++ ">" htmlTag :: String -> GenParser Char st (String, [(String, String)]) -htmlTag tag = try (do +htmlTag tag = try $ do char '<' spaces stringAnyCase tag attribs <- many htmlAttribute spaces - option "" (string "/") + optional (string "/") spaces char '>' - return (tag, (map (\(name, content, raw) -> (name, content)) attribs))) + return (tag, (map (\(name, content, raw) -> (name, content)) attribs)) -- parses a quoted html attribute value quoted quoteChar = do @@ -145,20 +133,20 @@ quoted quoteChar = do htmlAttributes = do attrList <- many htmlAttribute - return (concatMap (\(name, content, raw) -> raw) attrList) + return $ concatMap (\(name, content, raw) -> raw) attrList htmlAttribute = htmlRegularAttribute <|> htmlMinimizedAttribute --- minimized boolean attribute (no = and value) -htmlMinimizedAttribute = try (do +-- minimized boolean attribute +htmlMinimizedAttribute = try $ do many1 space name <- many1 (choice [letter, oneOf ".-_:"]) spaces notFollowedBy (char '=') let content = name - return (name, content, (" " ++ name))) + return (name, content, (" " ++ name)) -htmlRegularAttribute = try (do +htmlRegularAttribute = try $ do many1 space name <- many1 (choice [letter, oneOf ".-_:"]) spaces @@ -170,10 +158,10 @@ htmlRegularAttribute = try (do a <- many (alphaNum <|> (oneOf "-._:")) return (a,"")) ] return (name, content, - (" " ++ name ++ "=" ++ quoteStr ++ content ++ quoteStr))) + (" " ++ name ++ "=" ++ quoteStr ++ content ++ quoteStr)) -- | Parse an end tag of type 'tag' -htmlEndTag tag = try (do +htmlEndTag tag = try $ do char '<' spaces char '/' @@ -181,87 +169,83 @@ htmlEndTag tag = try (do stringAnyCase tag spaces char '>' - return ("</" ++ tag ++ ">")) + return $ "</" ++ tag ++ ">" -- | Returns @True@ if the tag is an inline tag. isInline tag = (extractTagType tag) `elem` inlineHtmlTags -anyHtmlBlockTag = try (do - tag <- choice [anyHtmlTag, anyHtmlEndTag] - if isInline tag then fail "inline tag" else return tag) +anyHtmlBlockTag = try $ do + tag <- anyHtmlTag <|> anyHtmlEndTag + if isInline tag then fail "inline tag" else return tag -anyHtmlInlineTag = try (do - tag <- choice [ anyHtmlTag, anyHtmlEndTag ] - if isInline tag then return tag else fail "not an inline tag") +anyHtmlInlineTag = try $ do + tag <- anyHtmlTag <|> anyHtmlEndTag + if isInline tag then return tag else fail "not an inline tag" -- | Parses material between script tags. -- Scripts must be treated differently, because they can contain '<>' etc. -htmlScript = try (do +htmlScript = try $ do open <- string "<script" rest <- manyTill anyChar (htmlEndTag "script") - return (open ++ rest ++ "</script>")) + return $ open ++ rest ++ "</script>" htmlBlockElement = choice [ htmlScript, htmlComment, xmlDec, definition ] -rawHtmlBlock = try (do - notFollowedBy' (choice [htmlTag "/body", htmlTag "/html"]) +rawHtmlBlock = try $ do + notFollowedBy' (htmlTag "/body" <|> htmlTag "/html") body <- htmlBlockElement <|> anyHtmlBlockTag - sp <- (many space) + sp <- many space state <- getState - if stateParseRaw state then return (RawHtml (body ++ sp)) else return Null) + if stateParseRaw state then return (RawHtml (body ++ sp)) else return Null -- | Parses an HTML comment. -htmlComment = try (do +htmlComment = try $ do string "<!--" comment <- manyTill anyChar (try (string "-->")) - return ("<!--" ++ comment ++ "-->")) + return $ "<!--" ++ comment ++ "-->" -- -- parsing documents -- -xmlDec = try (do +xmlDec = try $ do string "<?" rest <- manyTill anyChar (char '>') - return ("<?" ++ rest ++ ">")) + return $ "<?" ++ rest ++ ">" -definition = try (do +definition = try $ do string "<!" rest <- manyTill anyChar (char '>') - return ("<!" ++ rest ++ ">")) + return $ "<!" ++ rest ++ ">" -nonTitleNonHead = try (do - notFollowedBy' (htmlTag "title") - notFollowedBy' (htmlTag "/head") - result <- choice [do {rawHtmlBlock; return ' '}, anyChar] - return result) +nonTitleNonHead = try $ notFollowedBy' (htmlTag "title" <|> htmlTag "/head") >> + ((rawHtmlBlock >> return ' ') <|> anyChar) -parseTitle = try (do - (tag, attribs) <- htmlTag "title" +parseTitle = try $ do + (tag, _) <- htmlTag "title" contents <- inlinesTilEnd tag spaces - return contents) + return contents -- parse header and return meta-information (for now, just title) -parseHead = try (do +parseHead = try $ do htmlTag "head" spaces skipMany nonTitleNonHead contents <- option [] parseTitle skipMany nonTitleNonHead htmlTag "/head" - return (contents, [], "")) + return (contents, [], "") -skipHtmlTag tag = option ("",[]) (htmlTag tag) +skipHtmlTag tag = optional (htmlTag tag) -- h1 class="title" representation of title in body -bodyTitle = try (do +bodyTitle = try $ do (tag, attribs) <- htmlTag "h1" cl <- case (extractAttribute "class" attribs) of - Just "title" -> do {return ""} + Just "title" -> return "" otherwise -> fail "not title" inlinesTilEnd "h1" - return "") parseHtml = do sepEndBy (choice [xmlDec, definition, htmlComment]) spaces @@ -271,27 +255,30 @@ parseHtml = do spaces skipHtmlTag "body" spaces - option "" bodyTitle -- skip title in body, because it's represented in meta + optional bodyTitle -- skip title in body, because it's represented in meta blocks <- parseBlocks spaces - option "" (htmlEndTag "body") + optional (htmlEndTag "body") spaces - option "" (htmlEndTag "html") + optional (htmlEndTag "html") many anyChar -- ignore anything after </html> eof - return (Pandoc (Meta title authors date) blocks) + return $ Pandoc (Meta title authors date) blocks -- -- parsing blocks -- -parseBlocks = do - spaces - result <- sepEndBy block spaces - return $ filter (/= Null) result +parseBlocks = spaces >> sepEndBy block spaces >>= (return . filter (/= Null)) -block = choice [ codeBlock, header, hrule, list, blockQuote, para, plain, - rawHtmlBlock ] <?> "block" +block = choice [ codeBlock + , header + , hrule + , list + , blockQuote + , para + , plain + , rawHtmlBlock ] <?> "block" -- -- header blocks @@ -299,53 +286,49 @@ block = choice [ codeBlock, header, hrule, list, blockQuote, para, plain, header = choice (map headerLevel (enumFromTo 1 5)) <?> "header" -headerLevel n = try (do +headerLevel n = try $ do let level = "h" ++ show n (tag, attribs) <- htmlTag level contents <- inlinesTilEnd level - return (Header n (normalizeSpaces contents))) + return $ Header n (normalizeSpaces contents) -- -- hrule block -- -hrule = try (do +hrule = try $ do (tag, attribs) <- htmlTag "hr" state <- getState - if (not (null attribs)) && (stateParseRaw state) - then -- in this case we want to parse it as raw html - unexpected "attributes in hr" - else return HorizontalRule) + if not (null attribs) && stateParseRaw state + then unexpected "attributes in hr" -- parse as raw in this case + else return HorizontalRule -- -- code blocks -- -codeBlock = choice [ preCodeBlock, bareCodeBlock ] <?> "code block" +codeBlock = preCodeBlock <|> bareCodeBlock <?> "code block" -preCodeBlock = try (do +preCodeBlock = try $ do htmlTag "pre" spaces - htmlTag "code" - result <- manyTill anyChar (htmlEndTag "code") + result <- bareCodeBlock spaces htmlEndTag "pre" - return (CodeBlock (stripTrailingNewlines (decodeEntities result)))) + return result -bareCodeBlock = try (do +bareCodeBlock = try $ do htmlTag "code" result <- manyTill anyChar (htmlEndTag "code") - return (CodeBlock (stripTrailingNewlines (decodeEntities result)))) + return $ CodeBlock $ stripTrailingNewlines $ + decodeCharacterReferences result -- -- block quotes -- -blockQuote = try (do - tag <- htmlTag "blockquote" - spaces - blocks <- blocksTilEnd "blockquote" - return (BlockQuote blocks)) +blockQuote = try $ htmlTag "blockquote" >> spaces >> + blocksTilEnd "blockquote" >>= (return . BlockQuote) -- -- list blocks @@ -354,119 +337,105 @@ blockQuote = try (do list = choice [ bulletList, orderedList, definitionList ] <?> "list" orderedList = try $ do - (_, attribs) <- htmlTag "ol" - (start, style) <- option (1, DefaultStyle) $ - do failIfStrict - let sta = fromMaybe "1" $ - lookup "start" attribs - let sty = fromMaybe (fromMaybe "" $ - lookup "style" attribs) $ - lookup "class" attribs - let sty' = case sty of - "lower-roman" -> LowerRoman - "upper-roman" -> UpperRoman - "lower-alpha" -> LowerAlpha - "upper-alpha" -> UpperAlpha - "decimal" -> Decimal - _ -> DefaultStyle - return (read sta, sty') - spaces - items <- sepEndBy1 (blocksIn "li") spaces - htmlEndTag "ol" - return (OrderedList (start, style, DefaultDelim) items) + (_, attribs) <- htmlTag "ol" + (start, style) <- option (1, DefaultStyle) $ + do failIfStrict + let sta = fromMaybe "1" $ + lookup "start" attribs + let sty = fromMaybe (fromMaybe "" $ + lookup "style" attribs) $ + lookup "class" attribs + let sty' = case sty of + "lower-roman" -> LowerRoman + "upper-roman" -> UpperRoman + "lower-alpha" -> LowerAlpha + "upper-alpha" -> UpperAlpha + "decimal" -> Decimal + _ -> DefaultStyle + return (read sta, sty') + spaces + items <- sepEndBy1 (blocksIn "li") spaces + htmlEndTag "ol" + return $ OrderedList (start, style, DefaultDelim) items bulletList = try $ do - htmlTag "ul" - spaces - items <- sepEndBy1 (blocksIn "li") spaces - htmlEndTag "ul" - return (BulletList items) + htmlTag "ul" + spaces + items <- sepEndBy1 (blocksIn "li") spaces + htmlEndTag "ul" + return $ BulletList items definitionList = try $ do - failIfStrict -- def lists not part of standard markdown - tag <- htmlTag "dl" - spaces - items <- sepEndBy1 definitionListItem spaces - htmlEndTag "dl" - return (DefinitionList items) + failIfStrict -- def lists not part of standard markdown + tag <- htmlTag "dl" + spaces + items <- sepEndBy1 definitionListItem spaces + htmlEndTag "dl" + return $ DefinitionList items definitionListItem = try $ do - terms <- sepEndBy1 (inlinesIn "dt") spaces - defs <- sepEndBy1 (blocksIn "dd") spaces - let term = joinWithSep [LineBreak] terms - return (term, concat defs) + terms <- sepEndBy1 (inlinesIn "dt") spaces + defs <- sepEndBy1 (blocksIn "dd") spaces + let term = joinWithSep [LineBreak] terms + return (term, concat defs) -- -- paragraph block -- -para = try (do - tag <- htmlTag "p" - result <- inlinesTilEnd "p" - return (Para (normalizeSpaces result))) +para = htmlTag "p" >> inlinesTilEnd "p" >>= return . Para . normalizeSpaces -- -- plain block -- -plain = do - result <- many1 inline - return (Plain (normalizeSpaces result)) +plain = many1 inline >>= return . Plain . normalizeSpaces -- -- inline -- -inline = choice [ text, special ] <?> "inline" - -text = choice [ entity, strong, emph, superscript, subscript, - strikeout, spanStrikeout, code, str, - linebreak, whitespace ] <?> "text" - -special = choice [ link, image, rawHtmlInline ] <?> - "link, inline html, or image" - -entity = do - ent <- characterEntity - return $ Str [ent] - -code = try (do +inline = choice [ charRef + , strong + , emph + , superscript + , subscript + , strikeout + , spanStrikeout + , code + , str + , linebreak + , whitespace + , link + , image + , rawHtmlInline ] <?> "inline" + +code = try $ do htmlTag "code" result <- manyTill anyChar (htmlEndTag "code") -- remove internal line breaks, leading and trailing space, - -- and decode entities - let result' = decodeEntities $ removeLeadingTrailingSpace $ - joinWithSep " " $ lines result - return (Code result')) + -- and decode character references + return $ Code $ decodeCharacterReferences $ removeLeadingTrailingSpace $ + joinWithSep " " $ lines result rawHtmlInline = do - result <- choice [htmlScript, anyHtmlInlineTag] + result <- htmlScript <|> anyHtmlInlineTag state <- getState if stateParseRaw state then return (HtmlInline result) else return (Str "") -betweenTags tag = try (do - htmlTag tag - result <- inlinesTilEnd tag - return (normalizeSpaces result)) +betweenTags tag = try $ htmlTag tag >> inlinesTilEnd tag >>= + return . normalizeSpaces -emph = try (do - result <- choice [betweenTags "em", betweenTags "it"] - return (Emph result)) +emph = (betweenTags "em" <|> betweenTags "it") >>= return . Emph -superscript = try $ do - failIfStrict -- strict markdown has no superscript, so treat as raw HTML - result <- betweenTags "sup" - return (Superscript result) +strong = (betweenTags "b" <|> betweenTags "strong") >>= return . Strong -subscript = try $ do - failIfStrict -- strict markdown has no subscript, so treat as raw HTML - result <- betweenTags "sub" - return (Subscript result) +superscript = failIfStrict >> betweenTags "sup" >>= return . Superscript -strikeout = try $ do - failIfStrict -- strict markdown has no strikeout, so treat as raw HTML - result <- choice [betweenTags "s", betweenTags "strike"] - return (Strikeout result) +subscript = failIfStrict >> betweenTags "sub" >>= return . Subscript + +strikeout = failIfStrict >> (betweenTags "s" <|> betweenTags "strike") >>= + return . Strikeout spanStrikeout = try $ do failIfStrict -- strict markdown has no strikeout, so treat as raw HTML @@ -474,25 +443,14 @@ spanStrikeout = try $ do result <- case (extractAttribute "class" attributes) of Just "strikeout" -> inlinesTilEnd "span" _ -> fail "not a strikeout" - return (Strikeout result) + return $ Strikeout result -strong = try (do - result <- choice [betweenTags "b", betweenTags "strong"] - return (Strong result)) - -whitespace = do - many1 space - return Space +whitespace = many1 space >> return Space -- hard line break -linebreak = do - htmlTag "br" - option ' ' newline - return LineBreak +linebreak = htmlTag "br" >> optional newline >> return LineBreak -str = do - result <- many1 (noneOf "<& \t\n") - return (Str result) +str = many1 (noneOf "<& \t\n") >>= return . Str -- -- links and images @@ -501,27 +459,27 @@ str = do -- extract contents of attribute (attribute names are case-insensitive) extractAttribute name [] = Nothing extractAttribute name ((attrName, contents):rest) = - let name' = map toLower name - attrName' = map toLower attrName in - if (attrName' == name') - then Just (decodeEntities contents) - else extractAttribute name rest + let name' = map toLower name + attrName' = map toLower attrName + in if attrName' == name' + then Just (decodeCharacterReferences contents) + else extractAttribute name rest link = try $ do (tag, attributes) <- htmlTag "a" url <- case (extractAttribute "href" attributes) of - Just url -> do {return url} + Just url -> return url Nothing -> fail "no href" - let title = fromMaybe "" (extractAttribute "title" attributes) + let title = fromMaybe "" $ extractAttribute "title" attributes label <- inlinesTilEnd "a" return $ Link (normalizeSpaces label) (url, title) image = try $ do (tag, attributes) <- htmlTag "img" url <- case (extractAttribute "src" attributes) of - Just url -> do {return url} + Just url -> return url Nothing -> fail "no src" - let title = fromMaybe "" (extractAttribute "title" attributes) + let title = fromMaybe "" $ extractAttribute "title" attributes let alt = fromMaybe "" (extractAttribute "alt" attributes) return $ Image [Str alt] (url, title) diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 73a3e4a8f..4b91b528c 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -34,7 +34,6 @@ module Text.Pandoc.Readers.LaTeX ( ) where import Text.ParserCombinators.Parsec -import Text.Pandoc.ParserCombinators import Text.Pandoc.Definition import Text.Pandoc.Shared import Data.Maybe ( fromMaybe ) @@ -47,9 +46,6 @@ readLaTeX :: ParserState -- ^ Parser state, including options for parser -> Pandoc readLaTeX = readWith parseLaTeX --- for testing -testString = testStringWith parseLaTeX - -- characters with special meaning specialChars = "\\$%&^&_~#{}\n \t|<>'\"-" @@ -58,12 +54,12 @@ specialChars = "\\$%&^&_~#{}\n \t|<>'\"-" -- -- | Returns text between brackets and its matching pair. -bracketedText openB closeB = try (do +bracketedText openB closeB = do result <- charsInBalanced' openB closeB - return ([openB] ++ result ++ [closeB])) + return $ [openB] ++ result ++ [closeB] -- | Returns an option or argument of a LaTeX command. -optOrArg = choice [ (bracketedText '{' '}'), (bracketedText '[' ']') ] +optOrArg = bracketedText '{' '}' <|> bracketedText '[' ']' -- | True if the string begins with '{'. isArg ('{':rest) = True @@ -73,62 +69,55 @@ isArg other = False commandArgs = many optOrArg -- | Parses LaTeX command, returns (name, star, list of options or arguments). -command = try (do +command = try $ do char '\\' name <- many1 alphaNum star <- option "" (string "*") -- some commands have starred versions args <- commandArgs - return (name, star, args)) + return (name, star, args) -begin name = try (do - string "\\begin{" - string name - char '}' - option [] commandArgs +begin name = try $ do + string $ "\\begin{" ++ name ++ "}" + optional commandArgs spaces - return name) + return name -end name = try (do - string "\\end{" - string name - char '}' +end name = try $ do + string $ "\\end{" ++ name ++ "}" spaces - return name) + return name -- | Returns a list of block elements containing the contents of an -- environment. -environment name = try (do - begin name - spaces - contents <- manyTill block (end name) - return contents) +environment name = try $ begin name >> spaces >> manyTill block (end name) -anyEnvironment = try (do +anyEnvironment = try $ do string "\\begin{" name <- many alphaNum star <- option "" (string "*") -- some environments have starred variants char '}' - option [] commandArgs + optional commandArgs spaces contents <- manyTill block (end (name ++ star)) - return (BlockQuote contents)) + return $ BlockQuote contents -- -- parsing documents -- -- | Process LaTeX preamble, extracting metadata. -processLaTeXPreamble = try (do - manyTill (choice [bibliographic, comment, unknownCommand, nullBlock]) - (try (string "\\begin{document}")) - spaces) +processLaTeXPreamble = try $ manyTill + (choice [bibliographic, comment, unknownCommand, nullBlock]) + (try (string "\\begin{document}")) >> + spaces -- | Parse LaTeX and return 'Pandoc'. parseLaTeX = do - option () processLaTeXPreamble -- preamble might not be present (fragment) + optional processLaTeXPreamble -- preamble might not be present (fragment) + spaces blocks <- parseBlocks spaces - option "" (try (string "\\end{document}")) -- might not be present (in fragment) + optional $ try (string "\\end{document}") -- might not be present (fragment) spaces eof state <- getState @@ -136,21 +125,27 @@ parseLaTeX = do let title' = stateTitle state let authors' = stateAuthors state let date' = stateDate state - return (Pandoc (Meta title' authors' date') blocks') + return $ Pandoc (Meta title' authors' date') blocks' -- -- parsing blocks -- -parseBlocks = do - spaces - result <- many block - return result - -block = choice [ hrule, codeBlock, header, list, blockQuote, mathBlock, - comment, bibliographic, para, specialEnvironment, - itemBlock, unknownEnvironment, unknownCommand ] <?> - "block" +parseBlocks = spaces >> many block + +block = choice [ hrule + , codeBlock + , header + , list + , blockQuote + , mathBlock + , comment + , bibliographic + , para + , specialEnvironment + , itemBlock + , unknownEnvironment + , unknownCommand ] <?> "block" -- -- header blocks @@ -158,24 +153,21 @@ block = choice [ hrule, codeBlock, header, list, blockQuote, mathBlock, header = choice (map headerLevel (enumFromTo 1 5)) <?> "header" -headerLevel n = try (do +headerLevel n = try $ do let subs = concat $ replicate (n - 1) "sub" string ("\\" ++ subs ++ "section") - option ' ' (char '*') + optional (char '*') char '{' title <- manyTill inline (char '}') spaces - return (Header n (normalizeSpaces title))) + return $ Header n (normalizeSpaces title) -- -- hrule block -- -hrule = try (do - oneOfStrings [ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n", - "\\newpage" ] - spaces - return HorizontalRule) +hrule = oneOfStrings [ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n", + "\\newpage" ] >> spaces >> return HorizontalRule -- -- code blocks @@ -183,37 +175,28 @@ hrule = try (do codeBlock = codeBlock1 <|> codeBlock2 -codeBlock1 = try (do +codeBlock1 = try $ do string "\\begin{verbatim}" -- don't use begin function because it -- gobbles whitespace - option "" blanklines -- we want to gobble blank lines, but not + optional blanklines -- we want to gobble blank lines, but not -- leading space contents <- manyTill anyChar (try (string "\\end{verbatim}")) spaces - return (CodeBlock (stripTrailingNewlines contents))) + return $ CodeBlock (stripTrailingNewlines contents) -codeBlock2 = try (do - string "\\begin{Verbatim}" -- used by fancyverb package +codeBlock2 = try $ do + string "\\begin{Verbatim}" -- used by fancyvrb package option "" blanklines contents <- manyTill anyChar (try (string "\\end{Verbatim}")) spaces - return (CodeBlock (stripTrailingNewlines contents))) + return $ CodeBlock (stripTrailingNewlines contents) -- -- block quotes -- -blockQuote = choice [ blockQuote1, blockQuote2 ] <?> "blockquote" - -blockQuote1 = try (do - blocks <- environment "quote" - spaces - return (BlockQuote blocks)) - -blockQuote2 = try (do - blocks <- environment "quotation" - spaces - return (BlockQuote blocks)) +blockQuote = (environment "quote" <|> environment "quotation") >>~ spaces >>= + return . BlockQuote -- -- math block @@ -223,12 +206,12 @@ mathBlock = mathBlockWith (begin "equation") (end "equation") <|> mathBlockWith (begin "displaymath") (end "displaymath") <|> mathBlockWith (string "\\[") (string "\\]") <?> "math block" -mathBlockWith start end = try (do +mathBlockWith start end = try $ do start spaces result <- manyTill anyChar end spaces - return (BlockQuote [Para [TeX ("$" ++ result ++ "$")]])) + return $ BlockQuote [Para [TeX ("$" ++ result ++ "$")]] -- -- list blocks @@ -237,69 +220,66 @@ mathBlockWith start end = try (do list = bulletList <|> orderedList <|> definitionList <?> "list" listItem = try $ do - ("item", _, args) <- command - spaces - state <- getState - let oldParserContext = stateParserContext state - updateState (\state -> state {stateParserContext = ListItemState}) - blocks <- many block - updateState (\state -> state {stateParserContext = oldParserContext}) - opt <- case args of - ([x]) | "[" `isPrefixOf` x && "]" `isSuffixOf` x -> - parseFromString (many inline) $ tail $ init x - _ -> return [] - return (opt, blocks) + ("item", _, args) <- command + spaces + state <- getState + let oldParserContext = stateParserContext state + updateState (\state -> state {stateParserContext = ListItemState}) + blocks <- many block + updateState (\state -> state {stateParserContext = oldParserContext}) + opt <- case args of + ([x]) | "[" `isPrefixOf` x && "]" `isSuffixOf` x -> + parseFromString (many inline) $ tail $ init x + _ -> return [] + return (opt, blocks) orderedList = try $ do - string "\\begin{enumerate}" - (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $ - try $ do failIfStrict - char '[' - res <- anyOrderedListMarker - char ']' - return res - spaces - option "" $ try $ do string "\\setlength{\\itemindent}" - char '{' - manyTill anyChar (char '}') - spaces - start <- option 1 $ try $ do failIfStrict - string "\\setcounter{enum" - many1 (char 'i') - string "}{" - num <- many1 digit - char '}' - spaces - return $ (read num) + 1 - items <- many listItem - end "enumerate" - spaces - return $ OrderedList (start, style, delim) $ map snd items + string "\\begin{enumerate}" + (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $ + try $ do failIfStrict + char '[' + res <- anyOrderedListMarker + char ']' + return res + spaces + option "" $ try $ do string "\\setlength{\\itemindent}" + char '{' + manyTill anyChar (char '}') + spaces + start <- option 1 $ try $ do failIfStrict + string "\\setcounter{enum" + many1 (char 'i') + string "}{" + num <- many1 digit + char '}' + spaces + return $ (read num) + 1 + items <- many listItem + end "enumerate" + spaces + return $ OrderedList (start, style, delim) $ map snd items bulletList = try $ do - begin "itemize" - spaces - items <- many listItem - end "itemize" - spaces - return (BulletList $ map snd items) + begin "itemize" + spaces + items <- many listItem + end "itemize" + spaces + return (BulletList $ map snd items) definitionList = try $ do - begin "description" - spaces - items <- many listItem - end "description" - spaces - return (DefinitionList items) + begin "description" + spaces + items <- many listItem + end "description" + spaces + return (DefinitionList items) -- -- paragraph block -- -para = try (do - result <- many1 inline - spaces - return (Para (normalizeSpaces result))) +para = many1 inline >>~ spaces >>= return . Para . normalizeSpaces -- -- title authors date @@ -307,33 +287,30 @@ para = try (do bibliographic = choice [ maketitle, title, authors, date ] -maketitle = try (do - string "\\maketitle" - spaces - return Null) +maketitle = try (string "\\maketitle") >> spaces >> return Null -title = try (do +title = try $ do string "\\title{" tit <- manyTill inline (char '}') spaces updateState (\state -> state { stateTitle = tit }) - return Null) + return Null -authors = try (do +authors = try $ do string "\\author{" authors <- manyTill anyChar (char '}') spaces let authors' = map removeLeadingTrailingSpace $ lines $ substitute "\\\\" "\n" authors updateState (\state -> state { stateAuthors = authors' }) - return Null) + return Null -date = try (do +date = try $ do string "\\date{" date' <- manyTill anyChar (char '}') spaces updateState (\state -> state { stateDate = date' }) - return Null) + return Null -- -- item block @@ -341,14 +318,14 @@ date = try (do -- -- this forces items to be parsed in different blocks -itemBlock = try (do +itemBlock = try $ do ("item", _, args) <- command state <- getState if (stateParserContext state == ListItemState) then fail "item should be handled by list block" else if null args then return Null - else return (Plain [Str (stripFirstAndLast (head args))])) + else return $ Plain [Str (stripFirstAndLast (head args))] -- -- raw LaTeX @@ -362,77 +339,93 @@ specialEnvironment = do -- these are always parsed as raw -- | Parse any LaTeX environment and return a Para block containing -- the whole literal environment as raw TeX. rawLaTeXEnvironment :: GenParser Char st Block -rawLaTeXEnvironment = try (do - string "\\begin" - char '{' +rawLaTeXEnvironment = try $ do + string "\\begin{" name <- many1 alphaNum star <- option "" (string "*") -- for starred variants let name' = name ++ star char '}' args <- option [] commandArgs let argStr = concat args - contents <- manyTill (choice [(many1 (noneOf "\\")), + contents <- manyTill (choice [ (many1 (noneOf "\\")), (do (Para [TeX str]) <- rawLaTeXEnvironment return str), string "\\" ]) (end name') spaces - return (Para [TeX ("\\begin{" ++ name' ++ "}" ++ argStr ++ - (concat contents) ++ "\\end{" ++ name' ++ "}")])) + return $ Para [TeX $ "\\begin{" ++ name' ++ "}" ++ argStr ++ + concat contents ++ "\\end{" ++ name' ++ "}"] -unknownEnvironment = try (do +unknownEnvironment = try $ do state <- getState result <- if stateParseRaw state -- check whether we should include raw TeX then rawLaTeXEnvironment -- if so, get whole raw environment else anyEnvironment -- otherwise just the contents - return result) + return result -unknownCommand = try (do - notFollowedBy' $ choice $ map end - ["itemize", "enumerate", "description", "document"] +unknownCommand = try $ do + notFollowedBy' $ choice $ map end ["itemize", "enumerate", "description", + "document"] (name, star, args) <- command spaces let argStr = concat args state <- getState - if (name == "item") && ((stateParserContext state) == ListItemState) + if name == "item" && (stateParserContext state) == ListItemState then fail "should not be parsed as raw" else string "" if stateParseRaw state - then return (Plain [TeX ("\\" ++ name ++ star ++ argStr)]) - else return (Plain [Str (joinWithSep " " args)])) + then return $ Plain [TeX ("\\" ++ name ++ star ++ argStr)] + else return $ Plain [Str (joinWithSep " " args)] -- latex comment -comment = try (do - char '%' - result <- manyTill anyChar newline - spaces - return Null) +comment = try $ char '%' >> manyTill anyChar newline >> spaces >> return Null -- -- inline -- -inline = choice [ strong, emph, strikeout, superscript, subscript, - ref, lab, code, linebreak, spacer, - math, ellipses, emDash, enDash, hyphen, quoted, apostrophe, - accentedChar, specialChar, specialInline, escapedChar, - unescapedChar, str, endline, whitespace ] <?> "inline" - -specialInline = choice [ url, link, image, footnote, rawLaTeXInline ] - <?> "link, raw TeX, note, or image" +inline = choice [ strong + , emph + , strikeout + , superscript + , subscript + , ref + , lab + , code + , linebreak + , spacer + , math + , ellipses + , emDash + , enDash + , hyphen + , quoted + , apostrophe + , accentedChar + , specialChar + , url + , link + , image + , footnote + , rawLaTeXInline + , escapedChar + , unescapedChar + , str + , endline + , whitespace ] <?> "inline" accentedChar = normalAccentedChar <|> specialAccentedChar -normalAccentedChar = try (do +normalAccentedChar = try $ do char '\\' accent <- oneOf "'`^\"~" - character <- choice [ between (char '{') (char '}') anyChar, anyChar ] + character <- (try $ char '{' >> alphaNum >>~ char '}') <|> alphaNum let table = fromMaybe [] $ lookup character accentTable let result = case lookup accent table of Just num -> chr num Nothing -> '?' - return (Str [result])) + return $ Str [result] -- an association list of letters and association list of accents -- and decimal character numbers. @@ -451,245 +444,179 @@ accentTable = ('u', [('`', 249), ('\'', 250), ('^', 251), ('"', 252)]) ] specialAccentedChar = choice [ ccedil, aring, iuml, szlig, aelig, - oslash, pound, euro, copyright, sect ] + oslash, pound, euro, copyright, sect ] -ccedil = try (do +ccedil = try $ do char '\\' letter <- oneOfStrings ["cc", "cC"] let num = if letter == "cc" then 231 else 199 - return (Str [chr num])) + return $ Str [chr num] -aring = try (do +aring = try $ do char '\\' letter <- oneOfStrings ["aa", "AA"] let num = if letter == "aa" then 229 else 197 - return (Str [chr num])) + return $ Str [chr num] -iuml = try (do - string "\\\"" - oneOfStrings ["\\i", "{\\i}"] - return (Str [chr 239])) +iuml = try (string "\\\"") >> oneOfStrings ["\\i", "{\\i}"] >> + return (Str [chr 239]) -icirc = try (do - string "\\^" - oneOfStrings ["\\i", "{\\i}"] - return (Str [chr 238])) +icirc = try (string "\\^") >> oneOfStrings ["\\i", "{\\i}"] >> + return (Str [chr 238]) -szlig = try (do - string "\\ss" - return (Str [chr 223])) +szlig = try (string "\\ss") >> return (Str [chr 223]) -oslash = try (do +oslash = try $ do char '\\' letter <- choice [char 'o', char 'O'] let num = if letter == 'o' then 248 else 216 - return (Str [chr num])) + return $ Str [chr num] -aelig = try (do +aelig = try $ do char '\\' letter <- oneOfStrings ["ae", "AE"] let num = if letter == "ae" then 230 else 198 - return (Str [chr num])) + return $ Str [chr num] -pound = try (do - string "\\pounds" - return (Str [chr 163])) +pound = try (string "\\pounds") >> return (Str [chr 163]) -euro = try (do - string "\\euro" - return (Str [chr 8364])) +euro = try (string "\\euro") >> return (Str [chr 8364]) -copyright = try (do - string "\\copyright" - return (Str [chr 169])) +copyright = try (string "\\copyright") >> return (Str [chr 169]) -sect = try (do - string "\\S" - return (Str [chr 167])) +sect = try (string "\\S") >> return (Str [chr 167]) escapedChar = do result <- escaped (oneOf " $%&_#{}\n") - return (if result == Str "\n" then Str " " else result) + return $ if result == Str "\n" then Str " " else result -unescapedChar = do -- ignore standalone, nonescaped special characters - oneOf "$^&_#{}|<>" - return (Str "") +-- ignore standalone, nonescaped special characters +unescapedChar = oneOf "$^&_#{}|<>" >> return (Str "") specialChar = choice [ backslash, tilde, caret, bar, lt, gt ] -backslash = try (do - string "\\textbackslash" - return (Str "\\")) +backslash = try (string "\\textbackslash") >> return (Str "\\") -tilde = try (do - string "\\ensuremath{\\sim}" - return (Str "~")) +tilde = try (string "\\ensuremath{\\sim}") >> return (Str "~") -caret = try (do - string "\\^{}" - return (Str "^")) +caret = try (string "\\^{}") >> return (Str "^") -bar = try (do - string "\\textbar" - return (Str "\\")) +bar = try (string "\\textbar") >> return (Str "\\") -lt = try (do - string "\\textless" - return (Str "<")) +lt = try (string "\\textless") >> return (Str "<") -gt = try (do - string "\\textgreater" - return (Str ">")) +gt = try (string "\\textgreater") >> return (Str ">") -code = try (do +code = try $ do string "\\verb" marker <- anyChar result <- manyTill anyChar (char marker) - let result' = removeLeadingTrailingSpace result - return (Code result')) + return $ Code $ removeLeadingTrailingSpace result -emph = try (do - oneOfStrings [ "\\emph{", "\\textit{" ] - result <- manyTill inline (char '}') - return (Emph result)) +emph = try $ oneOfStrings [ "\\emph{", "\\textit{" ] >> + manyTill inline (char '}') >>= return . Emph -strikeout = try $ do - string "\\sout{" - result <- manyTill inline (char '}') - return (Strikeout result) +strikeout = try $ string "\\sout{" >> manyTill inline (char '}') >>= + return . Strikeout -superscript = try $ do - string "\\textsuperscript{" - result <- manyTill inline (char '}') - return (Superscript result) +superscript = try $ string "\\textsuperscript{" >> + manyTill inline (char '}') >>= return . Superscript -- note: \textsubscript isn't a standard latex command, but we use -- a defined version in pandoc. -subscript = try $ do - string "\\textsubscript{" - result <- manyTill inline (char '}') - return (Subscript result) +subscript = try $ string "\\textsubscript{" >> manyTill inline (char '}') >>= + return . Subscript -apostrophe = do - char '\'' - return Apostrophe +apostrophe = char '\'' >> return Apostrophe -quoted = do - doubleQuoted <|> singleQuoted +quoted = doubleQuoted <|> singleQuoted -singleQuoted = try (do - result <- enclosed singleQuoteStart singleQuoteEnd inline - return $ Quoted SingleQuote $ normalizeSpaces result) +singleQuoted = enclosed singleQuoteStart singleQuoteEnd inline >>= + return . Quoted SingleQuote . normalizeSpaces -doubleQuoted = try (do - result <- enclosed doubleQuoteStart doubleQuoteEnd inline - return $ Quoted DoubleQuote $ normalizeSpaces result) +doubleQuoted = enclosed doubleQuoteStart doubleQuoteEnd inline >>= + return . Quoted DoubleQuote . normalizeSpaces singleQuoteStart = char '`' -singleQuoteEnd = char '\'' >> notFollowedBy alphaNum +singleQuoteEnd = try $ char '\'' >> notFollowedBy alphaNum doubleQuoteStart = string "``" doubleQuoteEnd = string "''" -ellipses = try (do - string "\\ldots" - option "" (try (string "{}")) - return Ellipses) +ellipses = try $ string "\\ldots" >> optional (try (string "{}")) >> + return Ellipses -enDash = try (do - string "--" - notFollowedBy (char '-') - return EnDash) +enDash = try (string "--") >> return EnDash -emDash = try (do - string "---" - return EmDash) +emDash = try (string "---") >> return EmDash -hyphen = do - char '-' - return (Str "-") +hyphen = char '-' >> return (Str "-") -lab = try (do +lab = try $ do string "\\label{" result <- manyTill anyChar (char '}') - return (Str ("(" ++ result ++ ")"))) + return $ Str $ "(" ++ result ++ ")" -ref = try (do - string "\\ref{" - result <- manyTill anyChar (char '}') - return (Str (result))) +ref = try (string "\\ref{") >> manyTill anyChar (char '}') >>= return . Str -strong = try (do - string "\\textbf{" - result <- manyTill inline (char '}') - return (Strong result)) +strong = try (string "\\textbf{") >> manyTill inline (char '}') >>= + return . Strong -whitespace = do - many1 (oneOf "~ \t") - return Space +whitespace = many1 (oneOf "~ \t") >> return Space -- hard line break -linebreak = try (do - string "\\\\" - return LineBreak) +linebreak = try (string "\\\\") >> return LineBreak -spacer = try $ do - string "\\," - return (Str "") +spacer = try (string "\\,") >> return (Str "") -str = do - result <- many1 (noneOf specialChars) - return (Str result) +str = many1 (noneOf specialChars) >>= return . Str -- endline internal to paragraph -endline = try (do - newline - notFollowedBy blankline - return Space) +endline = try $ newline >> notFollowedBy blankline >> return Space -- math math = math1 <|> math2 <?> "math" -math1 = try (do +math1 = try $ do char '$' result <- many (noneOf "$") char '$' - return (TeX ("$" ++ result ++ "$"))) + return $ TeX ("$" ++ result ++ "$") -math2 = try (do +math2 = try $ do string "\\(" result <- many (noneOf "$") string "\\)" - return (TeX ("$" ++ result ++ "$"))) + return $ TeX ("$" ++ result ++ "$") -- -- links and images -- -url = try (do +url = try $ do string "\\url" url <- charsInBalanced '{' '}' - return (Link [Code url] (url, ""))) + return $ Link [Code url] (url, "") -link = try (do +link = try $ do string "\\href{" url <- manyTill anyChar (char '}') char '{' label <- manyTill inline (char '}') - return (Link (normalizeSpaces label) (url, ""))) + return $ Link (normalizeSpaces label) (url, "") -image = try (do +image = try $ do ("includegraphics", _, args) <- command let args' = filter isArg args -- filter out options let src = if null args' then ("", "") else (stripFirstAndLast (head args'), "") - return (Image [Str "image"] src)) + return $ Image [Str "image"] src -footnote = try (do +footnote = try $ do (name, _, (contents:[])) <- command if ((name == "footnote") || (name == "thanks")) then string "" @@ -700,16 +627,15 @@ footnote = try (do setInput $ contents' blocks <- parseBlocks setInput rest - return (Note blocks)) + return $ Note blocks -- | Parse any LaTeX command and return it in a raw TeX inline element. rawLaTeXInline :: GenParser Char ParserState Inline -rawLaTeXInline = try (do +rawLaTeXInline = try $ do (name, star, args) <- command - let argStr = concat args state <- getState if ((name == "begin") || (name == "end") || (name == "item")) then fail "not an inline command" else string "" - return (TeX ("\\" ++ name ++ star ++ argStr))) + return $ TeX ("\\" ++ name ++ star ++ concat args) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 3ccb74ba7..80a8507b4 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -31,28 +31,24 @@ module Text.Pandoc.Readers.Markdown ( readMarkdown ) where -import Data.List ( findIndex, sortBy, transpose, isSuffixOf, intersect, lookup ) +import Data.List ( transpose, isSuffixOf, lookup, sortBy ) +import Data.Ord ( comparing ) import Data.Char ( isAlphaNum ) -import Text.Pandoc.ParserCombinators import Text.Pandoc.Definition -import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment ) import Text.Pandoc.Shared +import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment ) import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag, anyHtmlInlineTag, anyHtmlTag, anyHtmlEndTag, htmlEndTag, extractTagType, htmlBlockElement ) -import Text.Pandoc.Entities ( characterEntity, decodeEntities ) +import Text.Pandoc.CharacterReferences ( decodeCharacterReferences ) import Text.ParserCombinators.Parsec -- | Read markdown from an input string and return a Pandoc document. readMarkdown :: ParserState -> String -> Pandoc readMarkdown state str = (readWith parseMarkdown) state (str ++ "\n\n") --- | Parse markdown string with default options and print result (for testing). -testString :: String -> IO () -testString = testStringWith parseMarkdown - -- -- Constants and data structure definitions -- @@ -70,19 +66,16 @@ specialChars = "\\[]*_~`<>$!^-.&'\"" -- auxiliary functions -- --- | Skip a single endline if there is one. -skipEndline = option Space endline - indentSpaces = try $ do state <- getState let tabStop = stateTabStop state try (count tabStop (char ' ')) <|> - (do{many (char ' '); string "\t"}) <?> "indentation" + (many (char ' ') >> string "\t") <?> "indentation" nonindentSpaces = do state <- getState let tabStop = stateTabStop state - choice (map (\n -> (try (count n (char ' ')))) (reverse [0..(tabStop - 1)])) + choice $ map (\n -> (try (count n (char ' ')))) $ reverse [0..(tabStop - 1)] -- | Fail unless we're at beginning of a line. failUnlessBeginningOfLine = do @@ -94,20 +87,21 @@ failUnlessSmart = do state <- getState if stateSmart state then return () else fail "Smart typography feature" +-- | Parse an inline Str element with a given content. +inlineString str = try $ do + (Str res) <- inline + if res == str then return res else fail $ "unexpected Str content" + -- | Parse a sequence of inline elements between a string -- @opener@ and a string @closer@, including inlines -- between balanced pairs of @opener@ and a @closer@. inlinesInBalanced :: String -> String -> GenParser Char ParserState [Inline] inlinesInBalanced opener closer = try $ do - let openerSymbol = try $ do - res <- inline - if res == Str opener - then return res - else pzero - try (string opener) - result <- manyTill ( (do lookAhead openerSymbol - bal <- inlinesInBalanced opener closer - return $ [Str opener] ++ bal ++ [Str closer]) + string opener + result <- manyTill ( (do lookAhead (inlineString opener) + -- because it might be a link... + bal <- inlinesInBalanced opener closer + return $ [Str opener] ++ bal ++ [Str closer]) <|> (count 1 inline)) (try (string closer)) return $ concat result @@ -116,59 +110,55 @@ inlinesInBalanced opener closer = try $ do -- document structure -- -titleLine = try (do - char '%' - skipSpaces - line <- manyTill inline newline - return line) +titleLine = try $ char '%' >> skipSpaces >> manyTill inline newline -authorsLine = try (do +authorsLine = try $ do char '%' skipSpaces authors <- sepEndBy (many1 (noneOf ",;\n")) (oneOf ",;") newline - return (map (decodeEntities . removeLeadingTrailingSpace) authors)) + return $ map (decodeCharacterReferences . removeLeadingTrailingSpace) authors -dateLine = try (do +dateLine = try $ do char '%' skipSpaces date <- many (noneOf "\n") newline - return (decodeEntities $ removeTrailingSpace date)) + return $ decodeCharacterReferences $ removeTrailingSpace date -titleBlock = try (do +titleBlock = try $ do failIfStrict title <- option [] titleLine author <- option [] authorsLine date <- option "" dateLine - option "" blanklines - return (title, author, date)) + optional blanklines + return (title, author, date) parseMarkdown = do - updateState (\state -> state { stateParseRaw = True }) -- markdown allows raw HTML + -- markdown allows raw HTML + updateState (\state -> state { stateParseRaw = True }) (title, author, date) <- option ([],[],"") titleBlock -- go through once just to get list of reference keys - refs <- manyTill (referenceKey <|> (do l <- lineClump - return (LineClump l))) eof + refs <- manyTill (referenceKey <|> (lineClump >>= return . LineClump)) eof let keys = map (\(KeyBlock label target) -> (label, target)) $ filter isKeyBlock refs let rawlines = map (\(LineClump ln) -> ln) $ filter isLineClump refs setInput $ concat rawlines -- with keys stripped out updateState (\state -> state { stateKeys = keys }) - -- now go through for notes - refs <- manyTill (noteBlock <|> (do l <- lineClump - return (LineClump l))) eof + -- now go through for notes (which may contain references - hence 2nd pass) + refs <- manyTill (noteBlock <|> (lineClump >>= return . LineClump)) eof let notes = map (\(NoteBlock label blocks) -> (label, blocks)) $ filter isNoteBlock refs let rawlines = map (\(LineClump ln) -> ln) $ filter isLineClump refs - setInput $ concat rawlines -- with note blocks and keys stripped out + -- go through a 3rd time, with note blocks and keys stripped out + setInput $ concat rawlines updateState (\state -> state { stateNotes = notes }) - blocks <- parseBlocks -- go through again, for real + blocks <- parseBlocks let blocks' = filter (/= Null) blocks - return (Pandoc (Meta title author date) blocks') + return $ Pandoc (Meta title author date) blocks' -- --- initial pass for references +-- initial pass for references and notes -- referenceKey = try $ do @@ -176,9 +166,9 @@ referenceKey = try $ do label <- reference char ':' skipSpaces - option ' ' (char '<') + optional (char '<') src <- many (noneOf "> \n\t") - option ' ' (char '>') + optional (char '>') tit <- option "" title blanklines return $ KeyBlock label (removeTrailingSpace src, tit) @@ -189,33 +179,28 @@ noteMarker = try $ do manyTill (noneOf " \t\n") (char ']') rawLine = try $ do - notFollowedBy' blankline + notFollowedBy blankline notFollowedBy' noteMarker contents <- many1 nonEndline - end <- option "" (do - newline - option "" indentSpaces - return "\n") - return (contents ++ end) + end <- option "" (newline >> optional indentSpaces >> return "\n") + return $ contents ++ end -rawLines = do - lines <- many1 rawLine - return (concat lines) +rawLines = many1 rawLine >>= return . concat noteBlock = try $ do failIfStrict ref <- noteMarker char ':' - option ' ' blankline - option "" indentSpaces - raw <- sepBy rawLines (try (do {blankline; indentSpaces})) - option "" blanklines + optional blankline + optional indentSpaces + raw <- sepBy rawLines (try (blankline >> indentSpaces)) + optional blanklines -- parse the extracted text, which may contain various block elements: rest <- getInput setInput $ (joinWithSep "\n" raw) ++ "\n\n" contents <- parseBlocks setInput rest - return (NoteBlock ref contents) + return $ NoteBlock ref contents -- -- parsing blocks @@ -239,48 +224,39 @@ block = choice [ header -- header blocks -- -header = choice [ setextHeader, atxHeader ] <?> "header" +header = setextHeader <|> atxHeader <?> "header" -atxHeader = try (do +atxHeader = try $ do lead <- many1 (char '#') - notFollowedBy (char '.') -- this would be a list - notFollowedBy (char ')') + notFollowedBy (char '.' <|> char ')') -- this would be a list skipSpaces txt <- manyTill inline atxClosing - return (Header (length lead) (normalizeSpaces txt))) + return $ Header (length lead) (normalizeSpaces txt) -atxClosing = try (do - skipMany (char '#') - skipSpaces - newline - option "" blanklines) +atxClosing = try $ skipMany (char '#') >> skipSpaces >> newline >> + option "" blanklines setextHeader = choice $ - map (\x -> setextH x) (enumFromTo 1 (length setextHChars)) + map (\x -> setextH x) $ enumFromTo 1 (length setextHChars) -setextH n = try (do +setextH n = try $ do txt <- many1Till inline newline many1 (char (setextHChars !! (n-1))) skipSpaces newline - option "" blanklines - return (Header n (normalizeSpaces txt))) + optional blanklines + return $ Header n (normalizeSpaces txt) -- -- hrule block -- -hruleWith chr = try (do - skipSpaces - char chr - skipSpaces - char chr - skipSpaces - char chr - skipMany (oneOf (chr:spaceChars)) +hruleWith chr = try $ do + count 3 (skipSpaces >> char chr) + skipMany (skipSpaces >> char chr) newline - option "" blanklines - return HorizontalRule) + optional blanklines + return HorizontalRule hrule = choice (map hruleWith hruleChars) <?> "hrule" @@ -288,67 +264,55 @@ hrule = choice (map hruleWith hruleChars) <?> "hrule" -- code blocks -- -indentedLine = try (do +indentedLine = try $ do indentSpaces result <- manyTill anyChar newline - return (result ++ "\n")) + return $ result ++ "\n" -- two or more indented lines, possibly separated by blank lines -indentedBlock = try (do +indentedBlock = try $ do res1 <- indentedLine blanks <- many blankline - res2 <- choice [indentedBlock, indentedLine] - return (res1 ++ blanks ++ res2)) + res2 <- indentedBlock <|> indentedLine + return $ res1 ++ blanks ++ res2 -codeBlock = do - result <- choice [indentedBlock, indentedLine] - option "" blanklines - return (CodeBlock (stripTrailingNewlines result)) +codeBlock = (indentedBlock <|> indentedLine) >>~ optional blanklines >>= + return . CodeBlock . stripTrailingNewlines -- -- block quotes -- -emacsBoxQuote = try (do +emacsBoxQuote = try $ do failIfStrict string ",----" manyTill anyChar newline - raw <- manyTill (try (do - char '|' - option ' ' (char ' ') - result <- manyTill anyChar newline - return result)) - (string "`----") - manyTill anyChar newline - option "" blanklines - return raw) + raw <- manyTill + (try (char '|' >> optional (char ' ') >> manyTill anyChar newline)) + (try (string "`----")) + blanklines + return raw -emailBlockQuoteStart = try (do - nonindentSpaces - char '>' - option ' ' (char ' ') - return "> ") +emailBlockQuoteStart = try $ nonindentSpaces >> char '>' >>~ optional (char ' ') -emailBlockQuote = try (do +emailBlockQuote = try $ do emailBlockQuoteStart - raw <- sepBy (many (choice [nonEndline, - (try (do - endline - notFollowedBy' emailBlockQuoteStart - return '\n'))])) - (try (do {newline; emailBlockQuoteStart})) - newline <|> (do{ eof; return '\n' }) - option "" blanklines - return raw) + raw <- sepBy (many (nonEndline <|> + (try (endline >> notFollowedBy emailBlockQuoteStart >> + return '\n')))) + (try (newline >> emailBlockQuoteStart)) + newline <|> (eof >> return '\n') + optional blanklines + return raw blockQuote = do - raw <- choice [ emailBlockQuote, emacsBoxQuote ] + raw <- emailBlockQuote <|> emacsBoxQuote -- parse the extracted block, which may contain various block elements: rest <- getInput setInput $ (joinWithSep "\n" raw) ++ "\n\n" contents <- parseBlocks setInput rest - return (BlockQuote contents) + return $ BlockQuote contents -- -- list blocks @@ -357,7 +321,7 @@ blockQuote = do list = choice [ bulletList, orderedList, definitionList ] <?> "list" bulletListStart = try $ do - option ' ' newline -- if preceded by a Plain block in a list context + optional newline -- if preceded by a Plain block in a list context nonindentSpaces notFollowedBy' hrule -- because hrules start out just like lists oneOf bulletListMarkers @@ -365,7 +329,7 @@ bulletListStart = try $ do skipSpaces anyOrderedListStart = try $ do - option ' ' newline -- if preceded by a Plain block in a list context + optional newline -- if preceded by a Plain block in a list context nonindentSpaces state <- getState if stateStrict state @@ -375,7 +339,7 @@ anyOrderedListStart = try $ do else anyOrderedListMarker orderedListStart style delim = try $ do - option ' ' newline -- if preceded by a Plain block in a list context + optional newline -- if preceded by a Plain block in a list context nonindentSpaces state <- getState if stateStrict state @@ -387,40 +351,39 @@ orderedListStart style delim = try $ do skipSpaces -- parse a line of a list item (start = parser for beginning of list item) -listLine start = try (do +listLine start = try $ do notFollowedBy' start notFollowedBy blankline - notFollowedBy' (do - indentSpaces - many (spaceChar) - choice [bulletListStart, anyOrderedListStart >> return ()]) + notFollowedBy' (do indentSpaces + many (spaceChar) + bulletListStart <|> (anyOrderedListStart >> return ())) line <- manyTill anyChar newline - return (line ++ "\n")) + return $ line ++ "\n" -- parse raw text for one list item, excluding start marker and continuations -rawListItem start = try (do +rawListItem start = try $ do start result <- many1 (listLine start) blanks <- many blankline - return ((concat result) ++ blanks)) + return $ concat result ++ blanks -- continuation of a list item - indented and separated by blankline -- or (in compact lists) endline. -- note: nested lists are parsed as continuations -listContinuation start = try (do +listContinuation start = try $ do lookAhead indentSpaces result <- many1 (listContinuationLine start) blanks <- many blankline - return ((concat result) ++ blanks)) + return $ concat result ++ blanks -listContinuationLine start = try (do - notFollowedBy' blankline +listContinuationLine start = try $ do + notFollowedBy blankline notFollowedBy' start - option "" indentSpaces + optional indentSpaces result <- manyTill anyChar newline - return (result ++ "\n")) + return $ result ++ "\n" -listItem start = try (do +listItem start = try $ do first <- rawListItem start continuations <- many (listContinuation start) -- parsing with ListItemState forces markers at beginning of lines to @@ -436,18 +399,15 @@ listItem start = try (do contents <- parseBlocks setInput rest updateState (\st -> st {stateParserContext = oldContext}) - return contents) + return contents -orderedList = try (do +orderedList = do (start, style, delim) <- lookAhead anyOrderedListStart items <- many1 (listItem (orderedListStart style delim)) - let items' = compactify items - return (OrderedList (start, style, delim) items')) + return $ OrderedList (start, style, delim) $ compactify items -bulletList = try (do - items <- many1 (listItem bulletListStart) - let items' = compactify items - return (BulletList items')) +bulletList = many1 (listItem bulletListStart) >>= + return . BulletList . compactify -- definition lists @@ -470,9 +430,9 @@ defRawBlock = try $ do char ':' state <- getState let tabStop = stateTabStop state - try (count (tabStop - 1) (char ' ')) <|> (do{many (char ' '); string "\t"}) + try (count (tabStop - 1) (char ' ')) <|> (many (char ' ') >> string "\t") firstline <- anyLine - rawlines <- many (do {notFollowedBy' blankline; indentSpaces; anyLine}) + rawlines <- many (notFollowedBy blankline >> indentSpaces >> anyLine) trailing <- option "" blanklines return $ firstline ++ "\n" ++ unlines rawlines ++ trailing @@ -488,71 +448,62 @@ definitionList = do -- paragraph block -- -para = try (do +para = try $ do result <- many1 inline newline st <- getState if stateStrict st - then choice [lookAhead blockQuote, lookAhead header, - (do{blanklines; return Null})] - else choice [(do{lookAhead emacsBoxQuote; return Null}), - (do{blanklines; return Null})] - let result' = normalizeSpaces result - return (Para result')) - -plain = do - result <- many1 inline - let result' = normalizeSpaces result - return (Plain result') + then choice [ lookAhead blockQuote, lookAhead header, + (blanklines >> return Null) ] + else choice [ lookAhead emacsBoxQuote >> return Null, + (blanklines >> return Null) ] + return $ Para $ normalizeSpaces result + +plain = many1 inline >>= return . Plain . normalizeSpaces -- -- raw html -- -htmlElement = choice [strictHtmlBlock, - htmlBlockElement] <?> "html element" +htmlElement = strictHtmlBlock <|> htmlBlockElement <?> "html element" htmlBlock = do st <- getState if stateStrict st - then do - failUnlessBeginningOfLine - first <- htmlElement - finalSpace <- many (oneOf spaceChars) - finalNewlines <- many newline - return (RawHtml (first ++ finalSpace ++ finalNewlines)) + then try $ do failUnlessBeginningOfLine + first <- htmlElement + finalSpace <- many (oneOf spaceChars) + finalNewlines <- many newline + return $ RawHtml $ first ++ finalSpace ++ finalNewlines else rawHtmlBlocks -- True if tag is self-closing isSelfClosing tag = isSuffixOf "/>" $ filter (\c -> (not (c `elem` " \n\t"))) tag -strictHtmlBlock = try (do +strictHtmlBlock = try $ do tag <- anyHtmlBlockTag let tag' = extractTagType tag if isSelfClosing tag || tag' == "hr" then return tag - else do - contents <- many (do{notFollowedBy' (htmlEndTag tag'); - htmlElement <|> (count 1 anyChar)}) - end <- htmlEndTag tag' - return $ tag ++ (concat contents) ++ end) + else do contents <- many (notFollowedBy' (htmlEndTag tag') >> + (htmlElement <|> (count 1 anyChar))) + end <- htmlEndTag tag' + return $ tag ++ concat contents ++ end -rawHtmlBlocks = try (do +rawHtmlBlocks = try $ do htmlBlocks <- many1 rawHtmlBlock let combined = concatMap (\(RawHtml str) -> str) htmlBlocks - let combined' = if (last combined == '\n') + let combined' = if not (null combined) && last combined == '\n' then init combined -- strip extra newline else combined - return (RawHtml combined')) + return $ RawHtml combined' -- -- LaTeX -- -rawLaTeXEnvironment' = do - failIfStrict - rawLaTeXEnvironment +rawLaTeXEnvironment' = failIfStrict >> rawLaTeXEnvironment -- -- Tables @@ -560,54 +511,46 @@ rawLaTeXEnvironment' = do -- Parse a dashed line with optional trailing spaces; return its length -- and the length including trailing space. -dashedLine ch = do - dashes <- many1 (char ch) - sp <- many spaceChar - return $ (length dashes, length $ dashes ++ sp) +dashedLine ch = try $ do + dashes <- many1 (char ch) + sp <- many spaceChar + return $ (length dashes, length $ dashes ++ sp) -- Parse a table header with dashed lines of '-' preceded by -- one line of text. -simpleTableHeader = do - rawContent <- anyLine - initSp <- nonindentSpaces - dashes <- many1 (dashedLine '-') - newline - let (lengths, lines) = unzip dashes - let indices = scanl (+) (length initSp) lines - let rawHeads = tail $ splitByIndices (init indices) rawContent - let aligns = zipWith alignType (map (\a -> [a]) rawHeads) lengths - return $ (rawHeads, aligns, indices) +simpleTableHeader = try $ do + rawContent <- anyLine + initSp <- nonindentSpaces + dashes <- many1 (dashedLine '-') + newline + let (lengths, lines) = unzip dashes + let indices = scanl (+) (length initSp) lines + let rawHeads = tail $ splitByIndices (init indices) rawContent + let aligns = zipWith alignType (map (\a -> [a]) rawHeads) lengths + return (rawHeads, aligns, indices) -- Parse a table footer - dashed lines followed by blank line. -tableFooter = try $ do - nonindentSpaces - many1 (dashedLine '-') - blanklines +tableFooter = try $ nonindentSpaces >> many1 (dashedLine '-') >> blanklines -- Parse a table separator - dashed line. -tableSep = try $ do - nonindentSpaces - many1 (dashedLine '-') - string "\n" +tableSep = try $ nonindentSpaces >> many1 (dashedLine '-') >> string "\n" -- Parse a raw line and split it into chunks by indices. rawTableLine indices = do - notFollowedBy' (blanklines <|> tableFooter) - line <- many1Till anyChar newline - return $ map removeLeadingTrailingSpace $ tail $ - splitByIndices (init indices) line + notFollowedBy' (blanklines <|> tableFooter) + line <- many1Till anyChar newline + return $ map removeLeadingTrailingSpace $ tail $ + splitByIndices (init indices) line -- Parse a table line and return a list of lists of blocks (columns). -tableLine indices = try $ do - rawline <- rawTableLine indices - mapM (parseFromString (many plain)) rawline +tableLine indices = rawTableLine indices >>= mapM (parseFromString (many plain)) -- Parse a multiline table row and return a list of blocks (columns). multilineRow indices = try $ do - colLines <- many1 (rawTableLine indices) - option "" blanklines - let cols = map unlines $ transpose colLines - mapM (parseFromString (many plain)) cols + colLines <- many1 (rawTableLine indices) + optional blanklines + let cols = map unlines $ transpose colLines + mapM (parseFromString (many plain)) cols -- Calculate relative widths of table columns, based on indices widthsFromIndices :: Int -- Number of columns on terminal @@ -615,22 +558,22 @@ widthsFromIndices :: Int -- Number of columns on terminal -> [Float] -- Fractional relative sizes of columns widthsFromIndices _ [] = [] widthsFromIndices numColumns indices = - let lengths = zipWith (-) indices (0:indices) - totLength = sum lengths - quotient = if totLength > numColumns - then fromIntegral totLength - else fromIntegral numColumns - fracs = map (\l -> (fromIntegral l) / quotient) lengths in - tail fracs + let lengths = zipWith (-) indices (0:indices) + totLength = sum lengths + quotient = if totLength > numColumns + then fromIntegral totLength + else fromIntegral numColumns + fracs = map (\l -> (fromIntegral l) / quotient) lengths in + tail fracs -- Parses a table caption: inlines beginning with 'Table:' -- and followed by blank lines. tableCaption = try $ do - nonindentSpaces - string "Table:" - result <- many1 inline - blanklines - return $ normalizeSpaces result + nonindentSpaces + string "Table:" + result <- many1 inline + blanklines + return $ normalizeSpaces result -- Parse a table using 'headerParser', 'lineParser', and 'footerParser'. tableWith headerParser lineParser footerParser = try $ do @@ -653,30 +596,19 @@ simpleTable = tableWith simpleTableHeader tableLine blanklines multilineTable = tableWith multilineTableHeader multilineRow tableFooter multilineTableHeader = try $ do - tableSep - rawContent <- many1 (do{notFollowedBy' tableSep; - many1Till anyChar newline}) - initSp <- nonindentSpaces - dashes <- many1 (dashedLine '-') - newline - let (lengths, lines) = unzip dashes - let indices = scanl (+) (length initSp) lines - let rawHeadsList = transpose $ map - (\ln -> tail $ splitByIndices (init indices) ln) - rawContent - let rawHeads = map (joinWithSep " ") rawHeadsList - let aligns = zipWith alignType rawHeadsList lengths - return $ ((map removeLeadingTrailingSpace rawHeads), - aligns, indices) - --- Returns the longest of a list of strings. -longest :: [String] -> String -longest [] = "" -longest [x] = x -longest (x:xs) = - if (length x) >= (maximum $ map length xs) - then x - else longest xs + tableSep + rawContent <- many1 (notFollowedBy' tableSep >> many1Till anyChar newline) + initSp <- nonindentSpaces + dashes <- many1 (dashedLine '-') + newline + let (lengths, lines) = unzip dashes + let indices = scanl (+) (length initSp) lines + let rawHeadsList = transpose $ map + (\ln -> tail $ splitByIndices (init indices) ln) + rawContent + let rawHeads = map (joinWithSep " ") rawHeadsList + let aligns = zipWith alignType rawHeadsList lengths + return ((map removeLeadingTrailingSpace rawHeads), aligns, indices) -- Returns an alignment type for a table, based on a list of strings -- (the rows of the column header) and a number (the length of the @@ -684,19 +616,17 @@ longest (x:xs) = alignType :: [String] -> Int -> Alignment alignType [] len = AlignDefault alignType strLst len = - let str = longest $ map removeTrailingSpace strLst - leftSpace = if null str then False else ((str !! 0) `elem` " \t") - rightSpace = (length str < len || (str !! (len - 1)) `elem` " \t") in - case (leftSpace, rightSpace) of + let str = head $ sortBy (comparing length) $ + map removeTrailingSpace strLst + leftSpace = if null str then False else (str !! 0) `elem` " \t" + rightSpace = length str < len || (str !! (len - 1)) `elem` " \t" + in case (leftSpace, rightSpace) of (True, False) -> AlignRight (False, True) -> AlignLeft - (True, True) -> AlignCenter + (True, True) -> AlignCenter (False, False) -> AlignDefault -table = do - failIfStrict - result <- simpleTable <|> multilineTable <?> "table" - return result +table = failIfStrict >> (simpleTable <|> multilineTable) <?> "table" -- -- inline @@ -704,7 +634,7 @@ table = do inline = choice [ rawLaTeXInline' , escapedChar - , entity + , charRef , note , inlineNote , link @@ -734,80 +664,64 @@ escapedChar = try $ do result <- if stateStrict state then oneOf "\\`*_{}[]()>#+-.!~" else satisfy (not . isAlphaNum) - return (Str [result]) + return $ Str [result] -ltSign = try (do +ltSign = try $ do notFollowedBy (noneOf "<") -- continue only if it's a < notFollowedBy' rawHtmlBlocks -- don't return < if it starts html char '<' - return (Str ['<'])) + return $ Str ['<'] specialCharsMinusLt = filter (/= '<') specialChars symbol = do result <- oneOf specialCharsMinusLt - return (Str [result]) + return $ Str [result] -- parses inline code, between n `s and n `s -code = try (do +code = try $ do starts <- many1 (char '`') let num = length starts result <- many1Till anyChar (try (count num (char '`'))) -- get rid of any internal newlines - let result' = removeLeadingTrailingSpace $ joinWithSep " " $ lines result - return (Code result')) + return $ Code $ removeLeadingTrailingSpace $ joinWithSep " " $ lines result -mathWord = many1 (choice [ (noneOf " \t\n\\$"), - (try (do - c <- char '\\' - notFollowedBy (char '$') - return c))]) +mathWord = many1 ((noneOf " \t\n\\$") <|> + (try (char '\\') >>~ notFollowedBy (char '$'))) -math = try (do +math = try $ do failIfStrict char '$' notFollowedBy space words <- sepBy1 mathWord (many1 space) char '$' - return (TeX ("$" ++ (joinWithSep " " words) ++ "$"))) + return $ TeX ("$" ++ (joinWithSep " " words) ++ "$") -emph = do - result <- choice [ (enclosed (char '*') (char '*') inline), - (enclosed (char '_') (char '_') inline) ] - return $ Emph (normalizeSpaces result) +emph = ((enclosed (char '*') (char '*') inline) <|> + (enclosed (char '_') (char '_') inline)) >>= + return . Emph . normalizeSpaces -strong = do - result <- (enclosed (string "**") (string "**") inline) <|> - (enclosed (string "__") (string "__") inline) - return $ Strong (normalizeSpaces result) +strong = ((enclosed (string "**") (string "**") inline) <|> + (enclosed (string "__") (string "__") inline)) >>= + return . Strong . normalizeSpaces -strikeout = do - failIfStrict - result <- enclosed (string "~~") (string "~~") inline - return $ Strikeout (normalizeSpaces result) +strikeout = failIfStrict >> enclosed (string "~~") (string "~~") inline >>= + return . Strikeout . normalizeSpaces -superscript = do - failIfStrict - result <- enclosed (char '^') (char '^') - (notFollowedBy' whitespace >> inline) -- may not contain Space - return $ Superscript result +superscript = failIfStrict >> enclosed (char '^') (char '^') + (notFollowedBy' whitespace >> inline) >>= -- may not contain Space + return . Superscript -subscript = do - failIfStrict - result <- enclosed (char '~') (char '~') - (notFollowedBy' whitespace >> inline) -- may not contain Space - return $ Subscript result +subscript = failIfStrict >> enclosed (char '~') (char '~') + (notFollowedBy' whitespace >> inline) >>= -- may not contain Space + return . Subscript -smartPunctuation = do - failUnlessSmart - choice [ quoted, apostrophe, dash, ellipses ] +smartPunctuation = failUnlessSmart >> + choice [ quoted, apostrophe, dash, ellipses ] -apostrophe = do - char '\'' <|> char '\8217' - return Apostrophe +apostrophe = (char '\'' <|> char '\8217') >> return Apostrophe -quoted = do - doubleQuoted <|> singleQuoted +quoted = doubleQuoted <|> singleQuoted withQuoteContext context parser = do oldState <- getState @@ -820,15 +734,13 @@ withQuoteContext context parser = do singleQuoted = try $ do singleQuoteStart - withQuoteContext InSingleQuote $ do - result <- many1Till inline singleQuoteEnd - return $ Quoted SingleQuote $ normalizeSpaces result + withQuoteContext InSingleQuote $ many1Till inline singleQuoteEnd >>= + return . Quoted SingleQuote . normalizeSpaces doubleQuoted = try $ do doubleQuoteStart - withQuoteContext InDoubleQuote $ do - result <- many1Till inline doubleQuoteEnd - return $ Quoted DoubleQuote $ normalizeSpaces result + withQuoteContext InDoubleQuote $ many1Till inline doubleQuoteEnd >>= + return . Quoted DoubleQuote . normalizeSpaces failIfInQuoteContext context = do st <- getState @@ -836,88 +748,65 @@ failIfInQuoteContext context = do then fail "already inside quotes" else return () -singleQuoteStart = try $ do +singleQuoteStart = do failIfInQuoteContext InSingleQuote - char '\8216' <|> do - char '\'' - notFollowedBy (oneOf ")!],.;:-? \t\n") - notFollowedBy (try (do -- possessive or contraction - oneOfStrings ["s","t","m","ve","ll","re"] - satisfy (not . isAlphaNum))) - return '\'' - -singleQuoteEnd = try $ do - char '\'' <|> char '\8217' - notFollowedBy alphaNum - -doubleQuoteStart = try $ do - failIfInQuoteContext InDoubleQuote - char '"' <|> char '\8220' - notFollowedBy (oneOf " \t\n") + char '\8216' <|> + do char '\'' + notFollowedBy (oneOf ")!],.;:-? \t\n") + notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >> + satisfy (not . isAlphaNum))) -- possess/contraction + return '\'' + +singleQuoteEnd = (char '\'' <|> char '\8217') >> notFollowedBy alphaNum + +doubleQuoteStart = failIfInQuoteContext InDoubleQuote >> + (char '"' <|> char '\8220') >> + notFollowedBy (oneOf " \t\n") doubleQuoteEnd = char '"' <|> char '\8221' -ellipses = try (do - oneOfStrings ["...", " . . . ", ". . .", " . . ."] - return Ellipses) +ellipses = try $ oneOfStrings ["...", " . . . ", ". . .", " . . ."] >> + return Ellipses dash = enDash <|> emDash -enDash = try (do - char '-' - notFollowedBy (noneOf "0123456789") - return EnDash) +enDash = try $ char '-' >> notFollowedBy (noneOf "0123456789") >> return EnDash -emDash = try (do - skipSpaces - oneOfStrings ["---", "--"] - skipSpaces - return EmDash) +emDash = try $ skipSpaces >> oneOfStrings ["---", "--"] >> + skipSpaces >> return EmDash -whitespace = do - many1 (oneOf spaceChars) <?> "whitespace" - return Space +whitespace = (many1 (oneOf spaceChars) >> return Space) <?> "whitespace" -tabchar = do - tab - return (Str "\t") +tabchar = tab >> return (Str "\t") -- hard line break -linebreak = try (do - oneOf spaceChars - many1 (oneOf spaceChars) - endline - return LineBreak ) +linebreak = try $ oneOf spaceChars >> many1 (oneOf spaceChars) >> + endline >> return LineBreak nonEndline = satisfy (/='\n') -entity = do - ent <- characterEntity - return $ Str [ent] - strChar = noneOf (specialChars ++ spaceChars ++ "\n") -str = do - result <- many1 strChar - return (Str result) +str = many1 strChar >>= return . Str -- an endline character that can be treated as a space, not a structural break -endline = try (do +endline = try $ do newline notFollowedBy blankline st <- getState if stateStrict st then do - notFollowedBy' emailBlockQuoteStart + notFollowedBy emailBlockQuoteStart notFollowedBy (char '#') -- atx header - notFollowedBy (try (do{manyTill anyChar newline; - oneOf setextHChars})) -- setext header + notFollowedBy (manyTill anyChar newline >> oneOf setextHChars) + -- setext header else return () -- parse potential list-starts differently if in a list: - if (stateParserContext st) == ListItemState - then notFollowedBy' $ choice [bulletListStart, anyOrderedListStart >> return ()] + if stateParserContext st == ListItemState + then notFollowedBy' (bulletListStart <|> + (anyOrderedListStart >> return ())) else return () - return Space) + return Space -- -- links @@ -930,24 +819,23 @@ reference = notFollowedBy' (string "[^") >> -- footnote reference -- source for a link, with optional title source = try $ do char '(' - option ' ' (char '<') + optional (char '<') src <- many (noneOf ")> \t\n") - option ' ' (char '>') + optional (char '>') tit <- option "" title skipSpaces char ')' return (removeTrailingSpace src, tit) -titleWith startChar endChar = try (do +titleWith startChar endChar = try $ do leadingSpace <- many1 (oneOf " \t\n") if length (filter (=='\n') leadingSpace) > 1 then fail "title must be separated by space and on same or next line" else return () char startChar - tit <- manyTill anyChar (try (do char endChar - skipSpaces - notFollowedBy (noneOf ")\n"))) - return $ decodeEntities tit) + tit <- manyTill anyChar (try (char endChar >> skipSpaces >> + notFollowedBy (noneOf ")\n"))) + return $ decodeCharacterReferences tit title = choice [ titleWith '(' ')', titleWith '"' '"', @@ -955,22 +843,20 @@ title = choice [ titleWith '(' ')', link = choice [explicitLink, referenceLink] <?> "link" -explicitLink = try (do +explicitLink = try $ do label <- reference src <- source - return (Link label src)) + return $ Link label src -- a link like [this][ref] or [this][] or [this] referenceLink = try $ do label <- reference - ref <- option [] (try (do skipSpaces - option ' ' newline - skipSpaces - reference)) + ref <- option [] (try (skipSpaces >> optional newline >> + skipSpaces >> reference)) let ref' = if null ref then label else ref state <- getState case lookupKeySrc (stateKeys state) ref' of - Nothing -> fail "no corresponding key" + Nothing -> fail "no corresponding key" Just target -> return (Link label target) autoLink = autoLinkEmail <|> autoLinkRegular @@ -992,10 +878,10 @@ autoLinkRegular = try $ do let src = prot ++ rest return $ Link [Code src] (src, "") -image = try (do +image = try $ do char '!' (Link label src) <- link - return (Image label src)) + return $ Image label src note = try $ do failIfStrict @@ -1003,23 +889,21 @@ note = try $ do state <- getState let notes = stateNotes state case lookup ref notes of - Nothing -> fail "note not found" - Just contents -> return (Note contents) + Nothing -> fail "note not found" + Just contents -> return $ Note contents inlineNote = try $ do failIfStrict char '^' contents <- inlinesInBalanced "[" "]" - return (Note [Para contents]) + return $ Note [Para contents] -rawLaTeXInline' = do - failIfStrict - rawLaTeXInline +rawLaTeXInline' = failIfStrict >> rawLaTeXInline rawHtmlInline' = do st <- getState - result <- if stateStrict st - then choice [htmlBlockElement, anyHtmlTag, anyHtmlEndTag] - else choice [htmlBlockElement, anyHtmlInlineTag] - return (HtmlInline result) + result <- choice $ if stateStrict st + then [htmlBlockElement, anyHtmlTag, anyHtmlEndTag] + else [htmlBlockElement, anyHtmlInlineTag] + return $ HtmlInline result diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index a36c33d92..ce8fedf02 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -31,23 +31,14 @@ module Text.Pandoc.Readers.RST ( readRST ) where import Text.Pandoc.Definition -import Text.Pandoc.ParserCombinators import Text.Pandoc.Shared -import Text.Pandoc.Readers.HTML ( anyHtmlBlockTag, anyHtmlInlineTag ) -import Text.Regex ( matchRegex, mkRegex ) import Text.ParserCombinators.Parsec -import Data.Maybe ( fromMaybe ) import Data.List ( findIndex, delete ) -import Data.Char ( toUpper ) -- | Parse reStructuredText string and return Pandoc document. readRST :: ParserState -> String -> Pandoc readRST state str = (readWith parseRST) state (str ++ "\n\n") --- | Parse a string and print result (for testing). -testString :: String -> IO () -testString = testStringWith parseRST - -- -- Constants and data structure definitions --- @@ -62,15 +53,11 @@ specialChars = "\\`|*_<>$:[-" -- parsing documents -- -isAnonKey (ref, src) = (ref == [Str "_"]) - -isHeader1 :: Block -> Bool -isHeader1 (Header 1 _) = True -isHeader1 _ = False +isAnonKey (ref, src) = ref == [Str "_"] -isHeader2 :: Block -> Bool -isHeader2 (Header 2 _) = True -isHeader2 _ = False +isHeader :: Int -> Block -> Bool +isHeader n (Header x _) = x == n +isHeader _ _ = False -- | Promote all headers in a list of blocks. (Part of -- title transformation for RST.) @@ -86,23 +73,23 @@ promoteHeaders num [] = [] titleTransform :: [Block] -- ^ list of blocks -> ([Block], [Inline]) -- ^ modified list of blocks, title titleTransform ((Header 1 head1):(Header 2 head2):rest) = -- title subtitle - if (any isHeader1 rest) || (any isHeader2 rest) + if (any (isHeader 1) rest) || (any (isHeader 2) rest) then ((Header 1 head1):(Header 2 head2):rest, []) else ((promoteHeaders 2 rest), head1 ++ [Str ":", Space] ++ head2) titleTransform ((Header 1 head1):rest) = -- title, no subtitle - if (any isHeader1 rest) + if (any (isHeader 1) rest) then ((Header 1 head1):rest, []) else ((promoteHeaders 1 rest), head1) titleTransform blocks = (blocks, []) parseRST = do - -- first pass: get anonymous keys - refs <- manyTill (referenceKey <|> (do l <- lineClump - return (LineClump l))) eof + -- first pass: get keys + refs <- manyTill (referenceKey <|> (lineClump >>= return . LineClump)) eof let keys = map (\(KeyBlock label target) -> (label, target)) $ filter isKeyBlock refs + -- second pass, with keys stripped out let rawlines = map (\(LineClump ln) -> ln) $ filter isLineClump refs - setInput $ concat rawlines -- with keys stripped out + setInput $ concat rawlines updateState (\state -> state { stateKeys = keys }) blocks <- parseBlocks let blocks' = filter (/= Null) blocks @@ -113,7 +100,7 @@ parseRST = do let authors = stateAuthors state let date = stateDate state let title' = if (null title) then (stateTitle state) else title - return (Pandoc (Meta title' authors date) blocks'') + return $ Pandoc (Meta title' authors date) blocks'' -- -- parsing blocks @@ -121,32 +108,39 @@ parseRST = do parseBlocks = manyTill block eof -block = choice [ codeBlock, rawHtmlBlock, rawLaTeXBlock, blockQuote, - imageBlock, unknownDirective, header, - hrule, list, fieldList, lineBlock, para, plain, - nullBlock ] <?> "block" +block = choice [ codeBlock + , rawHtmlBlock + , rawLaTeXBlock + , blockQuote + , imageBlock + , unknownDirective + , header + , hrule + , list + , fieldList + , lineBlock + , para + , plain + , nullBlock ] <?> "block" -- -- field list -- -fieldListItem = try (do +fieldListItem = try $ do char ':' name <- many1 alphaNum string ": " skipSpaces first <- manyTill anyChar newline - rest <- many (do - notFollowedBy (char ':') - notFollowedBy blankline - skipSpaces - manyTill anyChar newline ) - return (name, (joinWithSep " " (first:rest)))) - -fieldList = try (do + rest <- many (notFollowedBy ((char ':') <|> blankline) >> + skipSpaces >> manyTill anyChar newline) + return $ (name, (joinWithSep " " (first:rest))) + +fieldList = try $ do items <- many1 fieldListItem blanklines - let authors = case (lookup "Authors" items) of + let authors = case lookup "Authors" items of Just auth -> [auth] Nothing -> map snd (filter (\(x,y) -> x == "Author") items) let date = case (lookup "Date" items) of @@ -162,82 +156,74 @@ fieldList = try (do updateState (\st -> st { stateAuthors = authors, stateDate = date, stateTitle = title }) - return (BlockQuote result)) + return $ BlockQuote result -- -- line block -- -lineBlockLine = try (do +lineBlockLine = try $ do string "| " white <- many (oneOf " \t") line <- manyTill inline newline - let line' = (if null white then [] else [Str white]) ++ line ++ [LineBreak] - return line') + return $ (if null white then [] else [Str white]) ++ line ++ [LineBreak] -lineBlock = try (do +lineBlock = try $ do lines <- many1 lineBlockLine blanklines - return $ Para (concat lines)) + return $ Para (concat lines) -- -- paragraph block -- -para = choice [ paraBeforeCodeBlock, paraNormal ] <?> "paragraph" +para = paraBeforeCodeBlock <|> paraNormal <?> "paragraph" -codeBlockStart = try (do - string "::" - blankline - blankline) +codeBlockStart = try $ string "::" >> blankline >> blankline -- paragraph that ends in a :: starting a code block -paraBeforeCodeBlock = try (do - result <- many1 (do {notFollowedBy' codeBlockStart; inline}) +paraBeforeCodeBlock = try $ do + result <- many1 (notFollowedBy' codeBlockStart >> inline) lookAhead (string "::") - return (Para (if (last result == Space) - then normalizeSpaces result - else (normalizeSpaces result) ++ [Str ":"]))) + return $ Para $ if last result == Space + then normalizeSpaces result + else (normalizeSpaces result) ++ [Str ":"] -- regular paragraph -paraNormal = try (do +paraNormal = try $ do result <- many1 inline newline blanklines - let result' = normalizeSpaces result - return (Para result')) + return $ Para $ normalizeSpaces result -plain = do - result <- many1 inline - let result' = normalizeSpaces result - return (Plain result') +plain = many1 inline >>= return . Plain . normalizeSpaces -- -- image block -- -imageBlock = try (do +imageBlock = try $ do string ".. image:: " src <- manyTill anyChar newline - return (Plain [Image [Str "image"] (src, "")])) + return $ Plain [Image [Str "image"] (src, "")] -- -- header blocks -- -header = choice [ doubleHeader, singleHeader ] <?> "header" +header = doubleHeader <|> singleHeader <?> "header" -- a header with lines on top and bottom -doubleHeader = try (do +doubleHeader = try $ do c <- oneOf underlineChars rest <- many (char c) -- the top line let lenTop = length (c:rest) skipSpaces newline - txt <- many1 (do {notFollowedBy blankline; inline}) - pos <- getPosition + txt <- many1 (notFollowedBy blankline >> inline) + pos <- getPosition let len = (sourceColumn pos) - 1 - if (len > lenTop) then fail "title longer than border" else (do {return ()}) + if (len > lenTop) then fail "title longer than border" else return () blankline -- spaces and newline count lenTop (char c) -- the bottom line blanklines @@ -249,10 +235,10 @@ doubleHeader = try (do Just ind -> (headerTable, ind + 1) Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1) setState (state { stateHeaderTable = headerTable' }) - return (Header level (normalizeSpaces txt))) + return $ Header level (normalizeSpaces txt) -- a header with line on the bottom only -singleHeader = try (do +singleHeader = try $ do notFollowedBy' whitespace txt <- many1 (do {notFollowedBy blankline; inline}) pos <- getPosition @@ -268,19 +254,19 @@ singleHeader = try (do Just ind -> (headerTable, ind + 1) Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1) setState (state { stateHeaderTable = headerTable' }) - return (Header level (normalizeSpaces txt))) + return $ Header level (normalizeSpaces txt) -- -- hrule block -- -hruleWith chr = try (do +hruleWith chr = try $ do count 4 (char chr) skipMany (char chr) skipSpaces newline blanklines - return HorizontalRule) + return HorizontalRule hrule = choice (map hruleWith underlineChars) <?> "hrule" @@ -289,15 +275,16 @@ hrule = choice (map hruleWith underlineChars) <?> "hrule" -- -- read a line indented by a given string -indentedLine indents = try (do +indentedLine indents = try $ do string indents result <- manyTill anyChar newline - return (result ++ "\n")) + return $ result ++ "\n" --- two or more indented lines, possibly separated by blank lines --- if variable = True, then any indent will work, but it must be consistent through the block --- if variable = False, indent should be one tab or equivalent in spaces -indentedBlock variable = try (do +-- two or more indented lines, possibly separated by blank lines. +-- if variable = True, then any indent will work, but it must be +-- consistent through the block. +-- if variable = False, indent should be one tab or equivalent in spaces. +indentedBlock variable = try $ do state <- getState let tabStop = stateTabStop state indents <- if variable @@ -305,51 +292,47 @@ indentedBlock variable = try (do else oneOfStrings ["\t", (replicate tabStop ' ')] firstline <- manyTill anyChar newline rest <- many (choice [ indentedLine indents, - try (do - b <- blanklines - l <- indentedLine indents - return (b ++ l))]) - option "" blanklines - return (firstline ++ "\n" ++ (concat rest))) - -codeBlock = try (do + try (do b <- blanklines + l <- indentedLine indents + return (b ++ l))]) + optional blanklines + return $ firstline ++ "\n" ++ concat rest + +codeBlock = try $ do codeBlockStart result <- indentedBlock False -- the False means we want one tab stop indent on each line - return (CodeBlock (stripTrailingNewlines result))) + return $ CodeBlock $ stripTrailingNewlines result -- -- raw html -- -rawHtmlBlock = try (do - string ".. raw:: html" - blanklines - result <- indentedBlock True - return (RawHtml result)) +rawHtmlBlock = try $ string ".. raw:: html" >> blanklines >> + indentedBlock True >>= return . RawHtml -- -- raw latex -- -rawLaTeXBlock = try (do +rawLaTeXBlock = try $ do string ".. raw:: latex" blanklines result <- indentedBlock True - return (Para [(TeX result)])) + return $ Para [(TeX result)] -- -- block quotes -- -blockQuote = try (do +blockQuote = try $ do raw <- indentedBlock True -- parse the extracted block, which may contain various block elements: rest <- getInput setInput $ raw ++ "\n\n" contents <- parseBlocks setInput rest - return (BlockQuote contents)) + return $ BlockQuote contents -- -- list blocks @@ -369,15 +352,14 @@ definitionListItem = try $ do definitionList = try $ do items <- many1 definitionListItem - return (DefinitionList items) + return $ DefinitionList items -- parses bullet list start and returns its length (inc. following whitespace) -bulletListStart = try (do +bulletListStart = try $ do notFollowedBy' hrule -- because hrules start out just like lists marker <- oneOf bulletListMarkers white <- many1 spaceChar - let len = length (marker:white) - return len) + return $ length (marker:white) -- parses ordered list start and returns its length (inc following whitespace) orderedListStart style delim = try $ do @@ -386,11 +368,11 @@ orderedListStart style delim = try $ do return $ markerLen + length white -- parse a line of a list item -listLine markerLength = try (do +listLine markerLength = try $ do notFollowedBy blankline indentWith markerLength line <- manyTill anyChar newline - return (line ++ "\n")) + return $ line ++ "\n" -- indent by specified number of spaces (or equiv. tabs) indentWith num = do @@ -399,7 +381,7 @@ indentWith num = do if (num < tabStop) then count num (char ' ') else choice [ try (count num (char ' ')), - (try (do {char '\t'; count (num - tabStop) (char ' ')})) ] + (try (char '\t' >> count (num - tabStop) (char ' '))) ] -- parse raw text for one list item, excluding start marker and continuations rawListItem start = try $ do @@ -411,19 +393,16 @@ rawListItem start = try $ do -- continuation of a list item - indented and separated by blankline or -- (in compact lists) endline. -- Note: nested lists are parsed as continuations. -listContinuation markerLength = try (do +listContinuation markerLength = try $ do blanks <- many1 blankline result <- many1 (listLine markerLength) - return (blanks ++ (concat result))) + return $ blanks ++ concat result -listItem start = try (do +listItem start = try $ do (markerLength, first) <- rawListItem start rest <- many (listContinuation markerLength) - blanks <- choice [ try (do - b <- many blankline - lookAhead start - return b), - many1 blankline ] -- whole list must end with blank + blanks <- choice [ try (many blankline >>~ lookAhead start), + many1 blankline ] -- whole list must end with blank. -- parsing with ListItemState forces markers at beginning of lines to -- count as list item markers, even if not separated by blank space. -- see definition of "endline" @@ -436,52 +415,44 @@ listItem start = try (do parsed <- parseBlocks setInput remaining updateState (\st -> st {stateParserContext = oldContext}) - return parsed) + return parsed orderedList = try $ do (start, style, delim) <- lookAhead anyOrderedListMarker items <- many1 (listItem (orderedListStart style delim)) let items' = compactify items - return (OrderedList (start, style, delim) items') + return $ OrderedList (start, style, delim) items' -bulletList = try (do +bulletList = try $ do items <- many1 (listItem bulletListStart) let items' = compactify items - return (BulletList items')) + return $ BulletList items' -- -- unknown directive (e.g. comment) -- -unknownDirective = try (do +unknownDirective = try $ do string ".. " manyTill anyChar newline - many (do - string " " - char ':' - many1 (noneOf "\n:") - char ':' - many1 (noneOf "\n") - newline) - option "" blanklines - return Null) + many (string " :" >> many1 (noneOf "\n:") >> char ':' >> + many1 (noneOf "\n") >> newline) + optional blanklines + return Null -- -- reference key -- -referenceKey = do - result <- choice [imageKey, anonymousKey, regularKeyQuoted, regularKey] - option "" blanklines - return result +referenceKey = + choice [imageKey, anonymousKey, regularKeyQuoted, regularKey] >>~ + optional blanklines targetURI = try $ do skipSpaces - option ' ' newline - contents <- many1 (try (do many spaceChar - newline - many1 spaceChar - noneOf " \t\n") <|> noneOf "\n") + optional newline + contents <- many1 (try (many spaceChar >> newline >> + many1 spaceChar >> noneOf " \t\n") <|> noneOf "\n") blanklines return contents @@ -516,71 +487,73 @@ regularKey = try $ do -- inline -- -inline = choice [ superscript, subscript, - escapedChar, link, image, hyphens, strong, emph, code, - str, tabchar, whitespace, endline, symbol ] <?> "inline" - -hyphens = try (do +inline = choice [ superscript + , subscript + , escapedChar + , link + , image + , hyphens + , strong + , emph + , code + , str + , tabchar + , whitespace + , endline + , symbol ] <?> "inline" + +hyphens = try $ do result <- many1 (char '-') option Space endline -- don't want to treat endline after hyphen or dash as a space - return (Str result)) + return $ Str result escapedChar = escaped anyChar symbol = do result <- oneOf specialChars - return (Str [result]) + return $ Str [result] -- parses inline code, between codeStart and codeEnd -code = try (do +code = try $ do string "``" result <- manyTill anyChar (try (string "``")) - let result' = removeLeadingTrailingSpace $ joinWithSep " " $ lines result - return (Code result')) + return $ Code $ removeLeadingTrailingSpace $ joinWithSep " " $ lines result -emph = do - result <- enclosed (char '*') (char '*') inline - return (Emph (normalizeSpaces result)) +emph = enclosed (char '*') (char '*') inline >>= + return . Emph . normalizeSpaces -strong = do - result <- enclosed (string "**") (string "**") inline - return (Strong (normalizeSpaces result)) +strong = enclosed (string "**") (string "**") inline >>= + return . Strong . normalizeSpaces interpreted role = try $ do - option "" (try $ string "\\ ") + optional $ try $ string "\\ " result <- enclosed (string $ ":" ++ role ++ ":`") (char '`') anyChar nextChar <- lookAhead anyChar try (string "\\ ") <|> lookAhead (count 1 $ oneOf " \t\n") <|> (eof >> return "") return [Str result] superscript = interpreted "sup" >>= (return . Superscript) + subscript = interpreted "sub" >>= (return . Subscript) -whitespace = do - many1 spaceChar <?> "whitespace" - return Space +whitespace = many1 spaceChar >> return Space <?> "whitespace" -tabchar = do - tab - return (Str "\t") +tabchar = tab >> return (Str "\t") -str = do - notFollowedBy' oneWordReference - result <- many1 (noneOf (specialChars ++ "\t\n ")) - return (Str result) +str = notFollowedBy' oneWordReference >> + many1 (noneOf (specialChars ++ "\t\n ")) >>= return . Str -- an endline character that can be treated as a space, not a structural break -endline = try (do +endline = try $ do newline notFollowedBy blankline -- parse potential list-starts at beginning of line differently in a list: st <- getState if ((stateParserContext st) == ListItemState) - then do notFollowedBy' anyOrderedListMarker - notFollowedBy' bulletListStart - else option () pzero - return Space) + then notFollowedBy' anyOrderedListMarker >> notFollowedBy' bulletListStart + else return () + return Space -- -- links @@ -628,10 +601,10 @@ referenceLink = try $ do uriScheme = oneOfStrings [ "http://", "https://", "ftp://", "file://", "mailto:", "news:", "telnet:" ] -uri = try (do +uri = try $ do scheme <- uriScheme identifier <- many1 (noneOf " \t\n") - return (scheme ++ identifier)) + return $ scheme ++ identifier autoURI = try $ do src <- uri @@ -639,20 +612,20 @@ autoURI = try $ do emailChar = alphaNum <|> oneOf "-+_." -emailAddress = try (do +emailAddress = try $ do firstLetter <- alphaNum restAddr <- many emailChar let addr = firstLetter:restAddr char '@' dom <- domain - return (addr ++ '@':dom)) + return $ addr ++ '@':dom domainChar = alphaNum <|> char '-' -domain = try (do +domain = try $ do first <- many1 domainChar dom <- many1 (try (do{ char '.'; many1 domainChar })) - return (joinWithSep "." (first:dom))) + return $ joinWithSep "." (first:dom) autoEmail = try $ do src <- emailAddress @@ -669,5 +642,5 @@ image = try $ do src <- case lookupKeySrc keyTable ref of Nothing -> fail "no corresponding key" Just target -> return target - return (Image (normalizeSpaces ref) src) + return $ Image (normalizeSpaces ref) src |