From 05cbdf04fdbb4290197b201a720f3e1796efee5e Mon Sep 17 00:00:00 2001
From: fiddlosopher
Date: Mon, 24 Dec 2007 04:22:41 +0000
Subject: Markdown: better handling of parentheses in URLs and quotation marks in titles.

+ source parser first tries to parse URL with balanced parentheses;
  if that doesn't work, it tries to parse everything beginning with
  '(' and ending with ')'.
+ source parser now uses an auxiliary function source'.
+ linkTitle parser simplified and improved, under assumption that it
  will be called in context of source'.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1156 788f1e2b-df1e-0410-8736-df70ead52e1b
---
 Text/Pandoc/Readers/Markdown.hs | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

(limited to 'Text/Pandoc/Readers')

diff --git a/Text/Pandoc/Readers/Markdown.hs b/Text/Pandoc/Readers/Markdown.hs
index 9dedd1fef..e9befec12 100644
--- a/Text/Pandoc/Readers/Markdown.hs
+++ b/Text/Pandoc/Readers/Markdown.hs
@@ -831,23 +831,30 @@ reference = do
   notFollowedBy' (string "[^") -- footnote reference
   return $ normalizeSpaces result
 -- source for a link, with optional title
-source = try $ do
-  char '('
+source =
+  (try $ charsInBalanced '(' ')' >>= parseFromString source') <|>
+  -- the following is needed for cases like: [ref](/url(a).
+  (enclosed (char '(') (char ')') anyChar >>=
+   parseFromString source')
+
+-- auxiliary function for source
+source' = do
+  skipSpaces
   src <- try (char '<' >>
-              many ((char '\\' >> anyChar) <|> noneOf "> \t\n") >>~
+              many (optional (char '\\') >> noneOf "> \t\n") >>~
               char '>')
-         <|> many ((char '\\' >> anyChar) <|> noneOf ") \t\n")
+         <|> many (optional (char '\\') >> noneOf " \t\n")
   tit <- option "" linkTitle
   skipSpaces
-  char ')'
+  eof
   return (removeTrailingSpace src, tit)
 
 linkTitle = try $ do
   (many1 spaceChar >> option '\n' newline) <|> newline
   skipSpaces
-  delim <- char '\'' <|> char '"'
-  tit <- manyTill anyChar (try (char delim >> skipSpaces >>
-                                notFollowedBy (noneOf ")\n")))
+  delim <- oneOf "'\""
+  tit <- manyTill (optional (char '\\') >> anyChar)
+                  (try (char delim >> skipSpaces >> eof))
   return $ decodeCharacterReferences tit
 
 link = try $ do
-- 
cgit v1.2.3