From ee0fc19bc54208c5d8828eab872e3bbe303c47bf Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Thu, 28 Mar 2013 11:33:01 -0700
Subject: Parsing: Further improvements to uri parser.

Don't treat punctuation before percent-encoding as final punctuation.
Don't treat '+' as final punctuation.
---
 src/Text/Pandoc/Parsing.hs | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 31f3d2525..c5e77bec2 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -425,7 +425,8 @@ uri = try $ do
   -- http://en.wikipedia.org/wiki/State_of_emergency_(disambiguation)
   -- as a URL, while NOT picking up the closing paren in
   -- (http://wikipedia.org). So we include balanced parens in the URL.
-  let isWordChar c = isAlphaNum c || c == '_' || c == '/' || not (isAscii c)
+  let isWordChar c = isAlphaNum c || c == '_' || c == '/' || c == '+' ||
+                         not (isAscii c)
   let wordChar = satisfy isWordChar
   let percentEscaped = try $ char '%' >> skipMany1 (satisfy isHexDigit)
   let entity = () <$ characterReference
@@ -434,7 +435,8 @@ uri = try $ do
   let uriChunk = skipMany1 wordChar
               <|> percentEscaped
               <|> entity
-              <|> (try $ punct >> lookAhead (satisfy isWordChar) >> return ())
+              <|> (try $ punct >>
+                    lookAhead (void (satisfy isWordChar) <|> percentEscaped))
   str <- snd `fmap` withRaw (skipMany1 ( () <$
          (enclosed (char '(') (char ')') uriChunk
          <|> enclosed (char '{') (char '}') uriChunk
-- 
cgit v1.2.3