From 9cdfd4f6491cbf9e3002e88be8be9ebeb48ba2bb Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 14 Jul 2015 10:20:09 -0700 Subject: Improved bare autolink detection. Previously we disallowed `-` at the end of an autolink, and disallowed the combination `=-`. This commit liberalizes the rules for allowing punctuation in a bare URI. Added test cases. One potential drawback is that you can no longer put a bare URI between em dashes, as in: `this uri---http://example.com---is an example.` But in this respect we now match GitHub's treatment of bare URIs. Closes #2299. --- src/Text/Pandoc/Parsing.hs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 82e7e2c33..5dc991be2 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -448,13 +448,12 @@ uri :: Stream [Char] m Char => ParserT [Char] st m (String, String) uri = try $ do scheme <- uriScheme char ':' - -- We allow punctuation except at the end, since + -- We allow sentence punctuation except at the end, since -- we don't want the trailing '.' in 'http://google.com.' We want to allow -- http://en.wikipedia.org/wiki/State_of_emergency_(disambiguation) -- as a URL, while NOT picking up the closing paren in -- (http://wikipedia.org). So we include balanced parens in the URL. - let isWordChar c = isAlphaNum c || c == '_' || c == '/' || c == '+' || - not (isAscii c) + let isWordChar c = isAlphaNum c || c `elem` "#$%*+/@\\_-" let wordChar = satisfy isWordChar let percentEscaped = try $ char '%' >> skipMany1 (satisfy isHexDigit) let entity = () <$ characterReference -- cgit v1.2.3