aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/Text/Pandoc/Parsing.hs7
-rw-r--r--src/Text/Pandoc/Readers/Txt2Tags.hs22
2 files changed, 24 insertions, 5 deletions
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 0bb794ba1..082d9565b 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -693,13 +693,12 @@ emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain)
mailbox = intercalate "." <$> (emailWord `sepBy1'` dot)
domain = intercalate "." <$> (subdomain `sepBy1'` dot)
dot = char '.'
- subdomain = many1 $ alphaNum <|> innerPunct
+ subdomain = many1 $ alphaNum <|> innerPunct (=='-')
-- this excludes some valid email addresses, since an
-- email could contain e.g. '__', but gives better results
-- for our purposes, when combined with markdown parsing:
- innerPunct = try (satisfy (\c -> isEmailPunct c || c == '@')
- <* notFollowedBy space
- <* notFollowedBy (satisfy isPunctuation))
+ innerPunct f = try (satisfy f
+ <* notFollowedBy (satisfy (not . isAlphaNum)))
-- technically an email address could begin with a symbol,
-- but allowing this creates too many problems.
-- See e.g. https://github.com/jgm/pandoc/issues/2940
diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs
index 6f92f0063..b5cf5a0f3 100644
--- a/src/Text/Pandoc/Readers/Txt2Tags.hs
+++ b/src/Text/Pandoc/Readers/Txt2Tags.hs
@@ -478,9 +478,29 @@ macro = try $ do
-- raw URLs in text are automatically linked
url :: T2T Inlines
url = try $ do
- (rawUrl, escapedUrl) <- try uri <|> emailAddress
+ (rawUrl, escapedUrl) <- try uri <|> emailAddress'
return $ B.link rawUrl "" (B.str escapedUrl)
+emailAddress' :: T2T (Text, Text)
+emailAddress' = do
+ (base, mailURI) <- emailAddress
+ query <- option "" emailQuery
+ return (base <> query, mailURI <> query)
+
+emailQuery :: T2T Text
+emailQuery = do
+ char '?'
+ parts <- kv `sepBy1` (char '&')
+ return $ "?" <> T.intercalate "&" parts
+
+kv :: T2T Text
+kv = do
+ k <- T.pack <$> many1 alphaNum
+ char '='
+ let vchar = alphaNum <|> try (oneOf "%._/~:,=$@&+-;*" <* lookAhead alphaNum)
+ v <- T.pack <$> many1 vchar
+ return (k <> "=" <> v)
+
uri :: T2T (Text, Text)
uri = try $ do
address <- t2tURI