aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/Text/Pandoc/Parsing.hs17
-rw-r--r--tests/Tests/Readers/Markdown.hs5
2 files changed, 18 insertions, 4 deletions
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index daf8e867d..110e34c6a 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -178,7 +178,7 @@ import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
import Text.Parsec hiding (token)
import Text.Parsec.Pos (newPos)
import Data.Char ( toLower, toUpper, ord, chr, isAscii, isAlphaNum,
- isHexDigit, isSpace )
+ isHexDigit, isSpace, isPunctuation )
import Data.List ( intercalate, transpose, isSuffixOf )
import Text.Pandoc.Shared
import qualified Data.Map as M
@@ -405,9 +405,18 @@ emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain)
domain = intercalate "." <$> (subdomain `sepby1` dot)
dot = char '.'
subdomain = many1 $ alphaNum <|> innerPunct
- innerPunct = try (satisfy (\c -> isEmailPunct c || c == '@') <*
- notFollowedBy space)
- emailWord = many1 $ satisfy isEmailChar
+ -- this excludes some valid email addresses, since an
+ -- email could contain e.g. '__', but gives better results
+ -- for our purposes, when combined with markdown parsing:
+ innerPunct = try (satisfy (\c -> isEmailPunct c || c == '@')
+ <* notFollowedBy space
+ <* notFollowedBy (satisfy isPunctuation))
+ -- technically an email address could begin with a symbol,
+ -- but allowing this creates too many problems.
+ -- See e.g. https://github.com/jgm/pandoc/issues/2940
+ emailWord = do x <- satisfy isAlphaNum
+ xs <- many (satisfy isEmailChar)
+ return (x:xs)
isEmailChar c = isAlphaNum c || isEmailPunct c
isEmailPunct c = c `elem` "!\"#$%&'*+-/=?^_{|}~;"
-- note: sepBy1 from parsec consumes input when sep
diff --git a/tests/Tests/Readers/Markdown.hs b/tests/Tests/Readers/Markdown.hs
index 099d75b62..e877b81ca 100644
--- a/tests/Tests/Readers/Markdown.hs
+++ b/tests/Tests/Readers/Markdown.hs
@@ -185,6 +185,11 @@ tests = [ testGroup "inline code"
"<\n\na>" =?>
para (text "<") <> para (text "a>")
]
+ , testGroup "raw email addresses"
+ [ test markdownGH "issue 2940" $
+ "**@user**" =?>
+ para (strong (text "@user"))
+ ]
, testGroup "emoji"
[ test markdownGH "emoji symbols" $
":smile: and :+1:" =?> para (text "😄 and 👍")