aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2013-01-09 17:19:32 -0800
committerJohn MacFarlane <jgm@berkeley.edu>2013-01-09 17:19:32 -0800
commita71641a2a04c1d324163e16299f1e9821a26c9f9 (patch)
tree15a613e0d1909084caa3930606e3f8696aab7102 /src
parent0998f774ced02c8298e2427853d196b392bba299 (diff)
downloadpandoc-a71641a2a04c1d324163e16299f1e9821a26c9f9.tar.gz
Made email parser more correct.
Now it's based on RFC 822, though it still doesn't implement quoted strings in email addresses.
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc/Parsing.hs26
1 files changed, 14 insertions, 12 deletions
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 6277020a5..06919c888 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -333,25 +333,27 @@ romanNumeral upperCase = do
-- Parsers for email addresses and URIs
emailChar :: Parser [Char] st Char
-emailChar = alphaNum <|>
- satisfy (\c -> c == '-' || c == '+' || c == '_' || c == '.')
+emailChar = alphaNum <|> oneOf "!\"#$%&'*+-/0123456789=?^_{|}~"
-domainChar :: Parser [Char] st Char
-domainChar = alphaNum <|> char '-'
-
-domain :: Parser [Char] st [Char]
+domain :: Parser [Char] st String
domain = do
- first <- many1 domainChar
- dom <- many1 $ try (char '.' >> many1 domainChar )
- return $ intercalate "." (first:dom)
+ x <- subdomain
+ xs <- many (try $ char '.' >> subdomain)
+ return $ intercalate "." (x:xs)
+
+subdomain :: Parser [Char] st String
+subdomain = many1 (emailChar <|> char '@')
+
+emailWord :: Parser [Char] st String
+emailWord = many1 emailChar -- ignores possibility of quoted strings
-- | Parses an email address; returns original and corresponding
-- escaped mailto: URI.
emailAddress :: Parser [Char] st (String, String)
emailAddress = try $ do
- firstLetter <- alphaNum
- restAddr <- many emailChar
- let addr = firstLetter:restAddr
+ x <- emailWord
+ xs <- many (try $ char '.' >> emailWord)
+ let addr = intercalate "." (x:xs)
char '@'
dom <- domain
let full = addr ++ '@':dom