aboutsummaryrefslogtreecommitdiff
path: root/Text/Pandoc/Shared.hs
diff options
context:
space:
mode:
authorfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2007-12-21 16:13:10 +0000
committerfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2007-12-21 16:13:10 +0000
commit0681d1d3e7ccf0cddfd452957c2d792df014064b (patch)
treed488f770a14d5123edcb1ff9b0075d6d7e30d711 /Text/Pandoc/Shared.hs
parent246e5f9ea3bd921d28521c9121d686a0c8e50ee6 (diff)
downloadpandoc-0681d1d3e7ccf0cddfd452957c2d792df014064b.tar.gz
Fixed handling of email addresses in markdown and reStructuredText.
Consolidated uri and email address parsers. (Resolves Issue #37.) + New emailAddress and uri parsers in Text.Pandoc.Shared. uri parser uses parseURI from Network.URI. emailAddress parser properly handles email addresses with periods in them. + Removed uri and emailAddress parsers from Text.Pandoc.Readers.RST. + Removed uri and emailAddress parsers from Text.Pandoc.Readers.Markdown. git-svn-id: https://pandoc.googlecode.com/svn/trunk@1149 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'Text/Pandoc/Shared.hs')
-rw-r--r--Text/Pandoc/Shared.hs35
1 files changed, 35 insertions, 0 deletions
diff --git a/Text/Pandoc/Shared.hs b/Text/Pandoc/Shared.hs
index 32cc2ce59..7086ca452 100644
--- a/Text/Pandoc/Shared.hs
+++ b/Text/Pandoc/Shared.hs
@@ -64,6 +64,8 @@ module Text.Pandoc.Shared (
charsInBalanced,
charsInBalanced',
romanNumeral,
+ emailAddress,
+ uri,
withHorizDisplacement,
nullBlock,
failIfStrict,
@@ -105,6 +107,7 @@ import Text.Pandoc.CharacterReferences ( characterReference )
import Data.Char ( toLower, toUpper, ord, isLower, isUpper )
import Data.List ( find, isPrefixOf )
import Control.Monad ( join )
+import Network.URI ( parseURI, URI (..), isAllowedInURI )
--
-- List processing
@@ -404,6 +407,38 @@ romanNumeral upperCase = do
then fail "not a roman numeral"
else return total
+-- Parsers for email addresses and URIs
+
+emailChar = alphaNum <|> oneOf "-+_."
+
+domainChar = alphaNum <|> char '-'
+
+domain = do
+ first <- many1 domainChar
+ dom <- many1 (try (do{ char '.'; many1 domainChar }))
+ return $ joinWithSep "." (first:dom)
+
+-- | Parses an email address; returns string.
+emailAddress :: GenParser Char st [Char]
+emailAddress = try $ do
+ firstLetter <- alphaNum
+ restAddr <- many emailChar
+ let addr = firstLetter:restAddr
+ char '@'
+ dom <- domain
+ return $ addr ++ '@':dom
+
+-- | Parses a URI.
+uri = try $ do
+ str <- many1 $ satisfy isAllowedInURI
+ case parseURI str of
+ Just uri' -> if uriScheme uri' `elem` [ "http:", "https:", "ftp:",
+ "file:", "mailto:",
+ "news:", "telnet:" ]
+ then return $ show uri'
+ else fail "not a URI"
+ Nothing -> fail "not a URI"
+
-- | Applies a parser, returns tuple of its results and its horizontal
-- displacement (the difference between the source column at the end
-- and the source column at the beginning). Vertical displacement