diff options
author | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2007-12-21 16:13:10 +0000 |
---|---|---|
committer | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2007-12-21 16:13:10 +0000 |
commit | 0681d1d3e7ccf0cddfd452957c2d792df014064b (patch) | |
tree | d488f770a14d5123edcb1ff9b0075d6d7e30d711 /Text/Pandoc/Shared.hs | |
parent | 246e5f9ea3bd921d28521c9121d686a0c8e50ee6 (diff) | |
download | pandoc-0681d1d3e7ccf0cddfd452957c2d792df014064b.tar.gz |
Fixed handling of email addresses in markdown and reStructuredText.
Consolidated uri and email address parsers. (Resolves Issue #37.)
+ New emailAddress and uri parsers in Text.Pandoc.Shared.
uri parser uses parseURI from Network.URI. emailAddress
parser properly handles email addresses with periods in them.
+ Removed uri and emailAddress parsers from Text.Pandoc.Readers.RST.
+ Removed uri and emailAddress parsers from Text.Pandoc.Readers.Markdown.
git-svn-id: https://pandoc.googlecode.com/svn/trunk@1149 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'Text/Pandoc/Shared.hs')
-rw-r--r-- | Text/Pandoc/Shared.hs | 35 |
1 files changed, 35 insertions, 0 deletions
diff --git a/Text/Pandoc/Shared.hs b/Text/Pandoc/Shared.hs index 32cc2ce59..7086ca452 100644 --- a/Text/Pandoc/Shared.hs +++ b/Text/Pandoc/Shared.hs @@ -64,6 +64,8 @@ module Text.Pandoc.Shared ( charsInBalanced, charsInBalanced', romanNumeral, + emailAddress, + uri, withHorizDisplacement, nullBlock, failIfStrict, @@ -105,6 +107,7 @@ import Text.Pandoc.CharacterReferences ( characterReference ) import Data.Char ( toLower, toUpper, ord, isLower, isUpper ) import Data.List ( find, isPrefixOf ) import Control.Monad ( join ) +import Network.URI ( parseURI, URI (..), isAllowedInURI ) -- -- List processing @@ -404,6 +407,38 @@ romanNumeral upperCase = do then fail "not a roman numeral" else return total +-- Parsers for email addresses and URIs + +emailChar = alphaNum <|> oneOf "-+_." + +domainChar = alphaNum <|> char '-' + +domain = do + first <- many1 domainChar + dom <- many1 (try (do{ char '.'; many1 domainChar })) + return $ joinWithSep "." (first:dom) + +-- | Parses an email address; returns string. +emailAddress :: GenParser Char st [Char] +emailAddress = try $ do + firstLetter <- alphaNum + restAddr <- many emailChar + let addr = firstLetter:restAddr + char '@' + dom <- domain + return $ addr ++ '@':dom + +-- | Parses a URI. +uri = try $ do + str <- many1 $ satisfy isAllowedInURI + case parseURI str of + Just uri' -> if uriScheme uri' `elem` [ "http:", "https:", "ftp:", + "file:", "mailto:", + "news:", "telnet:" ] + then return $ show uri' + else fail "not a URI" + Nothing -> fail "not a URI" + -- | Applies a parser, returns tuple of its results and its horizontal -- displacement (the difference between the source column at the end -- and the source column at the beginning). Vertical displacement |