diff options
author | John MacFarlane <jgm@berkeley.edu> | 2010-03-23 15:06:18 -0700 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2010-03-23 15:06:18 -0700 |
commit | 85ca50d623a8b8e789615b48282cba648b5c558a (patch) | |
tree | 52c6f29f431bdd0b5930a5136968db8c0ba5a7e1 /src/Text/Pandoc | |
parent | 921d0f30815509af5a75979d6b61e1ff49df5f69 (diff) | |
download | pandoc-85ca50d623a8b8e789615b48282cba648b5c558a.tar.gz |
Shared: Rewrote uri and emailAddress to return original text + escaped URI.
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r-- | src/Text/Pandoc/Shared.hs | 29 |
1 files changed, 17 insertions, 12 deletions
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index b9324b7e4..72772303e 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -115,7 +115,7 @@ import Text.ParserCombinators.Parsec import Text.PrettyPrint.HughesPJ ( Doc, fsep, ($$), (<>), empty, isEmpty, text, nest ) import qualified Text.PrettyPrint.HughesPJ as PP import Text.Pandoc.CharacterReferences ( characterReference ) -import Data.Char ( toLower, toUpper, ord, isLower, isUpper, isAlpha, +import Data.Char ( toLower, toUpper, ord, isLower, isUpper, isAlpha, isAscii, isPunctuation ) import Data.List ( find, isPrefixOf, intercalate ) import Network.URI ( parseURI, URI (..), isAllowedInURI, escapeURIString ) @@ -487,25 +487,30 @@ domain = do dom <- many1 $ try (char '.' >> many1 domainChar ) return $ intercalate "." (first:dom) --- | Parses an email address; returns string. -emailAddress :: GenParser Char st [Char] +-- | Parses an email address; returns original and corresponding +-- escaped mailto: URI. +emailAddress :: GenParser Char st (String, String) emailAddress = try $ do firstLetter <- alphaNum restAddr <- many emailChar let addr = firstLetter:restAddr char '@' dom <- domain - return $ addr ++ '@':dom + let full = addr ++ '@':dom + return (full, escapeURI $ "mailto:" ++ full) --- | Parses a URI. -uri :: GenParser Char st String +-- | Parses a URI. Returns pair of original and URI-escaped version. +uri :: GenParser Char st (String, String) uri = try $ do - str <- many1 $ satisfy isAllowedInURI - case parseURI str of - Just uri' -> if uriScheme uri' `elem` [ "http:", "https:", "ftp:", - "file:", "mailto:", - "news:", "telnet:" ] - then return $ show uri' + let protocols = [ "http:", "https:", "ftp:", "file:", "mailto:", + "news:", "telnet:" ] + lookAhead $ oneOfStrings protocols + -- scan non-ascii characters and ascii characters allowed in a URI + str <- many1 $ satisfy (\c -> not (isAscii c) || isAllowedInURI c) + -- now see if they amount to an absolute URI + case parseURI (escapeURI str) of + Just uri' -> if uriScheme uri' `elem` protocols + then return (str, show uri') else fail "not a URI" Nothing -> fail "not a URI" |