From 76d462c1cd3e7b25e09d7d7efb23ff4a017344f2 Mon Sep 17 00:00:00 2001 From: fiddlosopher Date: Sat, 1 Sep 2007 17:22:22 +0000 Subject: Simplify autolink parsing code, using Network.URI to test for URIs. Added dependency on network library to debian/control and pandoc.cabal. git-svn-id: https://pandoc.googlecode.com/svn/trunk@982 788f1e2b-df1e-0410-8736-df70ead52e1b --- debian/control | 2 +- pandoc.cabal | 2 +- src/Text/Pandoc/Readers/Markdown.hs | 49 ++++++++++++++++++------------------- 3 files changed, 26 insertions(+), 27 deletions(-) diff --git a/debian/control b/debian/control index 811b485cb..402859536 100644 --- a/debian/control +++ b/debian/control @@ -2,7 +2,7 @@ Source: pandoc Section: text Priority: optional Maintainer: Recai Oktaş -Build-Depends: debhelper (>= 4.0.0), haskell-devscripts (>=0.5.12), ghc6 (>= 6.6-1), libghc6-xhtml-dev, libghc6-mtl-dev, perl +Build-Depends: debhelper (>= 4.0.0), haskell-devscripts (>=0.5.12), ghc6 (>= 6.6-1), libghc6-xhtml-dev, libghc6-mtl-dev, libghc6-network-dev, perl Build-Depends-Indep: haddock Standards-Version: 3.7.2.0 XS-Vcs-Svn: http://pandoc.googlecode.com/svn/trunk diff --git a/pandoc.cabal b/pandoc.cabal index efe5da33d..e24adee9c 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -31,7 +31,7 @@ Description: Pandoc is a Haskell library for converting from one markup which convert this native representation into a target format. Thus, adding an input or output format requires only adding a reader or writer. -Build-Depends: base, parsec, xhtml, mtl, regex-compat +Build-Depends: base, parsec, xhtml, mtl, regex-compat, network Hs-Source-Dirs: src Exposed-Modules: Text.Pandoc, Text.Pandoc.Blocks, diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index ac98fcba6..1583e5e7b 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -31,9 +31,10 @@ module Text.Pandoc.Readers.Markdown ( readMarkdown ) where -import Data.List ( transpose, isSuffixOf, lookup, sortBy ) +import Data.List ( transpose, isPrefixOf, isSuffixOf, lookup, sortBy ) import Data.Ord ( comparing ) import Data.Char ( isAlphaNum ) +import Network.URI ( isURI ) import Text.Pandoc.Definition import Text.Pandoc.Shared import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment ) @@ -738,9 +739,9 @@ doubleQuoted = try $ do failIfInQuoteContext context = do st <- getState - if (stateQuoteContext st == context) - then fail "already inside quotes" - else return () + if stateQuoteContext st == context + then fail "already inside quotes" + else return () singleQuoteStart = do failIfInQuoteContext InSingleQuote @@ -839,32 +840,30 @@ referenceLink label = do Nothing -> fail "no corresponding key" Just target -> return target -autoLink = autoLinkEmail <|> autoLinkRegular +emailAddress = try $ do + name <- many1 (alphaNum <|> char '+') + char '@' + first <- many1 alphaNum + rest <- many1 (char '.' >> many1 alphaNum) + return $ "mailto:" ++ name ++ "@" ++ joinWithSep "." (first:rest) --- a link -autoLinkEmail = try $ do - char '<' - name <- many1Till (noneOf "/:<> \t\n") (char '@') - domain <- sepBy1 (many1 (noneOf "/:.@<> \t\n")) (char '.') - char '>' - let src = name ++ "@" ++ (joinWithSep "." domain) - txt <- autoLinkText src - return $ Link txt (("mailto:" ++ src), "") +uri = try $ do + str <- many1 (noneOf "\n\t >") + if isURI str + then return str + else fail "not a URI" --- a link -autoLinkRegular = try $ do +autoLink = try $ do char '<' - prot <- oneOfStrings ["http:", "ftp:", "mailto:"] - rest <- many1Till (noneOf " \t\n<>") (char '>') - let src = prot ++ rest - txt <- autoLinkText src - return $ Link txt (src, "") - -autoLinkText src = do + src <- uri <|> emailAddress + char '>' + let src' = if "mailto:" `isPrefixOf` src + then drop 7 src + else src st <- getState return $ if stateStrict st - then [Str src] - else [Code src] + then Link [Str src'] (src, "") + else Link [Code src'] (src, "") image = try $ do char '!' -- cgit v1.2.3