From 890fbe97ecd9546ec1872a8ebf37551fe8771a0f Mon Sep 17 00:00:00 2001 From: fiddlosopher Date: Wed, 24 Jan 2007 19:44:43 +0000 Subject: Refactored markdown reader so that Text.Regex is not used. Replaced email regex test with a custom email autolink parser (autoLinkEmail). Also replaced 'selfClosingTag' with a custom function 'isSelfClosingTag'. git-svn-id: https://pandoc.googlecode.com/svn/trunk@511 788f1e2b-df1e-0410-8736-df70ead52e1b --- src/Text/Pandoc/Readers/Markdown.hs | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 6e327a2ef..5354574b9 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -31,7 +31,7 @@ module Text.Pandoc.Readers.Markdown ( readMarkdown ) where -import Data.List ( findIndex, sortBy, transpose ) +import Data.List ( findIndex, sortBy, transpose, isSuffixOf, intersect ) import Data.Char ( isAlphaNum ) import Text.ParserCombinators.Pandoc import Text.Pandoc.Definition @@ -43,7 +43,6 @@ import Text.Pandoc.Readers.HTML ( rawHtmlBlock, htmlEndTag, extractTagType, htmlBlockElement ) import Text.Pandoc.Entities ( decodeEntities ) -import Text.Regex ( matchRegex, mkRegex ) import Text.ParserCombinators.Parsec -- | Read markdown from an input string and return a Pandoc document. @@ -474,14 +473,13 @@ htmlBlock = do else rawHtmlBlocks -- True if tag is self-closing -selfClosing tag = case (matchRegex (mkRegex "\\/[[:space:]]*>$") tag) of - Just _ -> True - Nothing -> False +isSelfClosing tag = + isSuffixOf "/>" $ filter (\c -> (not (c `elem` " \n\t"))) tag strictHtmlBlock = try (do tag <- anyHtmlBlockTag let tag' = extractTagType tag - if selfClosing tag || tag' == "hr" + if isSelfClosing tag || tag' == "hr" then return tag else do contents <- many (do{notFollowedBy' (htmlEndTag tag'); @@ -930,17 +928,24 @@ referenceLinkSingle = try (do else fail "no corresponding key" return (Link label (Ref label))) --- a link -autoLink = try (do +autoLink = do notFollowedBy' (anyHtmlTag <|> anyHtmlEndTag) + autoLinkEmail <|> autoLinkRegular + +-- a link +autoLinkEmail = try $ do + char autoLinkStart + name <- many1Till (noneOf "/:<> \t\n") (char '@') + domain <- sepBy1 (many1 (noneOf "/:.@<> \t\n")) (char '.') + let src = name ++ "@" ++ (joinWithSep "." domain) + char autoLinkEnd + return $ Link [Str src] (Src ("mailto:" ++ src) "") + +-- a link +autoLinkRegular = try $ do src <- between (char autoLinkStart) (char autoLinkEnd) (many (noneOf (spaceChars ++ endLineChars ++ [autoLinkEnd]))) - case (matchRegex emailAddress src) of - Just _ -> return (Link [Str src] (Src ("mailto:" ++ src) "")) - Nothing -> return (Link [Str src] (Src src ""))) - -emailAddress = - mkRegex "([^@:/]+)@(([^.]+[.]?)*([^.]+))" -- presupposes no whitespace + return $ Link [Str src] (Src src "") image = try (do char imageStart -- cgit v1.2.3