diff options
author | John MacFarlane <jgm@berkeley.edu> | 2010-03-23 15:07:48 -0700 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2010-03-23 15:07:48 -0700 |
commit | c87d52223ad97ae7f21c69b09ec7b1467bd35670 (patch) | |
tree | c6dc822057ff7b05de2371d0794f49eed8117fa2 | |
parent | c3384bb3f147e61d9eba8dd2d3ce0b1450003d4a (diff) | |
download | pandoc-c87d52223ad97ae7f21c69b09ec7b1467bd35670.tar.gz |
Properly escape URIs in all readers.
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 6 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/LaTeX.hs | 13 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 25 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/RST.hs | 37 |
4 files changed, 37 insertions, 44 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index e68592b95..5c188e3d9 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -182,7 +182,7 @@ unsanitaryURI u = "ldaps:", "magnet:", "mms:", "msnim:", "notes:", "rsync:", "secondlife:", "skype:", "ssh:", "sftp:", "smb:", "sms:", "snews:", "webcal:", "ymsgr:"] - in case parseURIReference (stringToURI u) of + in case parseURIReference (escapeURI u) of Just p -> (map toLower $ uriScheme p) `notElem` safeURISchemes Nothing -> True @@ -746,7 +746,7 @@ link = try $ do Nothing -> fail "no href" let title = fromMaybe "" $ extractAttribute "title" attributes lab <- inlinesTilEnd "a" - return $ Link (normalizeSpaces lab) (url, title) + return $ Link (normalizeSpaces lab) (escapeURI url, title) image :: GenParser Char ParserState Inline image = try $ do @@ -756,5 +756,5 @@ image = try $ do Nothing -> fail "no src" let title = fromMaybe "" $ extractAttribute "title" attributes let alt = fromMaybe "" (extractAttribute "alt" attributes) - return $ Image [Str alt] (url, title) + return $ Image [Str alt] (escapeURI url, title) diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index a2df27c07..36940fab0 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -738,7 +738,7 @@ url :: GenParser Char ParserState Inline url = try $ do string "\\url" url' <- charsInBalanced '{' '}' - return $ Link [Code url'] (url', "") + return $ Link [Code url'] (escapeURI url', "") link :: GenParser Char ParserState Inline link = try $ do @@ -746,17 +746,16 @@ link = try $ do url' <- manyTill anyChar (char '}') char '{' label' <- manyTill inline (char '}') - return $ Link (normalizeSpaces label') (url', "") + return $ Link (normalizeSpaces label') (escapeURI url', "") image :: GenParser Char ParserState Inline image = try $ do ("includegraphics", _, args) <- command let args' = filter isArg args -- filter out options - let src = if null args' then - ("", "") - else - (stripFirstAndLast (head args'), "") - return $ Image [Str "image"] src + let (src,tit) = case args' of + [] -> ("", "") + (x:_) -> (stripFirstAndLast x, "") + return $ Image [Str "image"] (escapeURI src, tit) footnote :: GenParser Char ParserState Inline footnote = try $ do diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index fafd526e6..13edd0586 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -31,7 +31,7 @@ module Text.Pandoc.Readers.Markdown ( readMarkdown ) where -import Data.List ( transpose, isPrefixOf, isSuffixOf, sortBy, findIndex, intercalate ) +import Data.List ( transpose, isSuffixOf, sortBy, findIndex, intercalate ) import Data.Ord ( comparing ) import Data.Char ( isAlphaNum ) import Data.Maybe @@ -73,10 +73,6 @@ specialChars = "\\[]*_~`<>$!^-.&'\"\8216\8217\8220\8221;" -- auxiliary functions -- --- | Replace spaces with %20 -uriEscapeSpaces :: String -> String -uriEscapeSpaces = substitute " " "%20" - indentSpaces :: GenParser Char ParserState [Char] indentSpaces = try $ do state <- getState @@ -206,7 +202,7 @@ referenceKey = try $ do tit <- option "" referenceTitle blanklines endPos <- getPosition - let newkey = (lab, (uriEscapeSpaces $ removeTrailingSpace src, tit)) + let newkey = (lab, (escapeURI $ removeTrailingSpace src, tit)) st <- getState let oldkeys = stateKeys st updateState $ \s -> s { stateKeys = newkey : oldkeys } @@ -1194,7 +1190,7 @@ source' = do tit <- option "" linkTitle skipSpaces eof - return (uriEscapeSpaces $ removeTrailingSpace src, tit) + return (escapeURI $ removeTrailingSpace src, tit) linkTitle :: GenParser Char st String linkTitle = try $ do @@ -1208,11 +1204,11 @@ linkTitle = try $ do link :: GenParser Char ParserState Inline link = try $ do lab <- reference - src <- source <|> referenceLink lab + (src, tit) <- source <|> referenceLink lab sanitize <- getState >>= return . stateSanitizeHTML - if sanitize && unsanitaryURI (fst src) + if sanitize && unsanitaryURI src then fail "Unsanitary URI" - else return $ Link lab src + else return $ Link lab (src, tit) -- a link like [this][ref] or [this][] or [this] referenceLink :: [Inline] @@ -1229,18 +1225,15 @@ referenceLink lab = do autoLink :: GenParser Char ParserState Inline autoLink = try $ do char '<' - src <- uri <|> (emailAddress >>= (return . ("mailto:" ++))) + (orig, src) <- uri <|> emailAddress char '>' - let src' = if "mailto:" `isPrefixOf` src - then drop 7 src - else src st <- getState let sanitize = stateSanitizeHTML st if sanitize && unsanitaryURI src then fail "Unsanitary URI" else return $ if stateStrict st - then Link [Str src'] (src, "") - else Link [Code src'] (src, "") + then Link [Str orig] (src, "") + else Link [Code orig] (src, "") image :: GenParser Char ParserState Inline image = try $ do diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 40b02a25a..5e7ea512e 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -556,7 +556,7 @@ targetURI = do contents <- many1 (try (many spaceChar >> newline >> many1 spaceChar >> noneOf " \t\n") <|> noneOf "\n") blanklines - return contents + return $ escapeURI $ removeLeadingTrailingSpace $ contents imageKey :: GenParser Char ParserState ([Inline], (String, [Char])) imageKey = try $ do @@ -565,13 +565,13 @@ imageKey = try $ do skipSpaces string "image::" src <- targetURI - return (normalizeSpaces ref, (removeLeadingTrailingSpace src, "")) + return (normalizeSpaces ref, (src, "")) anonymousKey :: GenParser Char st ([Inline], (String, [Char])) anonymousKey = try $ do oneOfStrings [".. __:", "__"] src <- targetURI - return ([Str "_"], (removeLeadingTrailingSpace src, "")) + return ([Str "_"], (src, "")) regularKey :: GenParser Char ParserState ([Inline], (String, [Char])) regularKey = try $ do @@ -579,7 +579,7 @@ regularKey = try $ do ref <- referenceName char ':' src <- targetURI - return (normalizeSpaces ref, (removeLeadingTrailingSpace src, "")) + return (normalizeSpaces ref, (src, "")) -- -- tables @@ -883,7 +883,8 @@ explicitLink = try $ do src <- manyTill (noneOf ">\n") (char '>') skipSpaces string "`_" - return $ Link (normalizeSpaces label') (removeLeadingTrailingSpace src, "") + return $ Link (normalizeSpaces label') + (escapeURI $ removeLeadingTrailingSpace src, "") referenceLink :: GenParser Char ParserState Inline referenceLink = try $ do @@ -891,25 +892,25 @@ referenceLink = try $ do key <- option label' (do{char '_'; return [Str "_"]}) -- anonymous link state <- getState let keyTable = stateKeys state - src <- case lookupKeySrc keyTable key of - Nothing -> fail "no corresponding key" - Just target -> return target + (src,tit) <- case lookupKeySrc keyTable key of + Nothing -> fail "no corresponding key" + Just target -> return target -- if anonymous link, remove first anon key so it won't be used again let keyTable' = if (key == [Str "_"]) -- anonymous link? - then delete ([Str "_"], src) keyTable -- remove first anon key + then delete ([Str "_"], (src,tit)) keyTable -- remove first anon key else keyTable setState $ state { stateKeys = keyTable' } - return $ Link (normalizeSpaces label') src + return $ Link (normalizeSpaces label') (src, tit) autoURI :: GenParser Char ParserState Inline autoURI = do - src <- uri - return $ Link [Str src] (src, "") + (orig, src) <- uri + return $ Link [Str orig] (src, "") autoEmail :: GenParser Char ParserState Inline autoEmail = do - src <- emailAddress - return $ Link [Str src] ("mailto:" ++ src, "") + (orig, src) <- emailAddress + return $ Link [Str orig] (src, "") autoLink :: GenParser Char ParserState Inline autoLink = autoURI <|> autoEmail @@ -921,7 +922,7 @@ image = try $ do ref <- manyTill inline (char '|') state <- getState let keyTable = stateKeys state - src <- case lookupKeySrc keyTable ref of - Nothing -> fail "no corresponding key" - Just target -> return target - return $ Image (normalizeSpaces ref) src + (src,tit) <- case lookupKeySrc keyTable ref of + Nothing -> fail "no corresponding key" + Just target -> return target + return $ Image (normalizeSpaces ref) (src, tit) |