about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: John MacFarlane <jgm@berkeley.edu> 2010-03-23 15:07:48 -0700
committer: John MacFarlane <jgm@berkeley.edu> 2010-03-23 15:07:48 -0700
commit: c87d52223ad97ae7f21c69b09ec7b1467bd35670 (patch)
tree: c6dc822057ff7b05de2371d0794f49eed8117fa2
parent: c3384bb3f147e61d9eba8dd2d3ce0b1450003d4a (diff)
download: pandoc-c87d52223ad97ae7f21c69b09ec7b1467bd35670.tar.gz
Properly escape URIs in all readers.
-rw-r--r-- src/Text/Pandoc/Readers/HTML.hs 6
-rw-r--r-- src/Text/Pandoc/Readers/LaTeX.hs 13
-rw-r--r-- src/Text/Pandoc/Readers/Markdown.hs 25
-rw-r--r-- src/Text/Pandoc/Readers/RST.hs 37
4 files changed, 37 insertions, 44 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index e68592b95..5c188e3d9 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -182,7 +182,7 @@ unsanitaryURI u =
"ldaps:", "magnet:", "mms:", "msnim:", "notes:", "rsync:",
"secondlife:", "skype:", "ssh:", "sftp:", "smb:", "sms:",
"snews:", "webcal:", "ymsgr:"]
- in case parseURIReference (stringToURI u) of
+ in case parseURIReference (escapeURI u) of
Just p -> (map toLower $ uriScheme p) `notElem` safeURISchemes
Nothing -> True
@@ -746,7 +746,7 @@ link = try $ do
Nothing -> fail "no href"
let title = fromMaybe "" $ extractAttribute "title" attributes
lab <- inlinesTilEnd "a"
- return $ Link (normalizeSpaces lab) (url, title)
+ return $ Link (normalizeSpaces lab) (escapeURI url, title)
image :: GenParser Char ParserState Inline
image = try $ do
@@ -756,5 +756,5 @@ image = try $ do
Nothing -> fail "no src"
let title = fromMaybe "" $ extractAttribute "title" attributes
let alt = fromMaybe "" (extractAttribute "alt" attributes)
- return $ Image [Str alt] (url, title)
+ return $ Image [Str alt] (escapeURI url, title)
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index a2df27c07..36940fab0 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -738,7 +738,7 @@ url :: GenParser Char ParserState Inline
url = try $ do
string "\\url"
url' <- charsInBalanced '{' '}'
- return $ Link [Code url'] (url', "")
+ return $ Link [Code url'] (escapeURI url', "")
link :: GenParser Char ParserState Inline
link = try $ do
@@ -746,17 +746,16 @@ link = try $ do
url' <- manyTill anyChar (char '}')
char '{'
label' <- manyTill inline (char '}')
- return $ Link (normalizeSpaces label') (url', "")
+ return $ Link (normalizeSpaces label') (escapeURI url', "")
image :: GenParser Char ParserState Inline
image = try $ do
("includegraphics", _, args) <- command
let args' = filter isArg args -- filter out options
- let src = if null args' then
- ("", "")
- else
- (stripFirstAndLast (head args'), "")
- return $ Image [Str "image"] src
+ let (src,tit) = case args' of
+ [] -> ("", "")
+ (x:_) -> (stripFirstAndLast x, "")
+ return $ Image [Str "image"] (escapeURI src, tit)
footnote :: GenParser Char ParserState Inline
footnote = try $ do
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index fafd526e6..13edd0586 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -31,7 +31,7 @@ module Text.Pandoc.Readers.Markdown (
readMarkdown
) where
-import Data.List ( transpose, isPrefixOf, isSuffixOf, sortBy, findIndex, intercalate )
+import Data.List ( transpose, isSuffixOf, sortBy, findIndex, intercalate )
import Data.Ord ( comparing )
import Data.Char ( isAlphaNum )
import Data.Maybe
@@ -73,10 +73,6 @@ specialChars = "\\[]*_~`<>$!^-.&'\"\8216\8217\8220\8221;"
-- auxiliary functions
--
--- | Replace spaces with %20
-uriEscapeSpaces :: String -> String
-uriEscapeSpaces = substitute " " "%20"
-
indentSpaces :: GenParser Char ParserState [Char]
indentSpaces = try $ do
state <- getState
@@ -206,7 +202,7 @@ referenceKey = try $ do
tit <- option "" referenceTitle
blanklines
endPos <- getPosition
- let newkey = (lab, (uriEscapeSpaces $ removeTrailingSpace src, tit))
+ let newkey = (lab, (escapeURI $ removeTrailingSpace src, tit))
st <- getState
let oldkeys = stateKeys st
updateState $ \s -> s { stateKeys = newkey : oldkeys }
@@ -1194,7 +1190,7 @@ source' = do
tit <- option "" linkTitle
skipSpaces
eof
- return (uriEscapeSpaces $ removeTrailingSpace src, tit)
+ return (escapeURI $ removeTrailingSpace src, tit)
linkTitle :: GenParser Char st String
linkTitle = try $ do
@@ -1208,11 +1204,11 @@ linkTitle = try $ do
link :: GenParser Char ParserState Inline
link = try $ do
lab <- reference
- src <- source <|> referenceLink lab
+ (src, tit) <- source <|> referenceLink lab
sanitize <- getState >>= return . stateSanitizeHTML
- if sanitize && unsanitaryURI (fst src)
+ if sanitize && unsanitaryURI src
then fail "Unsanitary URI"
- else return $ Link lab src
+ else return $ Link lab (src, tit)
-- a link like [this][ref] or [this][] or [this]
referenceLink :: [Inline]
@@ -1229,18 +1225,15 @@ referenceLink lab = do
autoLink :: GenParser Char ParserState Inline
autoLink = try $ do
char '<'
- src <- uri <|> (emailAddress >>= (return . ("mailto:" ++)))
+ (orig, src) <- uri <|> emailAddress
char '>'
- let src' = if "mailto:" `isPrefixOf` src
- then drop 7 src
- else src
st <- getState
let sanitize = stateSanitizeHTML st
if sanitize && unsanitaryURI src
then fail "Unsanitary URI"
else return $ if stateStrict st
- then Link [Str src'] (src, "")
- else Link [Code src'] (src, "")
+ then Link [Str orig] (src, "")
+ else Link [Code orig] (src, "")
image :: GenParser Char ParserState Inline
image = try $ do
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 40b02a25a..5e7ea512e 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -556,7 +556,7 @@ targetURI = do
contents <- many1 (try (many spaceChar >> newline >>
many1 spaceChar >> noneOf " \t\n") <|> noneOf "\n")
blanklines
- return contents
+ return $ escapeURI $ removeLeadingTrailingSpace $ contents
imageKey :: GenParser Char ParserState ([Inline], (String, [Char]))
imageKey = try $ do
@@ -565,13 +565,13 @@ imageKey = try $ do
skipSpaces
string "image::"
src <- targetURI
- return (normalizeSpaces ref, (removeLeadingTrailingSpace src, ""))
+ return (normalizeSpaces ref, (src, ""))
anonymousKey :: GenParser Char st ([Inline], (String, [Char]))
anonymousKey = try $ do
oneOfStrings [".. __:", "__"]
src <- targetURI
- return ([Str "_"], (removeLeadingTrailingSpace src, ""))
+ return ([Str "_"], (src, ""))
regularKey :: GenParser Char ParserState ([Inline], (String, [Char]))
regularKey = try $ do
@@ -579,7 +579,7 @@ regularKey = try $ do
ref <- referenceName
char ':'
src <- targetURI
- return (normalizeSpaces ref, (removeLeadingTrailingSpace src, ""))
+ return (normalizeSpaces ref, (src, ""))
--
-- tables
@@ -883,7 +883,8 @@ explicitLink = try $ do
src <- manyTill (noneOf ">\n") (char '>')
skipSpaces
string "`_"
- return $ Link (normalizeSpaces label') (removeLeadingTrailingSpace src, "")
+ return $ Link (normalizeSpaces label')
+ (escapeURI $ removeLeadingTrailingSpace src, "")
referenceLink :: GenParser Char ParserState Inline
referenceLink = try $ do
@@ -891,25 +892,25 @@ referenceLink = try $ do
key <- option label' (do{char '_'; return [Str "_"]}) -- anonymous link
state <- getState
let keyTable = stateKeys state
- src <- case lookupKeySrc keyTable key of
- Nothing -> fail "no corresponding key"
- Just target -> return target
+ (src,tit) <- case lookupKeySrc keyTable key of
+ Nothing -> fail "no corresponding key"
+ Just target -> return target
-- if anonymous link, remove first anon key so it won't be used again
let keyTable' = if (key == [Str "_"]) -- anonymous link?
- then delete ([Str "_"], src) keyTable -- remove first anon key
+ then delete ([Str "_"], (src,tit)) keyTable -- remove first anon key
else keyTable
setState $ state { stateKeys = keyTable' }
- return $ Link (normalizeSpaces label') src
+ return $ Link (normalizeSpaces label') (src, tit)
autoURI :: GenParser Char ParserState Inline
autoURI = do
- src <- uri
- return $ Link [Str src] (src, "")
+ (orig, src) <- uri
+ return $ Link [Str orig] (src, "")
autoEmail :: GenParser Char ParserState Inline
autoEmail = do
- src <- emailAddress
- return $ Link [Str src] ("mailto:" ++ src, "")
+ (orig, src) <- emailAddress
+ return $ Link [Str orig] (src, "")
autoLink :: GenParser Char ParserState Inline
autoLink = autoURI <|> autoEmail
@@ -921,7 +922,7 @@ image = try $ do
ref <- manyTill inline (char '|')
state <- getState
let keyTable = stateKeys state
- src <- case lookupKeySrc keyTable ref of
- Nothing -> fail "no corresponding key"
- Just target -> return target
- return $ Image (normalizeSpaces ref) src
+ (src,tit) <- case lookupKeySrc keyTable ref of
+ Nothing -> fail "no corresponding key"
+ Just target -> return target
+ return $ Image (normalizeSpaces ref) (src, tit)