aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2010-02-27 03:06:39 +0000
committerfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2010-02-27 03:06:39 +0000
commit07ae5bc264d1c280dc8c7e0798a0df4bd560ab74 (patch)
tree1018c0380f2de10ef52f227d84958a1ea5872eb3
parentd3f1ddf57ee420cdcd573158812a7499a02c09f7 (diff)
downloadpandoc-07ae5bc264d1c280dc8c7e0798a0df4bd560ab74.tar.gz
Markdown reader: properly escape URIs.
+ Resolves Issue #220. + Added escapeURI function to Markdown reader. This escapes links in a way that makes sense for markdown. If they've used URI escapes like %20 in their link, these will be preserved. But if they've used a special character or space without escaping it, it will be escaped. This should make sense in most cases. + Previously pandoc collapsed adjacent spaces and replaced these sequences of spaces with + characters. That isn't correct for a URI path (+ is to be used only in the query part). We've also removed the space-collapsing behavior. git-svn-id: https://pandoc.googlecode.com/svn/trunk@1847 788f1e2b-df1e-0410-8736-df70ead52e1b
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs12
-rw-r--r--tests/markdown-reader-more.native4
2 files changed, 12 insertions, 4 deletions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index e3052386a..7131af2b7 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -45,6 +45,7 @@ import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag,
import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
import Text.ParserCombinators.Parsec
import Control.Monad (when, liftM, unless)
+import Network.URI ( unEscapeString, escapeURIString, isUnescapedInURI )
-- | Read markdown from an input string and return a Pandoc document.
readMarkdown :: ParserState -- ^ Parser state, including options for parser
@@ -73,6 +74,13 @@ specialChars = "\\[]*_~`<>$!^-.&'\"\8216\8217\8220\8221;"
-- auxiliary functions
--
+-- | Escape a URI in a markdown-appropriate way. First,
+-- we unescape the string, since we don't want to screw things
+-- up if they've entered a properly encoded URI. Then, we
+-- escape the result.
+escapeURI :: String -> String
+escapeURI = escapeURIString isUnescapedInURI . unEscapeString
+
indentSpaces :: GenParser Char ParserState [Char]
indentSpaces = try $ do
state <- getState
@@ -194,7 +202,7 @@ referenceKey = try $ do
tit <- option "" referenceTitle
blanklines
endPos <- getPosition
- let newkey = (lab, (intercalate "+" $ words $ removeTrailingSpace src, tit))
+ let newkey = (lab, (escapeURI $ removeTrailingSpace src, tit))
st <- getState
let oldkeys = stateKeys st
updateState $ \s -> s { stateKeys = newkey : oldkeys }
@@ -1173,7 +1181,7 @@ source' = do
tit <- option "" linkTitle
skipSpaces
eof
- return (intercalate "+" $ words $ removeTrailingSpace src, tit)
+ return (escapeURI $ removeTrailingSpace src, tit)
linkTitle :: GenParser Char st String
linkTitle = try $ do
diff --git a/tests/markdown-reader-more.native b/tests/markdown-reader-more.native
index 716ab9c33..5483e4c4d 100644
--- a/tests/markdown-reader-more.native
+++ b/tests/markdown-reader-more.native
@@ -6,8 +6,8 @@ Pandoc (Meta [] [] [])
, Para [TeX "\\placeformula",Space,TeX "\\startformula\n L_{1} = L_{2}\n \\stopformula"]
, Para [TeX "\\start[a2]\n\\start[a2]\n\\stop[a2]\n\\stop[a2]"]
, Header 2 [Str "URLs",Space,Str "with",Space,Str "spaces"]
-, Para [Link [Str "foo"] ("/bar+and+baz",""),Space,Link [Str "foo"] ("/bar+and+baz",""),Space,Link [Str "foo"] ("/bar+and+baz",""),Space,Link [Str "foo"] ("bar+baz","title")]
-, Para [Link [Str "baz"] ("/foo+foo",""),Space,Link [Str "bam"] ("/foo+fee",""),Space,Link [Str "bork"] ("/foo/zee+zob","title")]
+, Para [Link [Str "foo"] ("/bar%20and%20baz",""),Space,Link [Str "foo"] ("/bar%20and%20baz",""),Space,Link [Str "foo"] ("/bar%20%20and%20%20baz",""),Space,Link [Str "foo"] ("bar%20baz","title")]
+, Para [Link [Str "baz"] ("/foo%20foo",""),Space,Link [Str "bam"] ("/foo%20fee",""),Space,Link [Str "bork"] ("/foo/zee%20zob","title")]
, Header 2 [Str "Horizontal",Space,Str "rules",Space,Str "with",Space,Str "spaces",Space,Str "at",Space,Str "end"]
, HorizontalRule
, HorizontalRule