aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc
diff options
context:
space:
mode:
authorfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2010-02-27 03:06:39 +0000
committerfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2010-02-27 03:06:39 +0000
commit07ae5bc264d1c280dc8c7e0798a0df4bd560ab74 (patch)
tree1018c0380f2de10ef52f227d84958a1ea5872eb3 /src/Text/Pandoc
parentd3f1ddf57ee420cdcd573158812a7499a02c09f7 (diff)
downloadpandoc-07ae5bc264d1c280dc8c7e0798a0df4bd560ab74.tar.gz
Markdown reader: properly escape URIs.
+ Resolves Issue #220. + Added escapeURI function to Markdown reader. This escapes links in a way that makes sense for markdown. If they've used URI escapes like %20 in their link, these will be preserved. But if they've used a special character or space without escaping it, it will be escaped. This should make sense in most cases. + Previously pandoc collapsed adjacent spaces and replaced these sequences of spaces with + characters. That isn't correct for a URI path (+ is to be used only in the query part). We've also removed the space-collapsing behavior. git-svn-id: https://pandoc.googlecode.com/svn/trunk@1847 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs12
1 files changed, 10 insertions, 2 deletions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index e3052386a..7131af2b7 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -45,6 +45,7 @@ import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag,
import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
import Text.ParserCombinators.Parsec
import Control.Monad (when, liftM, unless)
+import Network.URI ( unEscapeString, escapeURIString, isUnescapedInURI )
-- | Read markdown from an input string and return a Pandoc document.
readMarkdown :: ParserState -- ^ Parser state, including options for parser
@@ -73,6 +74,13 @@ specialChars = "\\[]*_~`<>$!^-.&'\"\8216\8217\8220\8221;"
-- auxiliary functions
--
+-- | Escape a URI in a markdown-appropriate way. Percent-escapes the
+-- author already wrote (@%@ followed by two hex digits, e.g. @%20@ or
+-- @%26@) are kept verbatim; every other character is passed through
+-- 'escapeURIString' with 'isUnescapedInURI', so spaces and other
+-- characters that may not appear bare in a URI get escaped.
+--
+-- The input is deliberately not run through 'unEscapeString' first:
+-- decoding would turn escaped reserved characters such as @%26@ or
+-- @%2F@ into literal @&@ and @/@, which 'isUnescapedInURI' then lets
+-- through, silently changing the meaning of the link.
+escapeURI :: String -> String
+escapeURI ('%':h1:h2:rest)
+  | isHex h1 && isHex h2 = '%' : h1 : h2 : escapeURI rest
+  where isHex = (`elem` "0123456789ABCDEFabcdef")
+escapeURI (c:rest) = escapeURIString isUnescapedInURI [c] ++ escapeURI rest
+escapeURI [] = []
+
indentSpaces :: GenParser Char ParserState [Char]
indentSpaces = try $ do
state <- getState
@@ -194,7 +202,7 @@ referenceKey = try $ do
tit <- option "" referenceTitle
blanklines
endPos <- getPosition
- let newkey = (lab, (intercalate "+" $ words $ removeTrailingSpace src, tit))
+ let newkey = (lab, (escapeURI $ removeTrailingSpace src, tit))
st <- getState
let oldkeys = stateKeys st
updateState $ \s -> s { stateKeys = newkey : oldkeys }
@@ -1173,7 +1181,7 @@ source' = do
tit <- option "" linkTitle
skipSpaces
eof
- return (intercalate "+" $ words $ removeTrailingSpace src, tit)
+ return (escapeURI $ removeTrailingSpace src, tit)
linkTitle :: GenParser Char st String
linkTitle = try $ do