From 07ae5bc264d1c280dc8c7e0798a0df4bd560ab74 Mon Sep 17 00:00:00 2001
From: fiddlosopher
Date: Sat, 27 Feb 2010 03:06:39 +0000
Subject: Markdown reader: properly escape URIs.

+ Resolves Issue #220.
+ Added escapeURI function to Markdown reader. This escapes links in a
  way that makes sense for markdown. If they've used URI escapes like
  %20 in their link, these will be preserved. But if they've used a
  special character or space without escaping it, it will be escaped.
  This should make sense in most cases.
+ Previously pandoc collapsed adjacent spaces and replaced these
  sequences of spaces with + characters. That isn't correct for a URI
  path (+ is to be used only in the query part). We've also removed the
  space-collapsing behavior.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1847 788f1e2b-df1e-0410-8736-df70ead52e1b
---
 src/Text/Pandoc/Readers/Markdown.hs | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'src/Text/Pandoc')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index e3052386a..7131af2b7 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -45,6 +45,7 @@ import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag,
 import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
 import Text.ParserCombinators.Parsec
 import Control.Monad (when, liftM, unless)
+import Network.URI ( unEscapeString, escapeURIString, isUnescapedInURI )
 
 -- | Read markdown from an input string and return a Pandoc document.
 readMarkdown :: ParserState -- ^ Parser state, including options for parser
@@ -73,6 +74,13 @@ specialChars = "\\[]*_~`<>$!^-.&'\"\8216\8217\8220\8221;"
 -- auxiliary functions
 --
 
+-- | Escape a URI in a markdown-appropriate way. First,
+-- we unescape the string, since we don't want to screw things
+-- up if they've entered a properly encoded URI. Then, we
+-- escape the result.
+escapeURI :: String -> String
+escapeURI = escapeURIString isUnescapedInURI . unEscapeString
+
 indentSpaces :: GenParser Char ParserState [Char]
 indentSpaces = try $ do
   state <- getState
@@ -194,7 +202,7 @@ referenceKey = try $ do
   tit <- option "" referenceTitle
   blanklines
   endPos <- getPosition
-  let newkey = (lab, (intercalate "+" $ words $ removeTrailingSpace src, tit))
+  let newkey = (lab, (escapeURI $ removeTrailingSpace src, tit))
   st <- getState
   let oldkeys = stateKeys st
   updateState $ \s -> s { stateKeys = newkey : oldkeys }
@@ -1173,7 +1181,7 @@ source' = do
   tit <- option "" linkTitle
   skipSpaces
   eof
-  return (intercalate "+" $ words $ removeTrailingSpace src, tit)
+  return (escapeURI $ removeTrailingSpace src, tit)
 
 linkTitle :: GenParser Char st String
 linkTitle = try $ do
-- 
cgit v1.2.3