aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJohn MacFarlane <fiddlosopher@gmail.com>2011-12-02 19:39:30 -0800
committerJohn MacFarlane <fiddlosopher@gmail.com>2011-12-02 19:39:30 -0800
commitbdec07bac9efb737dbc60e6985d8ba1500a5eeb9 (patch)
tree5362789da366fe94f39cbbb843891fba3c2b46b1 /src
parentc6456ef8a4d3e0412ae7c2c78433cfcbe29aea9f (diff)
downloadpandoc-bdec07bac9efb737dbc60e6985d8ba1500a5eeb9.tar.gz
Shared: Removed unescapeURI, modified escapeURI.
escapeURI now only escapes space characters, leaving unicode characters as they are, instead of converting them to octets and URL-encoding them, as before. This gives more readable URIs. User agents now do the percent-encoding themselves. URIs are no longer unescaped at all on conversion to markdown, asciidoc, rst, org. Closes #349.
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc/Shared.hs25
-rw-r--r--src/Text/Pandoc/Writers/AsciiDoc.hs6
-rw-r--r--src/Text/Pandoc/Writers/Markdown.hs3
-rw-r--r--src/Text/Pandoc/Writers/Org.hs3
-rw-r--r--src/Text/Pandoc/Writers/RST.hs8
5 files changed, 15 insertions, 30 deletions
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index be34bc823..86797dcf2 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -44,7 +44,6 @@ module Text.Pandoc.Shared (
camelCaseToHyphenated,
toRomanNumeral,
escapeURI,
- unescapeURI,
tabFilter,
-- * Pandoc block and inline list processing
orderedListMarkers,
@@ -73,11 +72,10 @@ module Text.Pandoc.Shared (
import Text.Pandoc.Definition
import Text.Pandoc.Generic
import qualified Text.Pandoc.UTF8 as UTF8 (readFile)
-import Data.Char ( toLower, isLower, isUpper, isAlpha, isAscii,
- isLetter, isDigit )
+import Data.Char ( toLower, isLower, isUpper, isAlpha,
+ isLetter, isDigit, isSpace )
import Data.List ( find, isPrefixOf, intercalate )
-import Network.URI ( isAllowedInURI, escapeURIString, unEscapeString )
-import Codec.Binary.UTF8.String ( encodeString, decodeString )
+import Network.URI ( escapeURIString )
import System.Directory
import System.FilePath ( (</>) )
import Data.Generics (Typeable, Data)
@@ -181,16 +179,9 @@ toRomanNumeral x =
_ | x >= 1 -> "I" ++ toRomanNumeral (x - 1)
_ -> ""
--- | Escape unicode characters in a URI. Characters that are
--- already valid in a URI, including % and ?, are left alone.
+-- | Escape whitespace in URI.
escapeURI :: String -> String
-escapeURI = escapeURIString isAllowedInURI . encodeString
-
--- | Unescape unicode and some special characters in a URI, but
--- without introducing spaces.
-unescapeURI :: String -> String
-unescapeURI = escapeURIString (\c -> isAllowedInURI c || not (isAscii c)) .
- decodeString . unEscapeString
+escapeURI = escapeURIString (not . isSpace)
-- | Convert tabs to spaces and filter out DOS line endings.
-- Tabs will be preserved if tab stop is set to 0.
@@ -304,9 +295,9 @@ consolidateInlines (Str x : ys) =
fromStr (Str z) = z
fromStr _ = error "consolidateInlines - fromStr - not a Str"
consolidateInlines (Space : ys) = Space : rest
- where isSpace Space = True
- isSpace _ = False
- rest = consolidateInlines $ dropWhile isSpace ys
+ where isSp Space = True
+ isSp _ = False
+ rest = consolidateInlines $ dropWhile isSp ys
consolidateInlines (Emph xs : Emph ys : zs) = consolidateInlines $
Emph (xs ++ ys) : zs
consolidateInlines (Strong xs : Strong ys : zs) = consolidateInlines $
diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs
index ab2b01656..f45c20e9e 100644
--- a/src/Text/Pandoc/Writers/AsciiDoc.hs
+++ b/src/Text/Pandoc/Writers/AsciiDoc.hs
@@ -336,12 +336,11 @@ inlineToAsciiDoc _ (RawInline _ _) = return empty
inlineToAsciiDoc _ (LineBreak) = return $ " +" <> cr
inlineToAsciiDoc _ Space = return space
inlineToAsciiDoc opts (Cite _ lst) = inlineListToAsciiDoc opts lst
-inlineToAsciiDoc opts (Link txt (src', _tit)) = do
+inlineToAsciiDoc opts (Link txt (src, _tit)) = do
-- relative: link:downloads/foo.zip[download foo.zip]
-- abs: http://google.cod[Google]
-- or my@email.com[email john]
linktext <- inlineListToAsciiDoc opts txt
- let src = unescapeURI src'
let isRelative = ':' `notElem` src
let prefix = if isRelative
then text "link:"
@@ -353,7 +352,7 @@ inlineToAsciiDoc opts (Link txt (src', _tit)) = do
return $ if useAuto
then text srcSuffix
else prefix <> text src <> "[" <> linktext <> "]"
-inlineToAsciiDoc opts (Image alternate (src', tit)) = do
+inlineToAsciiDoc opts (Image alternate (src, tit)) = do
-- image:images/logo.png[Company logo, title="blah"]
let txt = if (null alternate) || (alternate == [Str ""])
then [Str "image"]
@@ -362,7 +361,6 @@ inlineToAsciiDoc opts (Image alternate (src', tit)) = do
let linktitle = if null tit
then empty
else text $ ",title=\"" ++ tit ++ "\""
- let src = unescapeURI src'
return $ "image:" <> text src <> "[" <> linktext <> linktitle <> "]"
inlineToAsciiDoc opts (Note [Para inlines]) =
inlineToAsciiDoc opts (Note [Plain inlines])
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 42a59cc5f..70202294f 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -493,12 +493,11 @@ inlineToMarkdown opts (Cite (c:cs) lst)
modekey SuppressAuthor = "-"
modekey _ = ""
inlineToMarkdown _ (Cite _ _) = return $ text ""
-inlineToMarkdown opts (Link txt (src', tit)) = do
+inlineToMarkdown opts (Link txt (src, tit)) = do
linktext <- inlineListToMarkdown opts txt
let linktitle = if null tit
then empty
else text $ " \"" ++ tit ++ "\""
- let src = unescapeURI src'
let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src
let useRefLinks = writerReferenceLinks opts
let useAuto = case (tit,txt) of
diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs
index f7f314428..6e0fb98e1 100644
--- a/src/Text/Pandoc/Writers/Org.hs
+++ b/src/Text/Pandoc/Writers/Org.hs
@@ -272,8 +272,7 @@ inlineToOrg (Link txt (src, _)) = do
_ -> do contents <- inlineListToOrg txt
modify $ \s -> s{ stLinks = True }
return $ "[[" <> text src <> "][" <> contents <> "]]"
-inlineToOrg (Image _ (source', _)) = do
- let source = unescapeURI source'
+inlineToOrg (Image _ (source, _)) = do
modify $ \s -> s{ stImages = True }
return $ "[[" <> text source <> "]]"
inlineToOrg (Note contents) = do
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index 4cf64c267..0f0479e16 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -298,9 +298,8 @@ inlineToRST Space = return space
inlineToRST (Link [Code _ str] (src, _)) | src == str ||
src == "mailto:" ++ str = do
let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src
- return $ text $ unescapeURI srcSuffix
-inlineToRST (Link txt (src', tit)) = do
- let src = unescapeURI src'
+ return $ text srcSuffix
+inlineToRST (Link txt (src, tit)) = do
useReferenceLinks <- get >>= return . writerReferenceLinks . stOptions
linktext <- inlineListToRST $ normalizeSpaces txt
if useReferenceLinks
@@ -311,8 +310,7 @@ inlineToRST (Link txt (src', tit)) = do
modify $ \st -> st { stLinks = refs' }
return $ "`" <> linktext <> "`_"
else return $ "`" <> linktext <> " <" <> text src <> ">`_"
-inlineToRST (Image alternate (source', tit)) = do
- let source = unescapeURI source'
+inlineToRST (Image alternate (source, tit)) = do
pics <- get >>= return . stImages
let labelsUsed = map fst pics
let txt = if null alternate || alternate == [Str ""] ||