diff options
author | John MacFarlane <fiddlosopher@gmail.com> | 2011-12-02 19:39:30 -0800 |
---|---|---|
committer | John MacFarlane <fiddlosopher@gmail.com> | 2011-12-02 19:39:30 -0800 |
commit | bdec07bac9efb737dbc60e6985d8ba1500a5eeb9 (patch) | |
tree | 5362789da366fe94f39cbbb843891fba3c2b46b1 /src | |
parent | c6456ef8a4d3e0412ae7c2c78433cfcbe29aea9f (diff) | |
download | pandoc-bdec07bac9efb737dbc60e6985d8ba1500a5eeb9.tar.gz |
Shared: Removed unescapeURI, modified escapeURI.
escapeURI now only escapes space characters, leaving unicode characters
as they are, instead of converting them to octets and URL-encoding them,
as before. This gives more readable URIs. User agents now do the
percent-encoding themselves.
URIs are no longer unescaped at all on conversion to markdown, asciidoc,
rst, org.
Closes #349.
Diffstat (limited to 'src')
-rw-r--r-- | src/Text/Pandoc/Shared.hs | 25 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/AsciiDoc.hs | 6 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Markdown.hs | 3 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Org.hs | 3 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/RST.hs | 8 |
5 files changed, 15 insertions, 30 deletions
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index be34bc823..86797dcf2 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -44,7 +44,6 @@ module Text.Pandoc.Shared ( camelCaseToHyphenated, toRomanNumeral, escapeURI, - unescapeURI, tabFilter, -- * Pandoc block and inline list processing orderedListMarkers, @@ -73,11 +72,10 @@ module Text.Pandoc.Shared ( import Text.Pandoc.Definition import Text.Pandoc.Generic import qualified Text.Pandoc.UTF8 as UTF8 (readFile) -import Data.Char ( toLower, isLower, isUpper, isAlpha, isAscii, - isLetter, isDigit ) +import Data.Char ( toLower, isLower, isUpper, isAlpha, + isLetter, isDigit, isSpace ) import Data.List ( find, isPrefixOf, intercalate ) -import Network.URI ( isAllowedInURI, escapeURIString, unEscapeString ) -import Codec.Binary.UTF8.String ( encodeString, decodeString ) +import Network.URI ( escapeURIString ) import System.Directory import System.FilePath ( (</>) ) import Data.Generics (Typeable, Data) @@ -181,16 +179,9 @@ toRomanNumeral x = _ | x >= 1 -> "I" ++ toRomanNumeral (x - 1) _ -> "" --- | Escape unicode characters in a URI. Characters that are --- already valid in a URI, including % and ?, are left alone. +-- | Escape whitespace in URI. escapeURI :: String -> String -escapeURI = escapeURIString isAllowedInURI . encodeString - --- | Unescape unicode and some special characters in a URI, but --- without introducing spaces. -unescapeURI :: String -> String -unescapeURI = escapeURIString (\c -> isAllowedInURI c || not (isAscii c)) . - decodeString . unEscapeString +escapeURI = escapeURIString (not . isSpace) -- | Convert tabs to spaces and filter out DOS line endings. -- Tabs will be preserved if tab stop is set to 0. @@ -304,9 +295,9 @@ consolidateInlines (Str x : ys) = fromStr (Str z) = z fromStr _ = error "consolidateInlines - fromStr - not a Str" consolidateInlines (Space : ys) = Space : rest - where isSpace Space = True - isSpace _ = False - rest = consolidateInlines $ dropWhile isSpace ys + where isSp Space = True + isSp _ = False + rest = consolidateInlines $ dropWhile isSp ys consolidateInlines (Emph xs : Emph ys : zs) = consolidateInlines $ Emph (xs ++ ys) : zs consolidateInlines (Strong xs : Strong ys : zs) = consolidateInlines $ diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index ab2b01656..f45c20e9e 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -336,12 +336,11 @@ inlineToAsciiDoc _ (RawInline _ _) = return empty inlineToAsciiDoc _ (LineBreak) = return $ " +" <> cr inlineToAsciiDoc _ Space = return space inlineToAsciiDoc opts (Cite _ lst) = inlineListToAsciiDoc opts lst -inlineToAsciiDoc opts (Link txt (src', _tit)) = do +inlineToAsciiDoc opts (Link txt (src, _tit)) = do -- relative: link:downloads/foo.zip[download foo.zip] -- abs: http://google.cod[Google] -- or my@email.com[email john] linktext <- inlineListToAsciiDoc opts txt - let src = unescapeURI src' let isRelative = ':' `notElem` src let prefix = if isRelative then text "link:" @@ -353,7 +352,7 @@ inlineToAsciiDoc opts (Link txt (src', _tit)) = do return $ if useAuto then text srcSuffix else prefix <> text src <> "[" <> linktext <> "]" -inlineToAsciiDoc opts (Image alternate (src', tit)) = do +inlineToAsciiDoc opts (Image alternate (src, tit)) = do -- image:images/logo.png[Company logo, title="blah"] let txt = if (null alternate) || (alternate == [Str ""]) then [Str "image"] @@ -362,7 +361,6 @@ inlineToAsciiDoc opts (Image alternate (src', tit)) = do let linktitle = if null tit then empty else text $ ",title=\"" ++ tit ++ "\"" - let src = unescapeURI src' return $ "image:" <> text src <> "[" <> linktext <> linktitle <> "]" inlineToAsciiDoc opts (Note [Para inlines]) = inlineToAsciiDoc opts (Note [Plain inlines]) diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 42a59cc5f..70202294f 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -493,12 +493,11 @@ inlineToMarkdown opts (Cite (c:cs) lst) modekey SuppressAuthor = "-" modekey _ = "" inlineToMarkdown _ (Cite _ _) = return $ text "" -inlineToMarkdown opts (Link txt (src', tit)) = do +inlineToMarkdown opts (Link txt (src, tit)) = do linktext <- inlineListToMarkdown opts txt let linktitle = if null tit then empty else text $ " \"" ++ tit ++ "\"" - let src = unescapeURI src' let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src let useRefLinks = writerReferenceLinks opts let useAuto = case (tit,txt) of diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index f7f314428..6e0fb98e1 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -272,8 +272,7 @@ inlineToOrg (Link txt (src, _)) = do _ -> do contents <- inlineListToOrg txt modify $ \s -> s{ stLinks = True } return $ "[[" <> text src <> "][" <> contents <> "]]" -inlineToOrg (Image _ (source', _)) = do - let source = unescapeURI source' +inlineToOrg (Image _ (source, _)) = do modify $ \s -> s{ stImages = True } return $ "[[" <> text source <> "]]" inlineToOrg (Note contents) = do diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 4cf64c267..0f0479e16 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -298,9 +298,8 @@ inlineToRST Space = return space inlineToRST (Link [Code _ str] (src, _)) | src == str || src == "mailto:" ++ str = do let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src - return $ text $ unescapeURI srcSuffix -inlineToRST (Link txt (src', tit)) = do - let src = unescapeURI src' + return $ text srcSuffix +inlineToRST (Link txt (src, tit)) = do useReferenceLinks <- get >>= return . writerReferenceLinks . stOptions linktext <- inlineListToRST $ normalizeSpaces txt if useReferenceLinks @@ -311,8 +310,7 @@ inlineToRST (Link txt (src', tit)) = do modify $ \st -> st { stLinks = refs' } return $ "`" <> linktext <> "`_" else return $ "`" <> linktext <> " <" <> text src <> ">`_" -inlineToRST (Image alternate (source', tit)) = do - let source = unescapeURI source' +inlineToRST (Image alternate (source, tit)) = do pics <- get >>= return . stImages let labelsUsed = map fst pics let txt = if null alternate || alternate == [Str ""] || |