diff options
author | John MacFarlane <jgm@berkeley.edu> | 2010-03-23 13:51:52 -0700 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2010-03-23 13:51:52 -0700 |
commit | 9e658673006ca8c934bb75b224fdc0e7144b4030 (patch) | |
tree | e477eb971c7dca5d18fac1871fa90566bbac73fa /src/Text | |
parent | 4fbacd5d5bed09980af17d6fb90f5a8fd074ffa3 (diff) | |
download | pandoc-9e658673006ca8c934bb75b224fdc0e7144b4030.tar.gz |
Better definition of stringToURI.
Now it escapes all characters that aren't allowed in a URI.
%, ?, /, and other characters that are allowed in a URI are
left alone. Unicode high characters are UTF-8 encoded.
Diffstat (limited to 'src/Text')
-rw-r--r-- | src/Text/Pandoc/Shared.hs | 20 |
1 file changed, 6 insertions, 14 deletions
```diff
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 42e3345c8..0e1ace858 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -115,10 +115,11 @@ import Text.ParserCombinators.Parsec
 import Text.PrettyPrint.HughesPJ ( Doc, fsep, ($$), (<>), empty, isEmpty, text, nest )
 import qualified Text.PrettyPrint.HughesPJ as PP
 import Text.Pandoc.CharacterReferences ( characterReference )
-import Data.Char ( toLower, toUpper, ord, chr, isLower, isUpper, isAlpha,
+import Data.Char ( toLower, toUpper, ord, isLower, isUpper, isAlpha,
                    isPunctuation )
 import Data.List ( find, isPrefixOf, intercalate )
-import Network.URI ( parseURI, URI (..), isAllowedInURI )
+import Network.URI ( parseURI, URI (..), isAllowedInURI, escapeURIString )
+import Codec.Binary.UTF8.String ( encodeString )
 import System.Directory
 import System.FilePath ( (</>) )
 -- Note: ghc >= 6.12 (base >=4.2) supports unicode through iconv
@@ -131,10 +132,6 @@ import System.IO.UTF8
 import Data.Generics
 import qualified Control.Monad.State as S
 import Control.Monad (join)
-import Data.ByteString (unpack)
-import Data.Word (Word8)
-import Data.ByteString.UTF8 (fromString)
-import Text.Printf (printf)
 import Paths_pandoc (getDataFileName)
 
 --
@@ -234,15 +231,10 @@ toRomanNumeral x =
     _ | x >= 1 -> "I" ++ toRomanNumeral (x - 1)
     _ -> ""
 
--- | Escape unicode characters in a URI. This means converting
--- them to UTF-8, then URI-encoding the octets. We leave everything
--- else the same, assuming that the user has already escaped
--- special characters like & and %.
+-- | Escape unicode characters in a URI. Characters that are
+-- already valid in a URI, including % and ?, are left alone.
 stringToURI :: String -> String
-stringToURI = concatMap encodeOctet . unpack . fromString
-  where encodeOctet :: Word8 -> String
-        encodeOctet x | x > 127 = printf "%%%2x" x
-        encodeOctet x = [chr (fromIntegral x)]
+stringToURI = escapeURIString isAllowedInURI . encodeString
 
 -- | Wrap inlines to line length.
 wrapped :: Monad m => ([Inline] -> m Doc) -> [Inline] -> m Doc
```