aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <fiddlosopher@gmail.com>2014-08-02 16:09:17 -0700
committerJohn MacFarlane <fiddlosopher@gmail.com>2014-08-02 16:12:05 -0700
commit1d137fbed69a61e751e94539b51a2aad0618026d (patch)
tree41a025b0157f9915a0b27e0b7ea98b5aa5b49718
parentce8922437d3c74139dffe97fa7e0949e03c2eea8 (diff)
downloadpandoc-1d137fbed69a61e751e94539b51a2aad0618026d.tar.gz
Shared: fetchItem improvements.
* More consistent logic: absolute URIs are fetched from the net; other things are treated as relative URIs if sourceURL is a Just, otherwise as file paths. * We escape characters that are not allowed in URIs before trying to parse them (e.g. '|', which often occurs in the wild). * When treating relative paths as local file paths, we drop any fragment or query. This is useful e.g. when you've downloaded web fonts locally, but your source still contains the original relative URLs. Together with the previous commit, this should close #1477.
-rw-r--r--src/Text/Pandoc/Shared.hs23
1 files changed, 12 insertions, 11 deletions
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index d5769c1ab..df788425b 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -101,7 +101,7 @@ import Data.Char ( toLower, isLower, isUpper, isAlpha,
import Data.List ( find, isPrefixOf, intercalate )
import qualified Data.Map as M
import Network.URI ( escapeURIString, isURI, nonStrictRelativeTo,
- unEscapeString, parseURIReference )
+ unEscapeString, parseURIReference, isAllowedInURI )
import qualified Data.Set as Set
import System.Directory
import Text.Pandoc.MIME (getMimeType)
@@ -766,21 +766,22 @@ readDataFileUTF8 userDir fname =
-- Returns raw content and maybe mime type.
fetchItem :: Maybe String -> String
-> IO (Either E.SomeException (BS.ByteString, Maybe String))
-fetchItem sourceURL s
- | isURI s = openURL s
- | otherwise =
- case sourceURL >>= parseURIReference of
- Just u -> case parseURIReference s of
- Just s' -> openURL $ show $
- s' `nonStrictRelativeTo` u
- Nothing -> openURL $ show u ++ "/" ++ s
- Nothing -> E.try readLocalFile
+fetchItem sourceURL s =
+ case (sourceURL >>= parseURIReference . ensureEscaped, ensureEscaped s) of
+ (_, s') | isURI s' -> openURL s'
+ (Just u, s') -> -- try fetching from relative path at source
+ case parseURIReference s' of
+ Just u' -> openURL $ show $ u' `nonStrictRelativeTo` u
+ Nothing -> openURL s' -- will throw error
+ (Nothing, _) -> E.try readLocalFile -- get from local file system
where readLocalFile = do
let mime = case takeExtension s of
".gz" -> getMimeType $ dropExtension s
x -> getMimeType x
- cont <- BS.readFile $ unEscapeString s
+ cont <- BS.readFile $ unEscapeString $ dropFragmentAndQuery s
return (cont, mime)
+ dropFragmentAndQuery = takeWhile (\c -> c /= '?' && c /= '#')
+ ensureEscaped = escapeURIString isAllowedInURI
-- | Like 'fetchItem', but also looks for items in a 'MediaBag'.
fetchItem' :: MediaBag -> Maybe String -> String