From 1e8a25ad69db8fa372ac4ddbb5ac05ffb00ed052 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 11 Oct 2015 17:06:26 -0700 Subject: Percent-encode more special characters in URLs. HTML, LaTeX writers adjusted. The special characters are '<','>','|','"','{','}','[',']','^', '`'. Closes #1640, #2377. --- src/Text/Pandoc/Shared.hs | 7 +++++-- src/Text/Pandoc/Writers/LaTeX.hs | 10 +++++----- 2 files changed, 10 insertions(+), 7 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 1fe3db5f7..1e06aa383 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -288,9 +288,12 @@ toRomanNumeral x = _ | x >= 1 -> "I" ++ toRomanNumeral (x - 1) _ -> "" --- | Escape whitespace in URI. +-- | Escape whitespace and some punctuation characters in URI. escapeURI :: String -> String -escapeURI = escapeURIString (not . isSpace) +escapeURI = escapeURIString (not . needsEscaping) + where needsEscaping c = isSpace c || c `elem` + ['<','>','|','"','{','}','[',']','^', '`'] + -- | Convert tabs to spaces and filter out DOS line endings. -- Tabs will be preserved if tab stop is set to 0. diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index f424d8d4a..0caa80795 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -242,7 +242,7 @@ stringToLaTeX ctx (x:xs) = do '^' -> "\\^{}" ++ rest '\\'| isUrl -> '/' : rest -- NB. / works as path sep even on Windows | otherwise -> "\\textbackslash{}" ++ rest - '|' -> "\\textbar{}" ++ rest + '|' | not isUrl -> "\\textbar{}" ++ rest '<' -> "\\textless{}" ++ rest '>' -> "\\textgreater{}" ++ rest '[' -> "{[}" ++ rest -- to avoid interpretation as @@ -848,17 +848,17 @@ inlineToLaTeX (Link txt (src, _)) = case txt of [Str x] | escapeURI x == src -> -- autolink do modify $ \s -> s{ stUrl = True } - src' <- stringToLaTeX URLString src + src' <- stringToLaTeX URLString (escapeURI src) return $ text $ "\\url{" ++ src' ++ "}" [Str x] | Just rest <- stripPrefix "mailto:" src, escapeURI x == rest -> -- email autolink do modify $ \s -> s{ stUrl = True } - src' <- stringToLaTeX URLString src + src' <- stringToLaTeX URLString (escapeURI src) contents <- inlineListToLaTeX txt return $ "\\href" <> braces (text src') <> braces ("\\nolinkurl" <> braces contents) _ -> do contents <- inlineListToLaTeX txt - src' <- stringToLaTeX URLString src + src' <- stringToLaTeX URLString (escapeURI src) return $ text ("\\href{" ++ src' ++ "}{") <> contents <> char '}' inlineToLaTeX (Image _ (source, _)) = do @@ -866,7 +866,7 @@ inlineToLaTeX (Image _ (source, _)) = do let source' = if isURI source then source else unEscapeString source - source'' <- stringToLaTeX URLString source' + source'' <- stringToLaTeX URLString (escapeURI source') inHeading <- gets stInHeading return $ (if inHeading then "\\protect\\includegraphics" else "\\includegraphics") -- cgit v1.2.3