3 files changed, 38 insertions, 14 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 2e5473992..317c002e9 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -182,7 +182,7 @@ unsanitaryURI u =
              "ldaps:", "magnet:", "mms:", "msnim:", "notes:", "rsync:",
              "secondlife:", "skype:", "ssh:", "sftp:", "smb:", "sms:",
              "snews:", "webcal:", "ymsgr:"]
-  in  case parseURIReference u of
+  in  case parseURIReference (stringToURI u) of
            Just p  -> (map toLower $ uriScheme p) `notElem` safeURISchemes
            Nothing -> True
 
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index f093ddbee..2c1d3ab71 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -43,6 +43,7 @@ module Text.Pandoc.Shared (
                      stripFirstAndLast,
                      camelCaseToHyphenated,
                      toRomanNumeral,
+                     stringToURI,
                      wrapped,
                      wrapIfNeeded,
                      wrappedTeX,
@@ -114,7 +115,7 @@ import Text.ParserCombinators.Parsec
 import Text.PrettyPrint.HughesPJ ( Doc, fsep, ($$), (<>), empty, isEmpty, text, nest )
 import qualified Text.PrettyPrint.HughesPJ as PP
 import Text.Pandoc.CharacterReferences ( characterReference )
-import Data.Char ( toLower, toUpper, ord, isLower, isUpper, isAlpha,
+import Data.Char ( toLower, toUpper, ord, chr, isLower, isUpper, isAlpha,
                    isPunctuation )
 import Data.List ( find, isPrefixOf, intercalate )
 import Network.URI ( parseURI, URI (..), isAllowedInURI )
@@ -130,7 +131,12 @@ import System.IO.UTF8
 import Data.Generics
 import qualified Control.Monad.State as S
 import Control.Monad (join)
+import Data.ByteString (unpack)
+import Data.Word (Word8)
+import Data.ByteString.UTF8 (fromString)
+import Text.Printf (printf)
 import Paths_pandoc (getDataFileName)
+
 --
 -- List processing
 --
@@ -228,6 +234,16 @@ toRomanNumeral x =
               _ | x >= 1    -> "I" ++ toRomanNumeral (x - 1)
               _             -> ""
 
+-- | Escape unicode characters in a URI.  This means converting
+-- them to UTF-8, then URI-encoding the octets.  We leave everything
+-- else the same, assuming that the user has already escaped
+-- special characters like & and %.
+stringToURI :: String -> String
+stringToURI = concatMap encodeOctet . unpack . fromString
+  where encodeOctet :: Word8 -> String
+        encodeOctet x | x > 127 = printf "%%%2x" x
+        encodeOctet x = [chr (fromIntegral x)]
+
 -- | Wrap inlines to line length.
 wrapped :: Monad m => ([Inline] -> m Doc) -> [Inline] -> m Doc
 wrapped listWriter sect = (mapM listWriter $ splitBy Space sect) >>= 
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index d33dcff27..81f403f95 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -67,6 +67,14 @@ renderFragment opts = if writerWrapText opts
 stringToHtml :: String -> Html
 stringToHtml = primHtml . escapeStringForXML
 
+-- Note: href and src, unmodified, incorrectly escape high
+-- characters in URIs using entities.  So we use these replacements:
+href' :: String -> HtmlAttr
+href' = href . stringToURI
+
+src' :: String -> HtmlAttr
+src' = src . stringToURI
+
 -- | Convert Pandoc document to Html string.
 writeHtmlString :: WriterOptions -> Pandoc -> String
 writeHtmlString opts d =
@@ -112,13 +120,13 @@ pandocToHtml opts (Pandoc (Meta title' authors' date') blocks) = do
                 then case writerHTMLMathMethod opts of
                            LaTeXMathML (Just url) ->
                               script ! 
-                              [src url, thetype "text/javascript"] $ noHtml
+                              [src' url, thetype "text/javascript"] $ noHtml
                            MathML (Just url) ->
                               script ! 
-                              [src url, thetype "text/javascript"] $ noHtml
+                              [src' url, thetype "text/javascript"] $ noHtml
                            JsMath (Just url) ->
                               script !
-                              [src url, thetype "text/javascript"] $ noHtml
+                              [src' url, thetype "text/javascript"] $ noHtml
                            _ -> case lookup "mathml-script" (writerVariables opts) of
                                       Just s -> 
                                         script ! [thetype "text/javascript"] <<
@@ -188,7 +196,7 @@ elementToListItem opts (Sec _ num id' headerText subsecs) = do
   let subList = if null subHeads
                    then noHtml
                    else unordList subHeads
-  return $ Just $ (anchor ! [href ("#" ++ writerIdentifierPrefix opts ++ id')] $ txt) +++ subList
+  return $ Just $ (anchor ! [href' ("#" ++ writerIdentifierPrefix opts ++ id')] $ txt) +++ subList
 
 -- | Convert an Element to Html.
 elementToHtml :: WriterOptions -> Element -> State WriterState Html
@@ -222,7 +230,7 @@ parseMailto _ = Nothing
 -- | Obfuscate a "mailto:" link.
 obfuscateLink :: WriterOptions -> String -> String -> Html
 obfuscateLink opts txt s | writerEmailObfuscation opts == NoObfuscation =
-  anchor ! [href s] << txt
+  anchor ! [href' s] << txt
 obfuscateLink opts txt s =
   let meth = writerEmailObfuscation opts
       s' = map toLower s 
@@ -249,7 +257,7 @@ obfuscateLink opts txt s =
                      linkText  ++ "+'<\\/'+'a'+'>');\n// -->\n")) +++  
                      noscript (primHtml $ obfuscateString altText)
                 _ -> error $ "Unknown obfuscation method: " ++ show meth
-        _ -> anchor ! [href s] $ stringToHtml txt  -- malformed email
+        _ -> anchor ! [href' s] $ stringToHtml txt  -- malformed email
 
 -- | Obfuscate character as entity.
 obfuscateChar :: Char -> String
@@ -312,7 +320,7 @@ blockToHtml opts (Header level lst) = do
                             stringToHtml " " +++ contents
                      else contents
   let contents''  = if writerTableOfContents opts
-                       then anchor ! [href $ "#" ++ writerIdentifierPrefix opts ++ "TOC"] $ contents'
+                       then anchor ! [href' $ "#" ++ writerIdentifierPrefix opts ++ "TOC"] $ contents'
                        else contents'
   return $ case level of
               1 -> h1 contents''
@@ -452,7 +460,7 @@ inlineToHtml opts inline =
                                               then thespan ! [theclass "math"] $ primHtml str
                                               else thediv ! [theclass "math"]  $ primHtml str
                                MimeTeX url -> 
-                                  return $ image ! [src (url ++ "?" ++ str),
+                                  return $ image ! [src' (url ++ "?" ++ str),
                                                     alt str, title str]
                                GladTeX ->
                                   return $ primHtml $ "<EQ>" ++ str ++ "</EQ>"
@@ -484,13 +492,13 @@ inlineToHtml opts inline =
                         return $ obfuscateLink opts (show linkText) s
     (Link txt (s,tit)) -> do
                         linkText <- inlineListToHtml opts txt
-                        return $ anchor ! ([href s] ++ 
+                        return $ anchor ! ([href' s] ++ 
                                  if null tit then [] else [title tit]) $ 
                                  linkText
     (Image txt (s,tit)) -> do
                         alternate <- inlineListToHtml opts txt
                         let alternate' = renderFragment opts alternate
-                        let attributes = [src s] ++
+                        let attributes = [src' s] ++
                                          (if null tit 
                                             then [] 
                                             else [title tit]) ++ 
@@ -508,7 +516,7 @@ inlineToHtml opts inline =
                         -- push contents onto front of notes
                         put $ st {stNotes = (htmlContents:notes)} 
                         return $ sup <<
-                                 anchor ! [href ("#" ++ writerIdentifierPrefix opts ++ "fn" ++ ref),
+                                 anchor ! [href' ("#" ++ writerIdentifierPrefix opts ++ "fn" ++ ref),
                                            theclass "footnoteRef",
                                            prefixedId opts ("fnref" ++ ref)] << ref
     (Cite _ il)  -> inlineListToHtml opts il
@@ -517,7 +525,7 @@ blockListToNote :: WriterOptions -> String -> [Block] -> State WriterState Html
 blockListToNote opts ref blocks =
   -- If last block is Para or Plain, include the backlink at the end of
   -- that block. Otherwise, insert a new Plain block with the backlink.
-  let backlink = [HtmlInline $ " <a href=\"#" ++ writerIdentifierPrefix opts ++ "fnref" ++ ref ++ 
+  let backlink = [HtmlInline $ " <a href=\"#" ++ stringToURI (writerIdentifierPrefix opts ++ "fnref" ++ ref) ++ 
                  "\" class=\"footnoteBackLink\"" ++
                  " title=\"Jump back to footnote " ++ ref ++ "\">↩</a>"]
       blocks'  = if null blocks