6 files changed, 73 insertions, 90 deletions
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 7a89c0b04..86ce62ced 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -219,7 +219,8 @@ runStyleToContainers rPr =
                    , if isStrike rPr then (Just Strikeout) else Nothing
                    , if isSuperScript rPr then (Just Superscript) else Nothing
                    , if isSubScript rPr then (Just Subscript) else Nothing
-                   , rUnderline rPr >>= (\f -> Just $ Span ("", [], [("underline", f)]))
+                   , rUnderline rPr >>=
+                     (\f -> if f == "single" then (Just Emph) else Nothing)
                  ]
   in
    classContainers ++ formatters
diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs
index adb2c0014..1a4e037cf 100644
--- a/src/Text/Pandoc/SelfContained.hs
+++ b/src/Text/Pandoc/SelfContained.hs
@@ -32,56 +32,54 @@ the HTML using data URIs.
 -}
 module Text.Pandoc.SelfContained ( makeSelfContained ) where
 import Text.HTML.TagSoup
-import Network.URI (isURI, escapeURIString)
+import Network.URI (isURI, escapeURIString, URI(..), parseURI)
 import Data.ByteString.Base64
 import qualified Data.ByteString.Char8 as B
-import qualified Data.ByteString as BS
 import Data.ByteString (ByteString)
-import System.FilePath (takeExtension, dropExtension, takeDirectory, (</>))
+import System.FilePath (takeExtension, takeDirectory, (</>))
 import Data.Char (toLower, isAscii, isAlphaNum)
 import Codec.Compression.GZip as Gzip
 import qualified Data.ByteString.Lazy as L
-import Text.Pandoc.Shared (renderTags', openURL, readDataFile, err)
-import Text.Pandoc.MediaBag (MediaBag, lookupMedia)
+import Text.Pandoc.Shared (renderTags', err, fetchItem')
+import Text.Pandoc.MediaBag (MediaBag)
 import Text.Pandoc.UTF8 (toString,  fromString)
-import Text.Pandoc.MIME (getMimeType)
-import System.Directory (doesFileExist)
+import Text.Pandoc.Options (WriterOptions(..))
 
 isOk :: Char -> Bool
 isOk c = isAscii c && isAlphaNum c
 
-convertTag :: MediaBag -> Maybe FilePath -> Tag String -> IO (Tag String)
-convertTag media userdata t@(TagOpen tagname as)
+convertTag :: MediaBag -> Maybe String -> Tag String -> IO (Tag String)
+convertTag media sourceURL t@(TagOpen tagname as)
   | tagname `elem` ["img", "embed", "video", "input", "audio", "source"] = do
        as' <- mapM processAttribute as
        return $ TagOpen tagname as'
   where processAttribute (x,y) =
            if x == "src" || x == "href" || x == "poster"
               then do
-                (raw, mime) <- getRaw media userdata (fromAttrib "type" t) y
+                (raw, mime) <- getRaw media sourceURL (fromAttrib "type" t) y
                 let enc = "data:" ++ mime ++ ";base64," ++ toString (encode raw)
                 return (x, enc)
               else return (x,y)
-convertTag media userdata t@(TagOpen "script" as) =
+convertTag media sourceURL t@(TagOpen "script" as) =
   case fromAttrib "src" t of
        []     -> return t
        src    -> do
-           (raw, mime) <- getRaw media userdata (fromAttrib "type" t) src
+           (raw, mime) <- getRaw media sourceURL (fromAttrib "type" t) src
            let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw)
            return $ TagOpen "script" (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"])
-convertTag media userdata t@(TagOpen "link" as) =
+convertTag media sourceURL t@(TagOpen "link" as) =
   case fromAttrib "href" t of
        []  -> return t
        src -> do
-           (raw, mime) <- getRaw media userdata (fromAttrib "type" t) src
+           (raw, mime) <- getRaw media sourceURL (fromAttrib "type" t) src
            let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw)
            return $ TagOpen "link" (("href",enc) : [(x,y) | (x,y) <- as, x /= "href"])
 convertTag _ _ t = return t
 
 -- NOTE: This is really crude, it doesn't respect CSS comments.
-cssURLs :: MediaBag -> Maybe FilePath -> FilePath -> ByteString
+cssURLs :: MediaBag -> Maybe String -> FilePath -> ByteString
         -> IO ByteString
-cssURLs media userdata d orig =
+cssURLs media sourceURL d orig =
   case B.breakSubstring "url(" orig of
        (x,y) | B.null y  -> return orig
              | otherwise -> do
@@ -94,43 +92,21 @@ cssURLs media userdata d orig =
                   let url' = if isURI url
                                 then url
                                 else d </> url
-                  (raw, mime) <- getRaw media userdata "" url'
-                  rest <- cssURLs media userdata d v
+                  (raw, mime) <- getRaw media sourceURL "" url'
+                  rest <- cssURLs media sourceURL d v
                   let enc = "data:" `B.append` fromString mime `B.append`
                                ";base64," `B.append` (encode raw)
                   return $ x `B.append` "url(" `B.append` enc `B.append` rest
 
-getItem :: MediaBag -> Maybe FilePath -> String
-        -> IO (ByteString, Maybe String)
-getItem media userdata f =
-  if isURI f
-     then openURL f >>= either handleErr return
-     else do
-       -- strip off trailing query or fragment part, if relative URL.
-       -- this is needed for things like cmunrm.eot?#iefix,
-       -- which is used to get old versions of IE to work with web fonts.
-       let f' = takeWhile (\c -> c /= '?' && c /= '#') f
-       let mbMime = case takeExtension f' of
-                         ".gz" -> getMimeType $ dropExtension f'
-                         x     -> getMimeType x
-       exists <- doesFileExist f'
-       if exists
-          then do
-            cont <- B.readFile f'
-            return (cont, mbMime)
-          else case lookupMedia f media of
-                    Just (mime,bs) -> return (BS.concat $ L.toChunks bs,
-                                              Just mime)
-                    Nothing        -> do
-                      cont <- readDataFile userdata f'
-                      return (cont, mbMime)
-  where handleErr e = err 61 $ "Failed to retrieve " ++ f ++ "\n" ++ show e
-
-getRaw :: MediaBag -> Maybe FilePath -> String -> String
+getRaw :: MediaBag -> Maybe String -> String -> String
        -> IO (ByteString, String)
-getRaw media userdata mimetype src = do
+getRaw media sourceURL mimetype src = do
   let ext = map toLower $ takeExtension src
-  (raw, respMime) <- getItem media userdata src
+  fetchResult <- fetchItem' media sourceURL src
+  (raw, respMime) <- case fetchResult of
+                          Left msg -> err 67 $ "Could not fetch " ++ src ++
+                                               "\n" ++ show msg
+                          Right x  -> return x
   let raw' = if ext == ".gz"
                 then B.concat $ L.toChunks $ Gzip.decompress $ L.fromChunks
                       $ [raw]
@@ -140,21 +116,22 @@ getRaw media userdata mimetype src = do
                          $ "Could not determine mime type for `" ++ src ++ "'"
                   (x, Nothing) -> x
                   (_, Just x ) -> x
+  let cssSourceURL = case parseURI src of
+                          Just u
+                            | uriScheme u `elem` ["http:","https:"] ->
+                                Just $ show u{ uriPath = "",
+                                               uriQuery = "",
+                                               uriFragment = "" }
+                          _ -> Nothing
   result <- if mime == "text/css"
-               then cssURLs media userdata (takeDirectory src) raw'
+               then cssURLs media cssSourceURL (takeDirectory src) raw'
                else return raw'
   return (result, mime)
 
 -- | Convert HTML into self-contained HTML, incorporating images,
--- scripts, and CSS using data: URIs.  Items specified using absolute
--- URLs will be downloaded; those specified using relative URLs will
--- be sought first relative to the working directory, then in the
--- media bag, then relative
--- to the user data directory (if the first parameter is 'Just'
--- a directory), and finally relative to pandoc's default data
--- directory.
-makeSelfContained :: MediaBag -> Maybe FilePath -> String -> IO String
-makeSelfContained media userdata inp = do
+-- scripts, and CSS using data: URIs.
+makeSelfContained :: WriterOptions -> String -> IO String
+makeSelfContained opts inp = do
   let tags = parseTags inp
-  out' <- mapM (convertTag media userdata) tags
+  out' <- mapM (convertTag (writerMediaBag opts) (writerSourceURL opts)) tags
   return $ renderTags' out'
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index d5769c1ab..f0e5bbe5d 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -59,6 +59,7 @@ module Text.Pandoc.Shared (
                      normalizeBlocks,
                      removeFormatting,
                      stringify,
+                     capitalize,
                      compactify,
                      compactify',
                      compactify'DL,
@@ -101,7 +102,7 @@ import Data.Char ( toLower, isLower, isUpper, isAlpha,
 import Data.List ( find, isPrefixOf, intercalate )
 import qualified Data.Map as M
 import Network.URI ( escapeURIString, isURI, nonStrictRelativeTo,
-                     unEscapeString, parseURIReference )
+                     unEscapeString, parseURIReference, isAllowedInURI )
 import qualified Data.Set as Set
 import System.Directory
 import Text.Pandoc.MIME (getMimeType)
@@ -122,6 +123,7 @@ import qualified Data.ByteString.Char8 as B8
 import Text.Pandoc.Compat.Monoid
 import Data.ByteString.Base64 (decodeLenient)
 import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr)
+import qualified Data.Text as T (toUpper, pack, unpack)
 
 #ifdef EMBED_DATA_FILES
 import Text.Pandoc.Data (dataFiles)
@@ -527,6 +529,17 @@ stringify = query go . walk deNote
         deNote (Note _) = Str ""
         deNote x = x
 
+-- | Bring all regular text in a pandoc structure to uppercase.
+-- 
+-- This function correctly handles cases where a lowercase character doesn't
+-- match to a single uppercase character – e.g. “Straße” would be converted
+-- to “STRASSE”, not “STRAßE”.
+capitalize :: Walkable Inline a => a -> a
+capitalize = walk go
+  where go :: Inline -> Inline
+        go (Str s) = Str (T.unpack $ T.toUpper $ T.pack s)
+        go x       = x
+
 -- | Change final list item from @Para@ to @Plain@ if the list contains
 -- no other @Para@ blocks.
 compactify :: [[Block]]  -- ^ List of list items (each a list of blocks)
@@ -766,21 +779,23 @@ readDataFileUTF8 userDir fname =
 -- Returns raw content and maybe mime type.
 fetchItem :: Maybe String -> String
           -> IO (Either E.SomeException (BS.ByteString, Maybe String))
-fetchItem sourceURL s
-  | isURI s         = openURL s
-  | otherwise       =
-      case sourceURL >>= parseURIReference of
-           Just u  -> case parseURIReference s of
-                           Just s' -> openURL $ show $
-                                        s' `nonStrictRelativeTo` u
-                           Nothing -> openURL $ show u ++ "/" ++ s
-           Nothing -> E.try readLocalFile
+fetchItem sourceURL s =
+  case (sourceURL >>= parseURIReference . ensureEscaped, ensureEscaped s) of
+       (_, s') | isURI s'  -> openURL s'
+       (Just u, s') -> -- try fetching from relative path at source
+          case parseURIReference s' of
+               Just u' -> openURL $ show $ u' `nonStrictRelativeTo` u
+               Nothing -> openURL s' -- will throw error
+       (Nothing, _) -> E.try readLocalFile -- get from local file system
   where readLocalFile = do
-          let mime = case takeExtension s of
-                          ".gz" -> getMimeType $ dropExtension s
-                          x     -> getMimeType x
-          cont <- BS.readFile $ unEscapeString s
+          cont <- BS.readFile fp
           return (cont, mime)
+        dropFragmentAndQuery = takeWhile (\c -> c /= '?' && c /= '#')
+        fp = unEscapeString $ dropFragmentAndQuery s
+        mime = case takeExtension fp of
+                    ".gz" -> getMimeType $ dropExtension fp
+                    x     -> getMimeType x
+        ensureEscaped = escapeURIString isAllowedInURI
 
 -- | Like 'fetchItem', but also looks for items in a 'MediaBag'.
 fetchItem' :: MediaBag -> Maybe String -> String
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 770b6f244..34a6dcb2f 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -61,7 +61,6 @@ import Text.Pandoc.MIME (getMimeType)
 import qualified Control.Exception as E
 import Text.Blaze.Html.Renderer.Utf8 (renderHtml)
 import Text.HTML.TagSoup
-import Data.Monoid
 
 -- A Chapter includes a list of blocks and maybe a section
 -- number offset.  Note, some chapters are unnumbered. The section
@@ -794,7 +793,7 @@ transformInline opts mediaRef (Image lab (src,tit)) = do
     return $ Image lab (newsrc, tit)
 transformInline opts _ (x@(Math _ _))
   | WebTeX _ <- writerHTMLMathMethod opts = do
-    raw <- makeSelfContained mempty Nothing $ writeHtmlInline opts x
+    raw <- makeSelfContained opts $ writeHtmlInline opts x
     return $ RawInline (Format "html") raw
 transformInline opts mediaRef  (RawInline fmt raw)
   | fmt == Format "html" = do
diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs
index 803617f95..7a9bff4fe 100644
--- a/src/Text/Pandoc/Writers/FB2.hs
+++ b/src/Text/Pandoc/Writers/FB2.hs
@@ -28,7 +28,7 @@ module Text.Pandoc.Writers.FB2 (writeFB2)  where
 import Control.Monad.State (StateT, evalStateT, get, modify)
 import Control.Monad.State (liftM, liftM2, liftIO)
 import Data.ByteString.Base64 (encode)
-import Data.Char (toUpper, toLower, isSpace, isAscii, isControl)
+import Data.Char (toLower, isSpace, isAscii, isControl)
 import Data.List (intersperse, intercalate, isPrefixOf)
 import Data.Either (lefts, rights)
 import Network.Browser (browse, request, setAllowRedirects, setOutHandler)
@@ -44,8 +44,7 @@ import qualified Text.XML.Light.Cursor as XC
 
 import Text.Pandoc.Definition
 import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..), def)
-import Text.Pandoc.Shared (orderedListMarkers, isHeaderBlock)
-import Text.Pandoc.Walk
+import Text.Pandoc.Shared (orderedListMarkers, isHeaderBlock, capitalize)
 
 -- | Data to be written at the end of the document:
 -- (foot)notes, URLs, references, images.
@@ -421,10 +420,6 @@ indent = indentBlock
   indentLines ins = let lns = split isLineBreak ins :: [[Inline]]
                     in  intercalate [LineBreak] $ map ((Str spacer):) lns
 
-capitalize :: Inline -> Inline
-capitalize (Str xs) = Str $ map toUpper xs
-capitalize x = x
-
 -- | Convert a Pandoc's Inline element to FictionBook XML representation.
 toXml :: Inline -> FBM [Content]
 toXml (Str s) = return [txt s]
@@ -434,7 +429,7 @@ toXml (Strong ss) = list `liftM` wrap "strong" ss
 toXml (Strikeout ss) = list `liftM` wrap "strikethrough" ss
 toXml (Superscript ss) = list `liftM` wrap "sup" ss
 toXml (Subscript ss) = list `liftM` wrap "sub" ss
-toXml (SmallCaps ss) = cMapM toXml $ walk capitalize ss
+toXml (SmallCaps ss) = cMapM toXml $ capitalize ss
 toXml (Quoted SingleQuote ss) = do  -- FIXME: should be language-specific
   inner <- cMapM toXml ss
   return $ [txt "‘"] ++ inner ++ [txt "’"]
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 897e425c6..a859267cc 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -39,7 +39,7 @@ import Text.Pandoc.Writers.Shared
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (blankline, blanklines, char, space)
 import Data.List ( group, isPrefixOf, find, intersperse, transpose, sortBy )
-import Data.Char ( isSpace, isPunctuation, toUpper )
+import Data.Char ( isSpace, isPunctuation )
 import Data.Ord ( comparing )
 import Text.Pandoc.Pretty
 import Control.Monad.State
@@ -672,10 +672,6 @@ escapeSpaces (Str s) = Str $ substitute " " "\\ " s
 escapeSpaces Space = Str "\\ "
 escapeSpaces x = x
 
-toCaps :: Inline -> Inline
-toCaps (Str s) = Str (map toUpper s)
-toCaps x       = x
-
 -- | Convert Pandoc inline element to markdown.
 inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc
 inlineToMarkdown opts (Span attrs ils) = do
@@ -693,7 +689,7 @@ inlineToMarkdown opts (Emph lst) = do
 inlineToMarkdown opts (Strong lst) = do
   plain <- gets stPlain
   if plain
-     then inlineListToMarkdown opts $ walk toCaps lst
+     then inlineListToMarkdown opts $ capitalize lst
      else do
        contents <- inlineListToMarkdown opts lst
        return $ "**" <> contents <> "**"
@@ -716,7 +712,7 @@ inlineToMarkdown opts (Subscript lst) = do
 inlineToMarkdown opts (SmallCaps lst) = do
   plain <- gets stPlain
   if plain
-     then inlineListToMarkdown opts $ walk toCaps lst
+     then inlineListToMarkdown opts $ capitalize lst
      else do
        contents <- inlineListToMarkdown opts lst
        return $ tagWithAttrs "span"