15 files changed, 273 insertions, 136 deletions
diff --git a/README b/README
index c5223543c..67d7cfb5b 100644
--- a/README
+++ b/README
@@ -1774,7 +1774,10 @@ legal (though ugly) pipe table:
     orange|3.09
 
 The cells of pipe tables cannot contain block elements like paragraphs
-and lists, and cannot span multiple lines.
+and lists, and cannot span multiple lines.  Note also that in LaTeX/PDF
+output, the cells produced by pipe tables will not wrap, since there
+is no information available about relative widths.  If you want content
+to wrap within cells, use multiline or grid tables.
 
   [the same as in PHP markdown extra]:
     http://michelf.ca/projects/php-markdown/extra/#table
diff --git a/pandoc.cabal b/pandoc.cabal
index 377a3b6c4..1c74933ab 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -273,6 +273,7 @@ Library
                    Text.Pandoc.Options,
                    Text.Pandoc.Pretty,
                    Text.Pandoc.Shared,
+                   Text.Pandoc.MediaBag,
                    Text.Pandoc.Readers.HTML,
                    Text.Pandoc.Readers.LaTeX,
                    Text.Pandoc.Readers.Markdown,
@@ -385,7 +386,6 @@ Test-Suite test-pandoc
                   syb >= 0.1 && < 0.5,
                   pandoc,
                   pandoc-types >= 1.12.3.3 && < 1.13,
-                  base64-bytestring >= 0.1 && < 1.1,
                   bytestring >= 0.9 && < 0.11,
                   text >= 0.11 && < 1.2,
                   directory >= 1 && < 1.3,
@@ -399,7 +399,8 @@ Test-Suite test-pandoc
                   QuickCheck >= 2.4 && < 2.8,
                   HUnit >= 1.2 && < 1.3,
                   containers >= 0.1 && < 0.6,
-                  ansi-terminal >= 0.5 && < 0.7
+                  ansi-terminal >= 0.5 && < 0.7,
+                  zip-archive >= 0.2.3.2 && < 0.3
   Other-Modules:  Tests.Old
                   Tests.Helpers
                   Tests.Arbitrary
diff --git a/pandoc.hs b/pandoc.hs
index b3da9b8b1..70f192086 100644
--- a/pandoc.hs
+++ b/pandoc.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE CPP #-}
+{-# LANGUAGE CPP, TupleSections #-}
 {-
 Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
 
@@ -38,26 +38,26 @@ import Text.Pandoc.Readers.LaTeX (handleIncludes)
 import Text.Pandoc.Shared ( tabFilter, readDataFileUTF8, readDataFile,
                             safeRead, headerShift, normalize, err, warn,
                             openURL )
+import Text.Pandoc.MediaBag ( mediaDirectory, extractMediaBag, MediaBag )
 import Text.Pandoc.XML ( toEntities )
 import Text.Pandoc.SelfContained ( makeSelfContained )
 import Text.Pandoc.Process (pipeProcess)
 import Text.Highlighting.Kate ( languages, Style, tango, pygments,
          espresso, zenburn, kate, haddock, monochrome )
-import System.Environment ( getArgs, getProgName )
+import System.Environment ( getArgs, getProgName, getEnvironment )
 import System.Exit ( exitWith, ExitCode (..) )
 import System.FilePath
 import System.Console.GetOpt
 import Data.Char ( toLower )
 import Data.List ( intercalate, isPrefixOf, isSuffixOf, sort )
 import System.Directory ( getAppUserDataDirectory, findExecutable,
-                          doesFileExist, Permissions(..), getPermissions,
-                          createDirectoryIfMissing )
+                          doesFileExist, Permissions(..), getPermissions )
 import System.IO ( stdout, stderr )
 import System.IO.Error ( isDoesNotExistError )
 import qualified Control.Exception as E
 import Control.Exception.Extensible ( throwIO )
 import qualified Text.Pandoc.UTF8 as UTF8
-import Control.Monad (when, unless, liftM)
+import Control.Monad (when, unless, liftM, (>=>))
 import Data.Foldable (foldrM)
 import Network.URI (parseURI, isURI, URI(..))
 import qualified Data.ByteString.Lazy as B
@@ -69,6 +69,9 @@ import qualified Data.Yaml as Yaml
 import qualified Data.Text as T
 import Control.Applicative ((<$>))
 import Text.Pandoc.Readers.Txt2Tags (getT2TMeta)
+import Data.Monoid
+
+type Transform = Pandoc -> Pandoc
 
 copyrightMessage :: String
 copyrightMessage = "\nCopyright (C) 2006-2014 John MacFarlane\n" ++
@@ -100,7 +103,10 @@ isTextFormat s = takeWhile (`notElem` "+-") s `notElem` ["odt","docx","epub","ep
 
 externalFilter :: FilePath -> [String] -> Pandoc -> IO Pandoc
 externalFilter f args' d = do
-      mbexe <- if '/' `elem` f -- don't check PATH if filter name it has a path
+      mbPath <- lookup "PATH" <$> getEnvironment
+      mbexe <- if '/' `elem` f || mbPath == Nothing
+                  -- don't check PATH if filter name has a path, or
+                  -- if the PATH is not set
                   then return Nothing
                   else findExecutable f
       (f', args'') <- case mbexe of
@@ -140,7 +146,7 @@ data Opt = Opt
     , optWriter            :: String  -- ^ Writer format
     , optParseRaw          :: Bool    -- ^ Parse unconvertable HTML and TeX
     , optTableOfContents   :: Bool    -- ^ Include table of contents
-    , optTransforms        :: [Pandoc -> Pandoc]  -- ^ Doc transforms to apply
+    , optTransforms        :: [Transform]  -- ^ Doc transforms to apply
     , optTemplate          :: Maybe FilePath  -- ^ Custom template
     , optVariables         :: [(String,String)] -- ^ Template variables to set
     , optMetadata          :: M.Map String MetaValue -- ^ Metadata fields to set
@@ -933,6 +939,31 @@ defaultWriterName x =
     ['.',y] | y `elem` ['1'..'9'] -> "man"
     _          -> "html"
 
+-- Transformations of a Pandoc document post-parsing:
+
+extractMedia :: MediaBag -> FilePath -> Pandoc -> IO Pandoc
+extractMedia media dir d =
+  case [fp | (fp, _, _) <- mediaDirectory media] of
+        []  -> return d
+        fps -> do
+          extractMediaBag True dir media
+          return $ walk (adjustImagePath dir fps) d
+
+adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline
+adjustImagePath dir paths (Image lab (src, tit))
+   | src `elem` paths = Image lab (dir ++ "/" ++ src, tit)
+adjustImagePath _ _ x = x
+
+adjustMetadata :: M.Map String MetaValue -> Pandoc -> IO Pandoc
+adjustMetadata metadata d = return $ M.foldWithKey setMeta d metadata
+
+applyTransforms :: [Transform] -> Pandoc -> IO Pandoc
+applyTransforms transforms d = return $ foldr ($) d transforms
+
+applyFilters :: [FilePath] -> [String] -> Pandoc -> IO Pandoc
+applyFilters filters args d =
+  foldrM ($) d $ map (flip externalFilter args) filters
+
 main :: IO ()
 main = do
 
@@ -1025,7 +1056,6 @@ main = do
                                 all (\f -> takeBaseName f /= "pandoc-citeproc")
                                 filters -> "pandoc-citeproc" : filters
                        _                -> filters
-  let plugins = map externalFilter filters'
 
   let sources = if ignoreArgs then [] else args
 
@@ -1142,6 +1172,40 @@ main = do
                       , readerTrackChanges = trackChanges
                       }
 
+  when (not (isTextFormat writerName') && outputFile == "-") $
+    err 5 $ "Cannot write " ++ writerName' ++ " output to stdout.\n" ++
+            "Specify an output file using the -o option."
+
+  let readSources [] = mapM readSource ["-"]
+      readSources srcs = mapM readSource srcs
+      readSource "-" = UTF8.getContents
+      readSource src = case parseURI src of
+                            Just u | uriScheme u `elem` ["http:","https:"] ->
+                                       readURI src
+                            _       -> UTF8.readFile src
+      readURI src = do
+        res <- openURL src
+        case res of
+             Left e        -> throwIO e
+             Right (bs,_)  -> return $ UTF8.toString bs
+
+  let readFiles [] = error "Cannot read archive from stdin"
+      readFiles (x:_) = B.readFile x
+
+  let convertTabs = tabFilter (if (preserveTabs || readerName' == "t2t") then 0 else tabStop)
+
+  let handleIncludes' = if readerName' == "latex" || readerName' == "latex+lhs"
+                           then handleIncludes
+                           else return
+
+  (doc, media) <-
+     case reader of
+          StringReader r-> (, mempty) <$>
+            (  readSources >=>
+               handleIncludes' . convertTabs . intercalate "\n" >=>
+               r readerOpts ) sources
+          ByteStringReader r -> readFiles sources >>= r readerOpts
+
   let writerOptions = def { writerStandalone       = standalone',
                             writerTemplate         = templ,
                             writerVariables        = variables'',
@@ -1177,70 +1241,15 @@ main = do
                             writerEpubChapterLevel = epubChapterLevel,
                             writerTOCDepth         = epubTOCDepth,
                             writerReferenceODT     = referenceODT,
-                            writerReferenceDocx    = referenceDocx
+                            writerReferenceDocx    = referenceDocx,
+                            writerMediaBag         = media
                           }
 
-  when (not (isTextFormat writerName') && outputFile == "-") $
-    err 5 $ "Cannot write " ++ writerName' ++ " output to stdout.\n" ++
-            "Specify an output file using the -o option."
-
-  let readSources [] = mapM readSource ["-"]
-      readSources srcs = mapM readSource srcs
-      readSource "-" = UTF8.getContents
-      readSource src = case parseURI src of
-                            Just u | uriScheme u `elem` ["http:","https:"] ->
-                                       readURI src
-                            _       -> UTF8.readFile src
-      readURI src = do
-        res <- openURL src
-        case res of
-             Left e        -> throwIO e
-             Right (bs,_)  -> return $ UTF8.toString bs
-
-  let readFiles [] = error "Cannot read archive from stdin"
-      readFiles (x:_) = B.readFile x
-
-  let convertTabs = tabFilter (if (preserveTabs || readerName' == "t2t") then 0 else tabStop)
-
-  let handleIncludes' = if readerName' == "latex" || readerName' == "latex+lhs"
-                           then handleIncludes
-                           else return
-
-  let writeMedia :: FilePath -> (FilePath, B.ByteString) -> IO ()
-      writeMedia dir (subpath, bs) = do
-        -- we join and split to convert a/b/c to a\b\c on Windows;
-        -- in zip containers all paths use /
-        let fullpath = dir </> joinPath (splitPath subpath)
-        createDirectoryIfMissing True $ takeDirectory fullpath
-        warn $ "extracting " ++ fullpath
-        B.writeFile fullpath bs
-
-  let adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline
-      adjustImagePath dir paths (Image lab (src, tit))
-         | src `elem` paths = Image lab (dir ++ "/" ++ src, tit)
-      adjustImagePath _ _ x = x
 
-  (doc, media) <-
-     case reader of
-          StringReader r-> do
-            inp <- readSources sources >>=
-                       handleIncludes' . convertTabs . intercalate "\n"
-            d <- r readerOpts inp
-            return (d, M.empty)
-          ByteStringReader r -> do
-              (d, media) <- readFiles sources >>= r readerOpts
-              d' <- case mbExtractMedia of
-                       Just dir | not (M.null media) -> do
-                         mapM_ (writeMedia dir) $ M.toList media
-                         return $ walk (adjustImagePath dir (M.keys media)) d
-                       _  -> return d
-              return (d', media)
-
-  let writerOptions' = writerOptions{ writerMediaBag = media }
-
-  let doc0 = M.foldWithKey setMeta doc metadata
-  let doc1 = foldr ($) doc0 transforms
-  doc2 <- foldrM ($) doc1 $ map ($ [writerName']) plugins
+  doc' <- (maybe return (extractMedia media) mbExtractMedia >=>
+           adjustMetadata metadata >=>
+           applyTransforms transforms >=>
+           applyFilters filters' [writerName']) doc
 
   let writeBinary :: B.ByteString -> IO ()
       writeBinary = B.writeFile (UTF8.encodePath outputFile)
@@ -1250,8 +1259,8 @@ main = do
       writerFn f   = UTF8.writeFile f
 
   case writer of
-    IOStringWriter f -> f writerOptions' doc2 >>= writerFn outputFile
-    IOByteStringWriter f -> f writerOptions' doc2 >>= writeBinary
+    IOStringWriter f -> f writerOptions doc' >>= writerFn outputFile
+    IOByteStringWriter f -> f writerOptions doc' >>= writeBinary
     PureStringWriter f
       | pdfOutput -> do
               -- make sure writer is latex or beamer
@@ -1265,14 +1274,14 @@ main = do
                    err 41 $ latexEngine ++ " not found. " ++
                      latexEngine ++ " is needed for pdf output."
 
-              res <- makePDF latexEngine f writerOptions' doc2
+              res <- makePDF latexEngine f writerOptions doc'
               case res of
                    Right pdf -> writeBinary pdf
                    Left err' -> do
                      B.hPutStr stderr $ err'
                      B.hPut stderr $ B.pack [10]
                      err 43 "Error producing PDF from TeX source"
-      | otherwise -> selfcontain (f writerOptions' doc2 ++
+      | otherwise -> selfcontain (f writerOptions doc' ++
                                   ['\n' | not standalone'])
                       >>= writerFn outputFile . handleEntities
           where htmlFormat = writerName' `elem`
@@ -1280,8 +1289,8 @@ main = do
                                "s5","slidy","slideous","dzslides","revealjs"]
                 selfcontain = if selfContained && htmlFormat
                                  then makeSelfContained
-                                      (writerMediaBag writerOptions')
-                                      (writerUserDataDir writerOptions')
+                                      (writerMediaBag writerOptions)
+                                      (writerUserDataDir writerOptions)
                                  else return
                 handleEntities = if htmlFormat && ascii
                                     then toEntities
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index 11553383c..77eb3e82f 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -160,7 +160,8 @@ import Text.Pandoc.Writers.Haddock
 import Text.Pandoc.Writers.Custom
 import Text.Pandoc.Templates
 import Text.Pandoc.Options
-import Text.Pandoc.Shared (safeRead, warn, MediaBag)
+import Text.Pandoc.Shared (safeRead, warn)
+import Text.Pandoc.MediaBag (MediaBag)
 import Data.Aeson
 import qualified Data.ByteString.Lazy as BL
 import Data.List (intercalate)
diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs
new file mode 100644
index 000000000..667089f55
--- /dev/null
+++ b/src/Text/Pandoc/MediaBag.hs
@@ -0,0 +1,107 @@
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-
+Copyright (C) 2014 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-}
+
+{- |
+   Module      : Text.Pandoc.MediaBag
+   Copyright   : Copyright (C) 2014 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+Definition of a MediaBag object to hold binary resources, and an
+interface for interacting with it.
+-}
+module Text.Pandoc.MediaBag (
+                     MediaBag,
+                     lookupMedia,
+                     insertMedia,
+                     mediaDirectory,
+                     extractMediaBag
+                     ) where
+import System.FilePath
+import System.Directory (createDirectoryIfMissing)
+import qualified Data.Map as M
+import qualified Data.ByteString.Lazy as BL
+import Data.Monoid (Monoid)
+import Control.Monad (when, MonadPlus(..))
+import Text.Pandoc.MIME (getMimeType)
+import qualified Text.Pandoc.UTF8 as UTF8
+import Data.Maybe (fromMaybe)
+import System.IO (stderr)
+
+-- | A container for a collection of binary resources, with names and
+-- mime types.  Note that a 'MediaBag' is a Monoid, so 'mempty'
+-- can be used for an empty 'MediaBag', and '<>' can be used to append
+-- two 'MediaBag's.
+newtype MediaBag = MediaBag (M.Map String (String, BL.ByteString))
+        deriving (Monoid)
+
+instance Show MediaBag where
+  show bag = "MediaBag " ++ show (mediaDirectory bag)
+
+-- | Insert a media item into a 'MediaBag', replacing any existing
+-- value with the same name.
+insertMedia :: FilePath      -- ^ relative path and canonical name of resource
+            -> Maybe String  -- ^ mime type (Nothing = determine from extension)
+            -> BL.ByteString -- ^ contents of resource
+            -> MediaBag
+            -> MediaBag
+insertMedia fp mbMime contents (MediaBag mediamap) =
+  MediaBag (M.insert fp (mime, contents) mediamap)
+  where mime = fromMaybe "application/octet-stream" (mbMime `mplus` fallback)
+        fallback = case takeExtension fp of
+                        ".gz"   -> getMimeType $ dropExtension fp
+                        _       -> getMimeType fp
+
+-- | Lookup a media item in a 'MediaBag', returning mime type and contents.
+lookupMedia :: FilePath
+            -> MediaBag
+            -> Maybe (String, BL.ByteString)
+lookupMedia fp (MediaBag mediamap) = M.lookup fp mediamap
+
+-- | Get a list of the file paths stored in a 'MediaBag', with
+-- their corresponding mime types and the lengths in bytes of the contents.
+mediaDirectory :: MediaBag -> [(String, String, Int)]
+mediaDirectory (MediaBag mediamap) =
+  M.foldWithKey (\fp (mime,contents) ->
+      ((fp, mime, fromIntegral $ BL.length contents):)) [] mediamap
+
+-- | Extract contents of MediaBag to a given directory.  Print informational
+-- messages if 'verbose' is true.
+extractMediaBag :: Bool
+                -> FilePath
+                -> MediaBag
+                -> IO ()
+extractMediaBag verbose dir (MediaBag mediamap) = do
+  sequence_ $ M.foldWithKey
+     (\fp (_ ,contents) ->
+        ((writeMedia verbose dir (fp, contents)):)) [] mediamap
+
+writeMedia :: Bool -> FilePath -> (FilePath, BL.ByteString) -> IO ()
+writeMedia verbose dir (subpath, bs) = do
+  -- we join and split to convert a/b/c to a\b\c on Windows;
+  -- in zip containers all paths use /
+  let fullpath = dir </> joinPath (splitPath subpath)
+  createDirectoryIfMissing True $ takeDirectory fullpath
+  when verbose $ UTF8.hPutStrLn stderr $ "pandoc: extracting " ++ fullpath
+  BL.writeFile fullpath bs
+
+
diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
index 2de1a9e35..85a6a3096 100644
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@@ -49,8 +49,8 @@ import Data.Set (Set)
 import qualified Data.Set as Set
 import Data.Default
 import Text.Pandoc.Highlighting (Style, pygments)
-import Text.Pandoc.Shared (MediaBag)
-import qualified Data.Map as M
+import Text.Pandoc.MediaBag (MediaBag)
+import Data.Monoid
 
 -- | Individually selectable syntax extensions.
 data Extension =
@@ -359,7 +359,7 @@ instance Default WriterOptions where
                       , writerTOCDepth         = 3
                       , writerReferenceODT     = Nothing
                       , writerReferenceDocx    = Nothing
-                      , writerMediaBag         = M.empty
+                      , writerMediaBag         = mempty
                       }
 
 -- | Returns True if the given extension is enabled.
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index f5fb6565a..86ce62ced 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -84,8 +84,10 @@ import Text.Pandoc.Readers.Docx.Lists
 import Text.Pandoc.Readers.Docx.Reducible
 import Text.Pandoc.Readers.Docx.TexChar
 import Text.Pandoc.Shared
+import Text.Pandoc.MediaBag (insertMedia, MediaBag)
 import Data.Maybe (mapMaybe, fromMaybe)
 import Data.List (delete, isPrefixOf, (\\), intercalate, intersect)
+import Data.Monoid
 import qualified Data.ByteString.Lazy as B
 import qualified Data.Map as M
 import Control.Monad.Reader
@@ -108,7 +110,7 @@ data DState = DState { docxAnchorMap :: M.Map String String
 
 defaultDState :: DState
 defaultDState = DState { docxAnchorMap = M.empty
-                       , docxMediaBag  = M.empty
+                       , docxMediaBag  = mempty
                        , docxInHeaderBlock = False
                        , docxInTexSubscript = False}
 
@@ -217,7 +219,8 @@ runStyleToContainers rPr =
                    , if isStrike rPr then (Just Strikeout) else Nothing
                    , if isSuperScript rPr then (Just Superscript) else Nothing
                    , if isSubScript rPr then (Just Subscript) else Nothing
-                   , rUnderline rPr >>= (\f -> Just $ Span ("", [], [("underline", f)]))
+                   , rUnderline rPr >>=
+                     (\f -> if f == "single" then (Just Emph) else Nothing)
                  ]
   in
    classContainers ++ formatters
@@ -369,7 +372,7 @@ parPartToInlines (BookMark _ anchor) =
     return [Span (newAnchor, ["anchor"], []) []]
 parPartToInlines (Drawing fp bs) = do
   mediaBag <- gets docxMediaBag
-  modify $ \s -> s { docxMediaBag = M.insert fp bs mediaBag}
+  modify $ \s -> s { docxMediaBag = insertMedia fp Nothing bs mediaBag }
   return [Image [] (fp, "")]
 parPartToInlines (InternalHyperLink anchor runs) = do
   ils <- concatMapM runToInlines runs
diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs
index 777da3551..adb2c0014 100644
--- a/src/Text/Pandoc/SelfContained.hs
+++ b/src/Text/Pandoc/SelfContained.hs
@@ -41,11 +41,11 @@ import System.FilePath (takeExtension, dropExtension, takeDirectory, (</>))
 import Data.Char (toLower, isAscii, isAlphaNum)
 import Codec.Compression.GZip as Gzip
 import qualified Data.ByteString.Lazy as L
-import Text.Pandoc.Shared (renderTags', openURL, readDataFile, err, MediaBag)
+import Text.Pandoc.Shared (renderTags', openURL, readDataFile, err)
+import Text.Pandoc.MediaBag (MediaBag, lookupMedia)
 import Text.Pandoc.UTF8 (toString,  fromString)
 import Text.Pandoc.MIME (getMimeType)
 import System.Directory (doesFileExist)
-import qualified Data.Map as M
 
 isOk :: Char -> Bool
 isOk c = isAscii c && isAlphaNum c
@@ -110,16 +110,20 @@ getItem media userdata f =
        -- this is needed for things like cmunrm.eot?#iefix,
        -- which is used to get old versions of IE to work with web fonts.
        let f' = takeWhile (\c -> c /= '?' && c /= '#') f
-       let mime = case takeExtension f' of
-                       ".gz" -> getMimeType $ dropExtension f'
-                       x     -> getMimeType x
+       let mbMime = case takeExtension f' of
+                         ".gz" -> getMimeType $ dropExtension f'
+                         x     -> getMimeType x
        exists <- doesFileExist f'
-       cont <- if exists
-                  then B.readFile f'
-                  else case M.lookup f media of
-                            Just bs  -> return $ BS.concat $ L.toChunks bs
-                            Nothing  -> readDataFile userdata f'
-       return (cont, mime)
+       if exists
+          then do
+            cont <- B.readFile f'
+            return (cont, mbMime)
+          else case lookupMedia f media of
+                    Just (mime,bs) -> return (BS.concat $ L.toChunks bs,
+                                              Just mime)
+                    Nothing        -> do
+                      cont <- readDataFile userdata f'
+                      return (cont, mbMime)
   where handleErr e = err 61 $ "Failed to retrieve " ++ f ++ "\n" ++ show e
 
 getRaw :: MediaBag -> Maybe FilePath -> String -> String
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 81aa6cf5a..d5769c1ab 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -48,8 +48,6 @@ module Text.Pandoc.Shared (
                      toRomanNumeral,
                      escapeURI,
                      tabFilter,
-                     -- * Media Handling
-                     MediaBag,
                      -- * Date/time
                      normalizeDate,
                      -- * Pandoc block and inline list processing
@@ -92,6 +90,7 @@ module Text.Pandoc.Shared (
 
 import Text.Pandoc.Definition
 import Text.Pandoc.Walk
+import Text.Pandoc.MediaBag (MediaBag, lookupMedia)
 import Text.Pandoc.Builder (Inlines, Blocks, ToMetaValue(..))
 import qualified Text.Pandoc.Builder as B
 import qualified Text.Pandoc.UTF8 as UTF8
@@ -106,7 +105,7 @@ import Network.URI ( escapeURIString, isURI, nonStrictRelativeTo,
 import qualified Data.Set as Set
 import System.Directory
 import Text.Pandoc.MIME (getMimeType)
-import System.FilePath ( (</>), takeExtension, dropExtension )
+import System.FilePath ( (</>), takeExtension, dropExtension)
 import Data.Generics (Typeable, Data)
 import qualified Control.Monad.State as S
 import qualified Control.Exception as E
@@ -120,7 +119,6 @@ import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..),
          renderOptions)
 import qualified Data.ByteString as BS
 import qualified Data.ByteString.Char8 as B8
-import qualified Data.ByteString.Lazy as BL
 import Text.Pandoc.Compat.Monoid
 import Data.ByteString.Base64 (decodeLenient)
 import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr)
@@ -287,14 +285,6 @@ tabFilter tabStop =
         x : go (spsToNextStop - 1) xs
   in  go tabStop
 
----
---- Media handling
----
-
--- | A map of media paths to their binary representations.
-
-type MediaBag = M.Map String BL.ByteString
-
 --
 -- Date/time
 --
@@ -796,13 +786,9 @@ fetchItem sourceURL s
 fetchItem' :: MediaBag -> Maybe String -> String
            -> IO (Either E.SomeException (BS.ByteString, Maybe String))
 fetchItem' media sourceURL s = do
-  case M.lookup s media of
+  case lookupMedia s media of
        Nothing -> fetchItem sourceURL s
-       Just bs -> do
-          let mime = case takeExtension s of
-                          ".gz" -> getMimeType $ dropExtension s
-                          x     -> getMimeType x
-          return $ Right (BS.concat $ toChunks bs, mime)
+       Just (mime, bs) -> return $ Right (BS.concat $ toChunks bs, Just mime)
 
 -- | Read from a URL and return raw data and maybe mime type.
 openURL :: String -> IO (Either E.SomeException (BS.ByteString, Maybe String))
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 1f222b8b8..770b6f244 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -61,6 +61,7 @@ import Text.Pandoc.MIME (getMimeType)
 import qualified Control.Exception as E
 import Text.Blaze.Html.Renderer.Utf8 (renderHtml)
 import Text.HTML.TagSoup
+import Data.Monoid
 
 -- A Chapter includes a list of blocks and maybe a section
 -- number offset.  Note, some chapters are unnumbered. The section
@@ -793,7 +794,7 @@ transformInline opts mediaRef (Image lab (src,tit)) = do
     return $ Image lab (newsrc, tit)
 transformInline opts _ (x@(Math _ _))
   | WebTeX _ <- writerHTMLMathMethod opts = do
-    raw <- makeSelfContained M.empty Nothing $ writeHtmlInline opts x
+    raw <- makeSelfContained mempty Nothing $ writeHtmlInline opts x
     return $ RawInline (Format "html") raw
 transformInline opts mediaRef  (RawInline fmt raw)
   | fmt == Format "html" = do
diff --git a/tests/Tests/Readers/Docx.hs b/tests/Tests/Readers/Docx.hs
index 494669fd5..efc520dba 100644
--- a/tests/Tests/Readers/Docx.hs
+++ b/tests/Tests/Readers/Docx.hs
@@ -5,13 +5,15 @@ import Text.Pandoc.Readers.Native
 import Text.Pandoc.Definition
 import Tests.Helpers
 import Test.Framework
-import qualified Data.ByteString as BS
+import Test.HUnit (assertBool)
+import Test.Framework.Providers.HUnit
 import qualified Data.ByteString.Lazy as B
-import qualified Data.ByteString.Char8 as B8
-import qualified Data.ByteString.Base64 as B64
 import Text.Pandoc.Readers.Docx
 import Text.Pandoc.Writers.Native (writeNative)
 import qualified Data.Map as M
+import Text.Pandoc.MediaBag (MediaBag, lookupMedia, mediaDirectory)
+import Codec.Archive.Zip
+import System.FilePath (combine)
 
 -- We define a wrapper around pandoc that doesn't normalize in the
 -- tests. Since we do our own normalization, we want to make sure
@@ -55,22 +57,44 @@ testCompareWithOpts opts name docxFile nativeFile =
 testCompare :: String -> FilePath -> FilePath -> Test
 testCompare = testCompareWithOpts def
 
-testCompareMediaIO :: String -> FilePath -> FilePath -> FilePath -> IO Test
-testCompareMediaIO name docxFile mediaPath mediaFile = do
+getMedia :: FilePath -> FilePath -> IO (Maybe B.ByteString)
+getMedia archivePath mediaPath = do
+  zf <- B.readFile archivePath >>= return . toArchive
+  return $ findEntryByPath (combine "word" mediaPath) zf >>= (Just . fromEntry)
+
+compareMediaPathIO :: FilePath -> MediaBag -> FilePath -> IO Bool
+compareMediaPathIO mediaPath mediaBag docxPath = do
+  docxMedia <- getMedia docxPath mediaPath
+  let mbBS   = case lookupMedia mediaPath mediaBag of
+                 Just (_, bs) -> bs
+                 Nothing      -> error ("couldn't find " ++
+                                        mediaPath ++
+                                        " in media bag")
+      docxBS = case docxMedia of
+                 Just bs -> bs
+                 Nothing -> error ("couldn't find " ++
+                                   mediaPath ++
+                                   " in media bag")
+  return $ mbBS == docxBS
+
+compareMediaBagIO :: FilePath -> IO Bool
+compareMediaBagIO docxFile = do
     df <- B.readFile docxFile
-    mf <- B.readFile mediaFile
     let (_, mb) = readDocx def df
-        dBytes = case M.lookup mediaPath mb of
-          Just bs -> bs
-          Nothing -> error "Media file not found"
-        d64 = B8.unpack $ B64.encode $ BS.concat $ B.toChunks dBytes
-        m64 = B8.unpack $ B64.encode $ BS.concat $ B.toChunks mf
-    return $ test id name (d64, m64)
+    bools <- mapM
+             (\(fp, _, _) -> compareMediaPathIO fp mb docxFile) 
+             (mediaDirectory mb)
+    return $ and bools
 
-testCompareMedia :: String -> FilePath -> FilePath -> FilePath -> Test
-testCompareMedia name docxFile mediaPath mediaFile =
-  buildTest $ testCompareMediaIO name docxFile mediaPath mediaFile
+testMediaBagIO :: String -> FilePath -> IO Test
+testMediaBagIO name docxFile = do
+  outcome <- compareMediaBagIO docxFile
+  return $ testCase name (assertBool 
+                          ("Media didn't match media bag in file " ++ docxFile)
+                          outcome)
 
+testMediaBag :: String -> FilePath -> Test
+testMediaBag name docxFile = buildTest $ testMediaBagIO name docxFile
 
 tests :: [Test]
 tests = [ testGroup "inlines"
@@ -185,11 +209,9 @@ tests = [ testGroup "inlines"
             "docx.track_changes_deletion_all.native"
           ]
         , testGroup "media"
-          [ testCompareMedia
+          [ testMediaBag
             "image extraction"
             "docx.image.docx"
-            "media/image1.jpeg"
-            "docx.image1.jpeg"
           ]
         , testGroup "metadata"
           [ testCompareWithOpts def{readerStandalone=True}
diff --git a/tests/docx.image.docx b/tests/docx.image.docx
index 060f2b204..06e4efd1a 100644
--- a/tests/docx.image.docx
+++ b/tests/docx.image.docx
diff --git a/tests/docx.image1.jpeg b/tests/docx.image1.jpeg
deleted file mode 100644
index 423dff48b..000000000
--- a/tests/docx.image1.jpeg
+++ /dev/null
diff --git a/tests/docx.image_no_embed.native b/tests/docx.image_no_embed.native
index aa0f65d27..95c73610e 100644
--- a/tests/docx.image_no_embed.native
+++ b/tests/docx.image_no_embed.native
@@ -1,2 +1,2 @@
-[Header 2 ("an-image",[],[]) [Str "An",Space,Str "image"]
-,Para [Image [] ("media/image1.jpeg","")]]
+[Para [Str "An",Space,Str "image:"]
+,Para [Image [] ("media/image1.jpg","")]]
diff --git a/tests/docx.inline_formatting.native b/tests/docx.inline_formatting.native
index dc8a3d19a..22d8f79e8 100644
--- a/tests/docx.inline_formatting.native
+++ b/tests/docx.inline_formatting.native
@@ -1,5 +1,5 @@
 [Para [Str "Regular",Space,Str "text",Space,Emph [Str "italics"],Space,Strong [Str "bold",Space,Emph [Str "bold",Space,Str "italics"]],Str "."]
 ,Para [Str "This",Space,Str "is",Space,SmallCaps [Str "Small",Space,Str "Caps"],Str ",",Space,Str "and",Space,Str "this",Space,Str "is",Space,Strikeout [Str "strikethrough"],Str "."]
-,Para [Str "Some",Space,Str "people",Space,Str "use",Space,Span ("",[],[("underline","single")]) [Str "single",Space,Str "underlines",Space,Str "for",Space,Emph [Str "emphasis"]],Str "."]
+,Para [Str "Some",Space,Str "people",Space,Str "use",Space,Emph [Str "single",Space,Str "underlines",Space,Str "for",Space,Emph [Str "emphasis"]],Str "."]
 ,Para [Str "Above",Space,Str "the",Space,Str "line",Space,Str "is",Space,Superscript [Str "superscript"],Space,Str "and",Space,Str "below",Space,Str "the",Space,Str "line",Space,Str "is",Space,Subscript [Str "subscript"],Str "."]
 ,Para [Str "A",Space,Str "line",LineBreak,Str "break."]]