Docx writer: Preliminary improvements.

* Use getItem to fetch images, so we can get them over the net if they have absolute URLs. * Added TODO notes for cleaning up the logic.
author: John MacFarlane <fiddlosopher@gmail.com> 2013-01-11 12:17:41 -0800
committer: John MacFarlane <fiddlosopher@gmail.com> 2013-01-11 12:17:41 -0800
commit: 4e4c3537e01c4335a09d47d330925484dd158974 (patch)
tree: d5e91bdb13782ec01e37af9b8516826876249da7 /src/Text/Pandoc
parent: 2685ebff0f644be43d743ee327345165d43815b5 (diff)
download: pandoc-4e4c3537e01c4335a09d47d330925484dd158974.tar.gz
1 files changed, 23 insertions, 18 deletions
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 839bb16d4..5d220ca79 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE ScopedTypeVariables #-}
 {-
 Copyright (C) 2012 John MacFarlane <jgm@berkeley.edu>
 
@@ -29,15 +30,14 @@ Conversion of 'Pandoc' documents to docx.
 -}
 module Text.Pandoc.Writers.Docx ( writeDocx ) where
 import Data.List ( intercalate )
-import qualified Data.ByteString.Lazy as B
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Lazy as BL
 import qualified Data.Map as M
 import qualified Text.Pandoc.UTF8 as UTF8
-import System.IO ( stderr )
 import Codec.Archive.Zip
 import Data.Time.Clock.POSIX
 import Text.Pandoc.Definition
 import Text.Pandoc.Generic
-import System.Directory
 import Text.Pandoc.ImageSize
 import Text.Pandoc.Shared hiding (Element)
 import Text.Pandoc.Options
@@ -51,6 +51,7 @@ import Text.Highlighting.Kate
 import Data.Unique (hashUnique, newUnique)
 import System.Random (randomRIO)
 import Text.Printf (printf)
+import qualified Control.Exception as E
 
 data WriterState = WriterState{
          stTextProperties :: [Element]
@@ -93,17 +94,19 @@ mknode :: Node t => String -> [(String,String)] -> t -> Element
 mknode s attrs =
   add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) . node (unqual s)
 
+toLazy :: B.ByteString -> BL.ByteString
+toLazy = BL.fromChunks . (:[])
+
 -- | Produce an Docx file from a Pandoc document.
 writeDocx :: WriterOptions  -- ^ Writer options
           -> Pandoc         -- ^ Document to convert
-          -> IO B.ByteString
+          -> IO BL.ByteString
 writeDocx opts doc@(Pandoc (Meta tit auths date) _) = do
   let datadir = writerUserDataDir opts
-  refArchive <- liftM toArchive $
+  refArchive <- liftM (toArchive . toLazy) $
        case writerReferenceDocx opts of
-             Just f -> B.readFile f
-             Nothing -> (B.fromChunks . (:[])) `fmap`
-                          readDataFile datadir "reference.docx"
+             Just f  -> B.readFile f
+             Nothing -> readDataFile datadir "reference.docx"
 
   ((contents, footnotes), st) <- runStateT (writeOpenXML opts{writerWrapText = False} doc)
                        defaultWriterState
@@ -125,7 +128,7 @@ writeDocx opts doc@(Pandoc (Meta tit auths date) _) = do
   let reldoc' = reldoc{ elContent = elContent reldoc ++ map Elem newrels }
   -- create entries for images
   let toImageEntry (ident,img) = toEntry ("word/" ++ imgPath ident img)
-         epochtime img
+         epochtime $ toLazy img
   let imageEntries = map toImageEntry imgs
   -- NOW get list of external links and images from this, and do what's needed
   let toLinkRel (src,ident) =  mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"),("Id",ident),("Target",src),("TargetMode","External") ] ()
@@ -623,14 +626,20 @@ inlineToOpenXML opts (Link txt (src,_)) = do
               return i
   return [ mknode "w:hyperlink" [("r:id",id')] contents ]
 inlineToOpenXML opts (Image alt (src, tit)) = do
-  exists <- liftIO $ doesFileExist src
-  if exists
-     then do
+  res <- liftIO $ E.try $ getItem (writerUserDataDir opts) src
+  -- res is Right (img, maybeMIMEString) or Left err
+  case res of
+     Left (_ :: E.SomeException) -> do
+       liftIO $ warn $ "Could not find image `" ++ src ++ "', skipping..."
+       inlinesToOpenXML opts alt
+     Right (img, _) -> do
        imgs <- gets stImages
+       -- TODO move this check to before the getItem
+       -- also TODO, instead of storing ident, imagebs; store
+       -- the whole Element, so we don't have to reconstruct it at all.
        (ident,size) <- case M.lookup src imgs of
-                            Just (i,img) -> return (i, imageSize img)
+                            Just (i,img') -> return (i, imageSize img')
                             Nothing -> do
-                              img <- liftIO $ B.readFile src
                               ident' <- ("rId"++) `fmap` getUniqueId
                               let size'  = imageSize img
                               modify $ \st -> st{
@@ -672,10 +681,6 @@ inlineToOpenXML opts (Image alt (src, tit)) = do
                , mknode "wp:effectExtent" [("b","0"),("l","0"),("r","0"),("t","0")] ()
                , mknode "wp:docPr" [("descr",tit),("id","1"),("name","Picture")] ()
                , graphic ] ]
-     else do
-       liftIO $ UTF8.hPutStrLn stderr $
-          "Could not find image `" ++ src ++ "', skipping..."
-       inlinesToOpenXML opts alt
 
 br :: Element
 br = mknode "w:r" [] [mknode "w:cr" [] () ]
author	John MacFarlane <fiddlosopher@gmail.com>	2013-01-11 12:17:41 -0800
committer	John MacFarlane <fiddlosopher@gmail.com>	2013-01-11 12:17:41 -0800
commit	4e4c3537e01c4335a09d47d330925484dd158974 (patch)
tree	d5e91bdb13782ec01e37af9b8516826876249da7 /src/Text/Pandoc
parent	2685ebff0f644be43d743ee327345165d43815b5 (diff)
download	pandoc-4e4c3537e01c4335a09d47d330925484dd158974.tar.gz