aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2013-07-12 20:58:15 +0100
committerJohn MacFarlane <jgm@berkeley.edu>2013-07-12 20:58:15 +0100
commitbd1079e48e055f7b58ce13be3dfa8b5c5cb5ba7c (patch)
treed191979d6cc8bbebbbcc9c1536c10bff6e4f978c
parented714b1b52828ad1aa0094a98c392b04dd9c4588 (diff)
downloadpandoc-bd1079e48e055f7b58ce13be3dfa8b5c5cb5ba7c.tar.gz
Docx writer: Ignore most components of reference.docx.
We take the word/styles.xml, docProps/app.xml, word/theme/theme1.xml, and word/fontTable.xml from reference.docx, ignoring everything else. Perhaps this will help with the corruption problems caused when different versions of Word resave the reference.docx and reorganize things.
-rw-r--r--src/Text/Pandoc/Writers/Docx.hs65
1 files changed, 41 insertions, 24 deletions
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 6348e20d2..e899200f6 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -32,8 +32,10 @@ module Text.Pandoc.Writers.Docx ( writeDocx ) where
import Data.List ( intercalate, groupBy )
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as BL
+import qualified Data.ByteString.Lazy.Char8 as BL8
import qualified Data.Map as M
import qualified Text.Pandoc.UTF8 as UTF8
+import Data.Monoid ((<>))
import Codec.Archive.Zip
import Data.Time.Clock.POSIX
import Text.Pandoc.Definition
@@ -89,9 +91,6 @@ defaultWriterState = WriterState{
type WS a = StateT WriterState IO a
-showTopElement' :: Element -> String
-showTopElement' x = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++ showElement x
-
mknode :: Node t => String -> [(String,String)] -> t -> Element
mknode s attrs =
add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) . node (unqual s)
@@ -99,6 +98,10 @@ mknode s attrs =
toLazy :: B.ByteString -> BL.ByteString
toLazy = BL.fromChunks . (:[])
+renderXml :: Element -> BL.ByteString
+renderXml elt = BL8.pack "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" <>
+ UTF8.fromStringLazy (showElement elt)
+
-- | Produce an Docx file from a Pandoc document.
writeDocx :: WriterOptions -- ^ Writer options
-> Pandoc -- ^ Document to convert
@@ -155,7 +158,7 @@ writeDocx opts doc@(Pandoc meta _) = do
[("Extension","rels"),("ContentType","application/vnd.openxmlformats-package.relationships+xml")] ()]
let contentTypesDoc = mknode "Types" [("xmlns","http://schemas.openxmlformats.org/package/2006/content-types")] $ defaultnodes ++ overrides
let contentTypesEntry = toEntry "[Content_Types].xml" epochtime
- $ UTF8.fromStringLazy $ showTopElement' contentTypesDoc
+ $ renderXml contentTypesDoc
-- word/_rels/document.xml.rels
let toBaseRel (url', id', target') = mknode "Relationship"
@@ -190,22 +193,21 @@ writeDocx opts doc@(Pandoc meta _) = do
let linkrels = map toLinkRel $ M.toList $ stExternalLinks st
let reldoc = mknode "Relationships" [("xmlns","http://schemas.openxmlformats.org/package/2006/relationships")] $ baserels ++ imgrels ++ linkrels
let relEntry = toEntry "word/_rels/document.xml.rels" epochtime
- $ UTF8.fromStringLazy $ showTopElement' reldoc
+ $ renderXml reldoc
-- create entries for images in word/media/...
let toImageEntry (_,path,_,_,img) = toEntry ("word/" ++ path) epochtime $ toLazy img
let imageEntries = map toImageEntry imgs
-- word/document.xml
- let contentEntry = toEntry "word/document.xml" epochtime $ UTF8.fromStringLazy $ showTopElement' contents
+ let contentEntry = toEntry "word/document.xml" epochtime $ renderXml contents
-- footnotes
- let footnotesEntry = toEntry "word/footnotes.xml" epochtime $ UTF8.fromStringLazy $
- showTopElement' footnotes
+ let footnotesEntry = toEntry "word/footnotes.xml" epochtime $ renderXml footnotes
-- footnote rels
- let footnoteRelEntry = toEntry "word/_rels/footnotes.xml.rels" epochtime $ UTF8.fromStringLazy $
- showTopElement' $ mknode "Relationships" [("xmlns","http://schemas.openxmlformats.org/package/2006/relationships")]
+ let footnoteRelEntry = toEntry "word/_rels/footnotes.xml.rels" epochtime
+ $ renderXml $ mknode "Relationships" [("xmlns","http://schemas.openxmlformats.org/package/2006/relationships")]
$ linkrels
-- styles
@@ -213,11 +215,11 @@ writeDocx opts doc@(Pandoc meta _) = do
let stylepath = "word/styles.xml"
styledoc <- parseXml refArchive stylepath
let styledoc' = styledoc{ elContent = elContent styledoc ++ map Elem newstyles }
- let styleEntry = toEntry stylepath epochtime $ UTF8.fromStringLazy $ showTopElement' styledoc'
+ let styleEntry = toEntry stylepath epochtime $ renderXml styledoc'
-- construct word/numbering.xml
let numpath = "word/numbering.xml"
- numEntry <- (toEntry numpath epochtime . UTF8.fromStringLazy . showTopElement')
+ numEntry <- (toEntry numpath epochtime . renderXml)
`fmap` mkNumbering (stNumStyles st) (stLists st)
let docPropsPath = "docProps/core.xml"
let docProps = mknode "cp:coreProperties"
@@ -231,21 +233,36 @@ writeDocx opts doc@(Pandoc meta _) = do
(maybe "" id $ normalizeDate $ stringify $ docDate meta)
: mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] () -- put current time here
: map (mknode "dc:creator" [] . stringify) (docAuthors meta)
- let docPropsEntry = toEntry docPropsPath epochtime $ UTF8.fromStringLazy $ showTopElement' docProps
- let relsPath = "_rels/.rels"
- rels <- case findEntryByPath relsPath refArchive of
- Just e -> return $ UTF8.toStringLazy $ fromEntry e
- Nothing -> err 57 "could not find .rels/_rels in reference docx"
+ let docPropsEntry = toEntry docPropsPath epochtime $ renderXml docProps
- -- fix .rels/_rels, which can get screwed up when reference.docx is edited by Word
- let rels' = substitute "http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties"
- "http://schemas.openxmlformats.org/officedocument/2006/relationships/metadata/core-properties"
- rels
- let relsEntry = toEntry relsPath epochtime $ UTF8.fromStringLazy rels'
+ let relsPath = "_rels/.rels"
+ let rels = mknode "Relationships" [("xmlns", "http://schemas.openxmlformats.org/package/2006/relationships")]
+ $ map (\attrs -> mknode "Relationship" attrs ())
+ [ [("Id","rId1")
+ ,("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument")
+ ,("Target","word/document.xml")]
+ , [("Id","rId4")
+ ,("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties")
+ ,("Target","docProps/app.xml")]
+ , [("Id","rId3")
+ ,("Type","http://schemas.openxmlformats.org/officedocument/2006/relationships/metadata/core-properties")
+ ,("Target","docProps/core.xml")]
+ ]
+ let relsEntry = toEntry relsPath epochtime $ renderXml rels
+
+ let entryFromArchive path = (toEntry path epochtime . renderXml) `fmap`
+ parseXml refArchive path
+ docPropsAppEntry <- entryFromArchive "docProps/app.xml"
+ themeEntry <- entryFromArchive "word/theme/theme1.xml"
+ fontTableEntry <- entryFromArchive "word/fontTable.xml"
+ webSettingsEntry <- entryFromArchive "word/webSettings.xml"
-- Create archive
- let archive = foldr addEntryToArchive refArchive $
- contentTypesEntry : relsEntry : contentEntry : relEntry : footnoteRelEntry : numEntry : styleEntry : footnotesEntry : docPropsEntry : imageEntries
+ let archive = foldr addEntryToArchive emptyArchive $
+ contentTypesEntry : relsEntry : contentEntry : relEntry :
+ footnoteRelEntry : numEntry : styleEntry : footnotesEntry :
+ docPropsEntry : docPropsAppEntry : themeEntry :
+ fontTableEntry : webSettingsEntry : imageEntries
return $ fromArchive archive
styleToOpenXml :: Style -> [Element]