From bd1079e48e055f7b58ce13be3dfa8b5c5cb5ba7c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 12 Jul 2013 20:58:15 +0100 Subject: Docx writer: Ignore most components of reference.docx. We take the word/styles.xml, docProps/app.xml, word/theme/theme1.xml, and word/fontTable.xml from reference.docx, ignoring everything else. Perhaps this will help with the corruption problems caused when different versions of Word resave the reference.docx and reorganize things. --- src/Text/Pandoc/Writers/Docx.hs | 65 ++++++++++++++++++++++++++--------------- 1 file changed, 41 insertions(+), 24 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 6348e20d2..e899200f6 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -32,8 +32,10 @@ module Text.Pandoc.Writers.Docx ( writeDocx ) where import Data.List ( intercalate, groupBy ) import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL +import qualified Data.ByteString.Lazy.Char8 as BL8 import qualified Data.Map as M import qualified Text.Pandoc.UTF8 as UTF8 +import Data.Monoid ((<>)) import Codec.Archive.Zip import Data.Time.Clock.POSIX import Text.Pandoc.Definition @@ -89,9 +91,6 @@ defaultWriterState = WriterState{ type WS a = StateT WriterState IO a -showTopElement' :: Element -> String -showTopElement' x = "\n" ++ showElement x - mknode :: Node t => String -> [(String,String)] -> t -> Element mknode s attrs = add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) . node (unqual s) @@ -99,6 +98,10 @@ mknode s attrs = toLazy :: B.ByteString -> BL.ByteString toLazy = BL.fromChunks . (:[]) +renderXml :: Element -> BL.ByteString +renderXml elt = BL8.pack "\n" <> + UTF8.fromStringLazy (showElement elt) + -- | Produce an Docx file from a Pandoc document. writeDocx :: WriterOptions -- ^ Writer options -> Pandoc -- ^ Document to convert @@ -155,7 +158,7 @@ writeDocx opts doc@(Pandoc meta _) = do [("Extension","rels"),("ContentType","application/vnd.openxmlformats-package.relationships+xml")] ()] let contentTypesDoc = mknode "Types" [("xmlns","http://schemas.openxmlformats.org/package/2006/content-types")] $ defaultnodes ++ overrides let contentTypesEntry = toEntry "[Content_Types].xml" epochtime - $ UTF8.fromStringLazy $ showTopElement' contentTypesDoc + $ renderXml contentTypesDoc -- word/_rels/document.xml.rels let toBaseRel (url', id', target') = mknode "Relationship" @@ -190,22 +193,21 @@ writeDocx opts doc@(Pandoc meta _) = do let linkrels = map toLinkRel $ M.toList $ stExternalLinks st let reldoc = mknode "Relationships" [("xmlns","http://schemas.openxmlformats.org/package/2006/relationships")] $ baserels ++ imgrels ++ linkrels let relEntry = toEntry "word/_rels/document.xml.rels" epochtime - $ UTF8.fromStringLazy $ showTopElement' reldoc + $ renderXml reldoc -- create entries for images in word/media/... let toImageEntry (_,path,_,_,img) = toEntry ("word/" ++ path) epochtime $ toLazy img let imageEntries = map toImageEntry imgs -- word/document.xml - let contentEntry = toEntry "word/document.xml" epochtime $ UTF8.fromStringLazy $ showTopElement' contents + let contentEntry = toEntry "word/document.xml" epochtime $ renderXml contents -- footnotes - let footnotesEntry = toEntry "word/footnotes.xml" epochtime $ UTF8.fromStringLazy $ - showTopElement' footnotes + let footnotesEntry = toEntry "word/footnotes.xml" epochtime $ renderXml footnotes -- footnote rels - let footnoteRelEntry = toEntry "word/_rels/footnotes.xml.rels" epochtime $ UTF8.fromStringLazy $ - showTopElement' $ mknode "Relationships" [("xmlns","http://schemas.openxmlformats.org/package/2006/relationships")] + let footnoteRelEntry = toEntry "word/_rels/footnotes.xml.rels" epochtime + $ renderXml $ mknode "Relationships" [("xmlns","http://schemas.openxmlformats.org/package/2006/relationships")] $ linkrels -- styles @@ -213,11 +215,11 @@ writeDocx opts doc@(Pandoc meta _) = do let stylepath = "word/styles.xml" styledoc <- parseXml refArchive stylepath let styledoc' = styledoc{ elContent = elContent styledoc ++ map Elem newstyles } - let styleEntry = toEntry stylepath epochtime $ UTF8.fromStringLazy $ showTopElement' styledoc' + let styleEntry = toEntry stylepath epochtime $ renderXml styledoc' -- construct word/numbering.xml let numpath = "word/numbering.xml" - numEntry <- (toEntry numpath epochtime . UTF8.fromStringLazy . showTopElement') + numEntry <- (toEntry numpath epochtime . renderXml) `fmap` mkNumbering (stNumStyles st) (stLists st) let docPropsPath = "docProps/core.xml" let docProps = mknode "cp:coreProperties" @@ -231,21 +233,36 @@ writeDocx opts doc@(Pandoc meta _) = do (maybe "" id $ normalizeDate $ stringify $ docDate meta) : mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] () -- put current time here : map (mknode "dc:creator" [] . stringify) (docAuthors meta) - let docPropsEntry = toEntry docPropsPath epochtime $ UTF8.fromStringLazy $ showTopElement' docProps - let relsPath = "_rels/.rels" - rels <- case findEntryByPath relsPath refArchive of - Just e -> return $ UTF8.toStringLazy $ fromEntry e - Nothing -> err 57 "could not find .rels/_rels in reference docx" + let docPropsEntry = toEntry docPropsPath epochtime $ renderXml docProps - -- fix .rels/_rels, which can get screwed up when reference.docx is edited by Word - let rels' = substitute "http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties" - "http://schemas.openxmlformats.org/officedocument/2006/relationships/metadata/core-properties" - rels - let relsEntry = toEntry relsPath epochtime $ UTF8.fromStringLazy rels' + let relsPath = "_rels/.rels" + let rels = mknode "Relationships" [("xmlns", "http://schemas.openxmlformats.org/package/2006/relationships")] + $ map (\attrs -> mknode "Relationship" attrs ()) + [ [("Id","rId1") + ,("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument") + ,("Target","word/document.xml")] + , [("Id","rId4") + ,("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties") + ,("Target","docProps/app.xml")] + , [("Id","rId3") + ,("Type","http://schemas.openxmlformats.org/officedocument/2006/relationships/metadata/core-properties") + ,("Target","docProps/core.xml")] + ] + let relsEntry = toEntry relsPath epochtime $ renderXml rels + + let entryFromArchive path = (toEntry path epochtime . renderXml) `fmap` + parseXml refArchive path + docPropsAppEntry <- entryFromArchive "docProps/app.xml" + themeEntry <- entryFromArchive "word/theme/theme1.xml" + fontTableEntry <- entryFromArchive "word/fontTable.xml" + webSettingsEntry <- entryFromArchive "word/webSettings.xml" -- Create archive - let archive = foldr addEntryToArchive refArchive $ - contentTypesEntry : relsEntry : contentEntry : relEntry : footnoteRelEntry : numEntry : styleEntry : footnotesEntry : docPropsEntry : imageEntries + let archive = foldr addEntryToArchive emptyArchive $ + contentTypesEntry : relsEntry : contentEntry : relEntry : + footnoteRelEntry : numEntry : styleEntry : footnotesEntry : + docPropsEntry : docPropsAppEntry : themeEntry : + fontTableEntry : webSettingsEntry : imageEntries return $ fromArchive archive styleToOpenXml :: Style -> [Element] -- cgit v1.2.3