diff options
Diffstat (limited to 'src/Text/Pandoc/Writers/Docx.hs')
-rw-r--r-- | src/Text/Pandoc/Writers/Docx.hs | 483 |
1 files changed, 363 insertions, 120 deletions
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 2a834c2da..5b9cc62ab 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -1,6 +1,6 @@ -{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE ScopedTypeVariables, PatternGuards #-} {- -Copyright (C) 2012 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2012-2014 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Docx - Copyright : Copyright (C) 2012 John MacFarlane + Copyright : Copyright (C) 2012-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -29,8 +29,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to docx. -} module Text.Pandoc.Writers.Docx ( writeDocx ) where -import Data.Maybe (fromMaybe) -import Data.List ( intercalate, isPrefixOf, isSuffixOf ) +import Data.List ( intercalate, isPrefixOf, isSuffixOf, stripPrefix ) import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Lazy.Char8 as BL8 @@ -39,6 +38,10 @@ import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Compat.Monoid ((<>)) import Codec.Archive.Zip import Data.Time.Clock.POSIX +import Data.Time.Clock +import Data.Time.Format +import System.Environment +import System.Locale import Text.Pandoc.Definition import Text.Pandoc.Generic import Text.Pandoc.ImageSize @@ -49,7 +52,7 @@ import Text.Pandoc.Readers.TeXMath import Text.Pandoc.Highlighting ( highlight ) import Text.Pandoc.Walk import Text.Highlighting.Kate.Types () -import Text.XML.Light +import Text.XML.Light as XML import Text.TeXMath import Control.Monad.State import Text.Highlighting.Kate @@ -57,8 +60,35 @@ import Data.Unique (hashUnique, newUnique) import System.Random (randomRIO) import Text.Printf (printf) import qualified Control.Exception as E -import Text.Pandoc.MIME (getMimeType, extensionFromMimeType) -import Control.Applicative ((<|>)) +import Text.Pandoc.MIME (MimeType, getMimeType, getMimeTypeDef, + extensionFromMimeType) +import Control.Applicative ((<$>), (<|>), (<*>)) +import Data.Maybe (fromMaybe, mapMaybe) +import Data.Char (isDigit) + +data ListMarker = NoMarker + | BulletMarker + | NumberMarker ListNumberStyle ListNumberDelim Int + deriving (Show, Read, Eq, Ord) + +listMarkerToId :: ListMarker -> String +listMarkerToId NoMarker = "990" +listMarkerToId BulletMarker = "991" +listMarkerToId (NumberMarker sty delim n) = + '9' : '9' : styNum : delimNum : show n + where styNum = case sty of + DefaultStyle -> '2' + Example -> '3' + Decimal -> '4' + LowerRoman -> '5' + UpperRoman -> '6' + LowerAlpha -> '7' + UpperAlpha -> '8' + delimNum = case delim of + DefaultDelim -> '0' + Period -> '1' + OneParen -> '2' + TwoParens -> '3' data WriterState = WriterState{ stTextProperties :: [Element] @@ -66,18 +96,19 @@ data WriterState = WriterState{ , stFootnotes :: [Element] , stSectionIds :: [String] , stExternalLinks :: M.Map String String - , stImages :: M.Map FilePath (String, String, Maybe String, Element, B.ByteString) + , stImages :: M.Map FilePath (String, String, Maybe MimeType, Element, B.ByteString) , stListLevel :: Int , stListNumId :: Int - , stNumStyles :: M.Map ListMarker Int , stLists :: [ListMarker] + , stInsId :: Int + , stDelId :: Int + , stInDel :: Bool + , stChangesAuthor :: String + , stChangesDate :: String + , stPrintWidth :: Integer + , stHeadingStyles :: [(Int,String)] } -data ListMarker = NoMarker - | BulletMarker - | NumberMarker ListNumberStyle ListNumberDelim Int - deriving (Show, Read, Eq, Ord) - defaultWriterState :: WriterState defaultWriterState = WriterState{ stTextProperties = [] @@ -88,15 +119,27 @@ defaultWriterState = WriterState{ , stImages = M.empty , stListLevel = -1 , stListNumId = 1 - , stNumStyles = M.fromList [(NoMarker, 0)] , stLists = [NoMarker] + , stInsId = 1 + , stDelId = 1 + , stInDel = False + , stChangesAuthor = "unknown" + , stChangesDate = "1969-12-31T19:00:00Z" + , stPrintWidth = 1 + , stHeadingStyles = [] } type WS a = StateT WriterState IO a mknode :: Node t => String -> [(String,String)] -> t -> Element mknode s attrs = - add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) . node (unqual s) + add_attrs (map (\(k,v) -> Attr (nodename k) v) attrs) . node (nodename s) + +nodename :: String -> QName +nodename s = QName{ qName = name, qURI = Nothing, qPrefix = prefix } + where (name, prefix) = case break (==':') s of + (xs,[]) -> (xs, Nothing) + (ys, _:zs) -> (zs, Just ys) toLazy :: B.ByteString -> BL.ByteString toLazy = BL.fromChunks . (:[]) @@ -105,6 +148,31 @@ renderXml :: Element -> BL.ByteString renderXml elt = BL8.pack "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" <> UTF8.fromStringLazy (showElement elt) +renumIdMap :: Int -> [Element] -> M.Map String String +renumIdMap _ [] = M.empty +renumIdMap n (e:es) + | Just oldId <- findAttr (QName "Id" Nothing Nothing) e = + M.insert oldId ("rId" ++ (show n)) (renumIdMap (n+1) es) + | otherwise = renumIdMap n es + +replaceAttr :: (QName -> Bool) -> String -> [XML.Attr] -> [XML.Attr] +replaceAttr _ _ [] = [] +replaceAttr f val (a:as) | f (attrKey a) = + (XML.Attr (attrKey a) val) : (replaceAttr f val as) + | otherwise = a : (replaceAttr f val as) + +renumId :: (QName -> Bool) -> (M.Map String String) -> Element -> Element +renumId f renumMap e + | Just oldId <- findAttrBy f e + , Just newId <- M.lookup oldId renumMap = + let attrs' = replaceAttr f newId (elAttribs e) + in + e { elAttribs = attrs' } + | otherwise = e + +renumIds :: (QName -> Bool) -> (M.Map String String) -> [Element] -> [Element] +renumIds f renumMap = map (renumId f renumMap) + -- | Produce an Docx file from a Pandoc document. writeDocx :: WriterOptions -- ^ Writer options -> Pandoc -- ^ Document to convert @@ -112,16 +180,92 @@ writeDocx :: WriterOptions -- ^ Writer options writeDocx opts doc@(Pandoc meta _) = do let datadir = writerUserDataDir opts let doc' = walk fixDisplayMath doc + username <- lookup "USERNAME" <$> getEnvironment + utctime <- getCurrentTime refArchive <- liftM (toArchive . toLazy) $ case writerReferenceDocx opts of Just f -> B.readFile f Nothing -> readDataFile datadir "reference.docx" + distArchive <- liftM (toArchive . toLazy) $ readDataFile datadir "reference.docx" + + parsedDoc <- parseXml refArchive distArchive "word/document.xml" + let wname f qn = qPrefix qn == Just "w" && f (qName qn) + let mbsectpr = filterElementName (wname (=="sectPr")) parsedDoc + + -- Gets the template size + let mbpgsz = mbsectpr >>= (filterElementName (wname (=="pgSz"))) + let mbAttrSzWidth = (elAttribs <$> mbpgsz) >>= (lookupAttrBy ((=="w") . qName)) + + let mbpgmar = mbsectpr >>= (filterElementName (wname (=="pgMar"))) + let mbAttrMarLeft = (elAttribs <$> mbpgmar) >>= (lookupAttrBy ((=="left") . qName)) + let mbAttrMarRight = (elAttribs <$> mbpgmar) >>= (lookupAttrBy ((=="right") . qName)) + + -- Get the avaible area (converting the size and the margins to int and + -- doing the difference + let pgContentWidth = (-) <$> (read <$> mbAttrSzWidth ::Maybe Integer) + <*> ( + (+) <$> (read <$> mbAttrMarRight ::Maybe Integer) + <*> (read <$> mbAttrMarLeft ::Maybe Integer) + ) + + -- styles + let stylepath = "word/styles.xml" + styledoc <- parseXml refArchive distArchive stylepath + + -- parse styledoc for heading styles + let styleNamespaces = map ((,) <$> qName . attrKey <*> attrVal) . + filter ((==Just "xmlns") . qPrefix . attrKey) . + elAttribs $ styledoc + let headingStyles = + let + mywURI = lookup "w" styleNamespaces + myName name = QName name mywURI (Just "w") + getAttrStyleId = findAttr (myName "styleId") + getNameVal = findChild (myName "name") >=> findAttr (myName "val") + getNum s | not $ null s, all isDigit s = Just (read s :: Int) + | otherwise = Nothing + getEngHeader = getAttrStyleId >=> stripPrefix "Heading" >=> getNum + getIntHeader = getNameVal >=> stripPrefix "heading " >=> getNum + toTuple getF = liftM2 (,) <$> getF <*> getAttrStyleId + toMap getF = mapMaybe (toTuple getF) $ + findChildren (myName "style") styledoc + select a b | not $ null a = a + | otherwise = b + in + select (toMap getEngHeader) (toMap getIntHeader) ((contents, footnotes), st) <- runStateT (writeOpenXML opts{writerWrapText = False} doc') - defaultWriterState - epochtime <- floor `fmap` getPOSIXTime + defaultWriterState{ stChangesAuthor = fromMaybe "unknown" username + , stChangesDate = formatTime defaultTimeLocale "%FT%XZ" utctime + , stPrintWidth = (maybe 420 (\x -> quot x 20) pgContentWidth) + , stHeadingStyles = headingStyles} + let epochtime = floor $ utcTimeToPOSIXSeconds utctime let imgs = M.elems $ stImages st + -- create entries for images in word/media/... + let toImageEntry (_,path,_,_,img) = toEntry ("word/" ++ path) epochtime $ toLazy img + let imageEntries = map toImageEntry imgs + + let stdAttributes = + [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main") + ,("xmlns:m","http://schemas.openxmlformats.org/officeDocument/2006/math") + ,("xmlns:r","http://schemas.openxmlformats.org/officeDocument/2006/relationships") + ,("xmlns:o","urn:schemas-microsoft-com:office:office") + ,("xmlns:v","urn:schemas-microsoft-com:vml") + ,("xmlns:w10","urn:schemas-microsoft-com:office:word") + ,("xmlns:a","http://schemas.openxmlformats.org/drawingml/2006/main") + ,("xmlns:pic","http://schemas.openxmlformats.org/drawingml/2006/picture") + ,("xmlns:wp","http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing")] + + + parsedRels <- parseXml refArchive distArchive "word/_rels/document.xml.rels" + let isHeaderNode e = findAttr (QName "Type" Nothing Nothing) e == Just "http://schemas.openxmlformats.org/officeDocument/2006/relationships/header" + let isFooterNode e = findAttr (QName "Type" Nothing Nothing) e == Just "http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer" + let headers = filterElements isHeaderNode parsedRels + let footers = filterElements isFooterNode parsedRels + + let extractTarget = findAttr (QName "Target" Nothing Nothing) + -- we create [Content_Types].xml and word/_rels/document.xml.rels -- from scratch rather than reading from reference.docx, -- because Word sometimes changes these files when a reference.docx is modified, @@ -132,9 +276,11 @@ writeDocx opts doc@(Pandoc meta _) = do let mkOverrideNode (part', contentType') = mknode "Override" [("PartName",part'),("ContentType",contentType')] () let mkImageOverride (_, imgpath, mbMimeType, _, _) = - mkOverrideNode ("/word/" ++ imgpath, - fromMaybe "application/octet-stream" mbMimeType) - let overrides = map mkOverrideNode + mkOverrideNode ("/word/" ++ imgpath, + fromMaybe "application/octet-stream" mbMimeType) + let mkMediaOverride imgpath = + mkOverrideNode ('/':imgpath, getMimeTypeDef imgpath) + let overrides = map mkOverrideNode ( [("/word/webSettings.xml", "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml") ,("/word/numbering.xml", @@ -155,7 +301,15 @@ writeDocx opts doc@(Pandoc meta _) = do "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml") ,("/word/footnotes.xml", "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml") - ] ++ map mkImageOverride imgs + ] ++ + map (\x -> (maybe "" ("/word/" ++) $ extractTarget x, + "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml")) headers ++ + map (\x -> (maybe "" ("/word/" ++) $ extractTarget x, + "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml")) footers) ++ + map mkImageOverride imgs ++ + map mkMediaOverride [ eRelativePath e | e <- zEntries refArchive + , "word/media/" `isPrefixOf` eRelativePath e ] + let defaultnodes = [mknode "Default" [("Extension","xml"),("ContentType","application/xml")] (), mknode "Default" @@ -169,7 +323,7 @@ writeDocx opts doc@(Pandoc meta _) = do [("Type",url') ,("Id",id') ,("Target",target')] () - let baserels = map toBaseRel + let baserels' = map toBaseRel [("http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering", "rId1", "numbering.xml") @@ -190,7 +344,13 @@ writeDocx opts doc@(Pandoc meta _) = do "theme/theme1.xml") ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes", "rId7", - "footnotes.xml")] + "footnotes.xml") + ] + + let idMap = renumIdMap (length baserels' + 1) (headers ++ footers) + let renumHeaders = renumIds (\q -> qName q == "Id") idMap headers + let renumFooters = renumIds (\q -> qName q == "Id") idMap footers + let baserels = baserels' ++ renumHeaders ++ renumFooters let toImgRel (ident,path,_,_,_) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"),("Id",ident),("Target",path)] () let imgrels = map toImgRel imgs let toLinkRel (src,ident) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"),("Id",ident),("Target",src),("TargetMode","External") ] () @@ -199,33 +359,55 @@ writeDocx opts doc@(Pandoc meta _) = do let relEntry = toEntry "word/_rels/document.xml.rels" epochtime $ renderXml reldoc - -- create entries for images in word/media/... - let toImageEntry (_,path,_,_,img) = toEntry ("word/" ++ path) epochtime $ toLazy img - let imageEntries = map toImageEntry imgs + + -- adjust contents to add sectPr from reference.docx + let sectpr = case mbsectpr of + Just sectpr' -> let cs = renumIds + (\q -> qName q == "id" && qPrefix q == Just "r") + idMap + (elChildren sectpr') + in + add_attrs (elAttribs sectpr') $ mknode "w:sectPr" [] cs + Nothing -> (mknode "w:sectPr" [] ()) + + -- let sectpr = fromMaybe (mknode "w:sectPr" [] ()) mbsectpr' + let contents' = contents ++ [sectpr] + let docContents = mknode "w:document" stdAttributes + $ mknode "w:body" [] contents' + + -- word/document.xml - let contentEntry = toEntry "word/document.xml" epochtime $ renderXml contents + let contentEntry = toEntry "word/document.xml" epochtime + $ renderXml docContents -- footnotes - let footnotesEntry = toEntry "word/footnotes.xml" epochtime $ renderXml footnotes + let notes = mknode "w:footnotes" stdAttributes footnotes + let footnotesEntry = toEntry "word/footnotes.xml" epochtime $ renderXml notes -- footnote rels let footnoteRelEntry = toEntry "word/_rels/footnotes.xml.rels" epochtime $ renderXml $ mknode "Relationships" [("xmlns","http://schemas.openxmlformats.org/package/2006/relationships")] - $ linkrels + linkrels -- styles let newstyles = styleToOpenXml $ writerHighlightStyle opts - let stylepath = "word/styles.xml" - styledoc <- parseXml refArchive stylepath let styledoc' = styledoc{ elContent = elContent styledoc ++ [Elem x | x <- newstyles, writerHighlight opts] } let styleEntry = toEntry stylepath epochtime $ renderXml styledoc' -- construct word/numbering.xml let numpath = "word/numbering.xml" - numEntry <- (toEntry numpath epochtime . renderXml) - `fmap` mkNumbering (stNumStyles st) (stLists st) + numbering <- parseXml refArchive distArchive numpath + newNumElts <- mkNumbering (stLists st) + let allElts = onlyElems (elContent numbering) ++ newNumElts + let numEntry = toEntry numpath epochtime $ renderXml numbering{ elContent = + -- we want all the abstractNums first, then the nums, + -- otherwise things break: + [Elem e | e <- allElts + , qName (elName e) == "abstractNum" ] ++ + [Elem e | e <- allElts + , qName (elName e) == "num" ] } let docPropsPath = "docProps/core.xml" let docProps = mknode "cp:coreProperties" [("xmlns:cp","http://schemas.openxmlformats.org/package/2006/metadata/core-properties") @@ -236,8 +418,8 @@ writeDocx opts doc@(Pandoc meta _) = do $ mknode "dc:title" [] (stringify $ docTitle meta) : mknode "dc:creator" [] (intercalate "; " (map stringify $ docAuthors meta)) : maybe [] - (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] $ x - , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] $ x + (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x + , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x ]) (normalizeDate $ stringify $ docDate meta) let docPropsEntry = toEntry docPropsPath epochtime $ renderXml docProps @@ -256,19 +438,27 @@ writeDocx opts doc@(Pandoc meta _) = do ] let relsEntry = toEntry relsPath epochtime $ renderXml rels - let entryFromArchive path = (toEntry path epochtime . renderXml) `fmap` - parseXml refArchive path - docPropsAppEntry <- entryFromArchive "docProps/app.xml" - themeEntry <- entryFromArchive "word/theme/theme1.xml" - fontTableEntry <- entryFromArchive "word/fontTable.xml" - settingsEntry <- entryFromArchive "word/settings.xml" - webSettingsEntry <- entryFromArchive "word/webSettings.xml" - let miscRels = [ f | f <- filesInArchive refArchive - , "word/_rels/" `isPrefixOf` f - , ".xml.rels" `isSuffixOf` f - , f /= "word/_rels/document.xml.rels" - , f /= "word/_rels/footnotes.xml.rels" ] - miscRelEntries <- mapM entryFromArchive miscRels + let entryFromArchive arch path = + maybe (fail $ path ++ " corrupt or missing in reference docx") + return + (findEntryByPath path arch `mplus` findEntryByPath path distArchive) + docPropsAppEntry <- entryFromArchive refArchive "docProps/app.xml" + themeEntry <- entryFromArchive refArchive "word/theme/theme1.xml" + fontTableEntry <- entryFromArchive refArchive "word/fontTable.xml" + -- we use dist archive for settings.xml, because Word sometimes + -- adds references to footnotes or endnotes we don't have... + settingsEntry <- entryFromArchive distArchive "word/settings.xml" + webSettingsEntry <- entryFromArchive refArchive "word/webSettings.xml" + headerFooterEntries <- mapM (entryFromArchive refArchive) $ + mapMaybe (fmap ("word/" ++) . extractTarget) + (headers ++ footers) + let miscRelEntries = [ e | e <- zEntries refArchive + , "word/_rels/" `isPrefixOf` (eRelativePath e) + , ".xml.rels" `isSuffixOf` (eRelativePath e) + , eRelativePath e /= "word/_rels/document.xml.rels" + , eRelativePath e /= "word/_rels/footnotes.xml.rels" ] + let otherMediaEntries = [ e | e <- zEntries refArchive + , "word/media/" `isPrefixOf` eRelativePath e ] -- Create archive let archive = foldr addEntryToArchive emptyArchive $ @@ -276,7 +466,8 @@ writeDocx opts doc@(Pandoc meta _) = do footnoteRelEntry : numEntry : styleEntry : footnotesEntry : docPropsEntry : docPropsAppEntry : themeEntry : fontTableEntry : settingsEntry : webSettingsEntry : - imageEntries ++ miscRelEntries + imageEntries ++ headerFooterEntries ++ + miscRelEntries ++ otherMediaEntries return $ fromArchive archive styleToOpenXml :: Style -> [Element] @@ -314,29 +505,30 @@ styleToOpenXml style = parStyle : map toStyle alltoktypes $ backgroundColor style ) ] -mkNumbering :: M.Map ListMarker Int -> [ListMarker] -> IO Element -mkNumbering markers lists = do - elts <- mapM mkAbstractNum (M.toList markers) - return $ mknode "w:numbering" - [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main")] - $ elts ++ zipWith (mkNum markers) lists [1..(length lists)] +-- this is the lowest number used for a list numId +baseListId :: Int +baseListId = 1000 + +mkNumbering :: [ListMarker] -> IO [Element] +mkNumbering lists = do + elts <- mapM mkAbstractNum (ordNub lists) + return $ elts ++ zipWith mkNum lists [baseListId..(baseListId + length lists - 1)] -mkNum :: M.Map ListMarker Int -> ListMarker -> Int -> Element -mkNum markers marker numid = +mkNum :: ListMarker -> Int -> Element +mkNum marker numid = mknode "w:num" [("w:numId",show numid)] - $ mknode "w:abstractNumId" [("w:val",show absnumid)] () + $ mknode "w:abstractNumId" [("w:val",listMarkerToId marker)] () : case marker of NoMarker -> [] BulletMarker -> [] NumberMarker _ _ start -> map (\lvl -> mknode "w:lvlOverride" [("w:ilvl",show (lvl :: Int))] $ mknode "w:startOverride" [("w:val",show start)] ()) [0..6] - where absnumid = fromMaybe 0 $ M.lookup marker markers -mkAbstractNum :: (ListMarker,Int) -> IO Element -mkAbstractNum (marker,numid) = do +mkAbstractNum :: ListMarker -> IO Element +mkAbstractNum marker = do nsid <- randomRIO (0x10000000 :: Integer, 0xFFFFFFFF :: Integer) - return $ mknode "w:abstractNum" [("w:abstractNumId",show numid)] + return $ mknode "w:abstractNum" [("w:abstractNumId",listMarkerToId marker)] $ mknode "w:nsid" [("w:val", printf "%8x" nsid)] () : mknode "w:multiLevelType" [("w:val","multilevel")] () : map (mkLvl marker) [0..6] @@ -388,40 +580,42 @@ mkLvl marker lvl = patternFor _ s = s ++ "." getNumId :: WS Int -getNumId = length `fmap` gets stLists +getNumId = (((baseListId - 1) +) . length) `fmap` gets stLists --- | Convert Pandoc document to two OpenXML elements (the main document and footnotes). -writeOpenXML :: WriterOptions -> Pandoc -> WS (Element, Element) +-- | Convert Pandoc document to two lists of +-- OpenXML elements (the main document and footnotes). +writeOpenXML :: WriterOptions -> Pandoc -> WS ([Element], [Element]) writeOpenXML opts (Pandoc meta blocks) = do let tit = docTitle meta ++ case lookupMeta "subtitle" meta of Just (MetaBlocks [Plain xs]) -> LineBreak : xs _ -> [] let auths = docAuthors meta let dat = docDate meta + let abstract' = case lookupMeta "abstract" meta of + Just (MetaBlocks bs) -> bs + Just (MetaInlines ils) -> [Plain ils] + _ -> [] + let subtitle' = case lookupMeta "subtitle" meta of + Just (MetaBlocks [Plain xs]) -> xs + Just (MetaBlocks [Para xs]) -> xs + Just (MetaInlines xs) -> xs + _ -> [] title <- withParaProp (pStyle "Title") $ blocksToOpenXML opts [Para tit | not (null tit)] - authors <- withParaProp (pStyle "Authors") $ blocksToOpenXML opts - [Para (intercalate [LineBreak] auths) | not (null auths)] + subtitle <- withParaProp (pStyle "Subtitle") $ blocksToOpenXML opts [Para subtitle' | not (null subtitle')] + authors <- withParaProp (pStyle "Author") $ blocksToOpenXML opts $ + map Para auths date <- withParaProp (pStyle "Date") $ blocksToOpenXML opts [Para dat | not (null dat)] + abstract <- if null abstract' + then return [] + else withParaProp (pStyle "Abstract") $ blocksToOpenXML opts abstract' let convertSpace (Str x : Space : Str y : xs) = Str (x ++ " " ++ y) : xs convertSpace (Str x : Str y : xs) = Str (x ++ y) : xs convertSpace xs = xs - let blocks' = bottomUp convertSpace $ blocks + let blocks' = bottomUp convertSpace blocks doc' <- blocksToOpenXML opts blocks' notes' <- reverse `fmap` gets stFootnotes - let meta' = title ++ authors ++ date - let stdAttributes = - [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main") - ,("xmlns:m","http://schemas.openxmlformats.org/officeDocument/2006/math") - ,("xmlns:r","http://schemas.openxmlformats.org/officeDocument/2006/relationships") - ,("xmlns:o","urn:schemas-microsoft-com:office:office") - ,("xmlns:v","urn:schemas-microsoft-com:vml") - ,("xmlns:w10","urn:schemas-microsoft-com:office:word") - ,("xmlns:a","http://schemas.openxmlformats.org/drawingml/2006/main") - ,("xmlns:pic","http://schemas.openxmlformats.org/drawingml/2006/picture") - ,("xmlns:wp","http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing")] - let doc = mknode "w:document" stdAttributes $ mknode "w:body" [] (meta' ++ doc') - let notes = mknode "w:footnotes" stdAttributes notes' - return (doc, notes) + let meta' = title ++ subtitle ++ authors ++ date ++ abstract + return (meta' ++ doc', notes') -- | Convert a list of Pandoc blocks to OpenXML. blocksToOpenXML :: WriterOptions -> [Block] -> WS [Element] @@ -441,10 +635,18 @@ getUniqueId = liftIO $ (show . (+ 20) . hashUnique) `fmap` newUnique -- | Convert a Pandoc block element to OpenXML. blockToOpenXML :: WriterOptions -> Block -> WS [Element] blockToOpenXML _ Null = return [] +blockToOpenXML opts (Div (_,["references"],_) bs) = do + let (hs, bs') = span isHeaderBlock bs + header <- blocksToOpenXML opts hs + -- We put the Bibliography style on paragraphs after the header + rest <- withParaProp (pStyle "Bibliography") $ blocksToOpenXML opts bs' + return (header ++ rest) blockToOpenXML opts (Div _ bs) = blocksToOpenXML opts bs blockToOpenXML opts (Header lev (ident,_,_) lst) = do - contents <- withParaProp (pStyle $ "Heading" ++ show lev) $ - blockToOpenXML opts (Para lst) + headingStyles <- gets stHeadingStyles + paraProps <- maybe id (withParaProp . pStyle) (lookup lev headingStyles) $ + getParaProps False + contents <- inlinesToOpenXML opts lst usedIdents <- gets stSectionIds let bookmarkName = if null ident then uniqueIdent lst usedIdents @@ -454,7 +656,7 @@ blockToOpenXML opts (Header lev (ident,_,_) lst) = do let bookmarkStart = mknode "w:bookmarkStart" [("w:id", id') ,("w:name",bookmarkName)] () let bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] () - return $ [bookmarkStart] ++ contents ++ [bookmarkEnd] + return [mknode "w:p" [] (paraProps ++ [bookmarkStart, bookmarkEnd] ++ contents)] blockToOpenXML opts (Plain lst) = withParaProp (pStyle "Compact") $ blockToOpenXML opts (Para lst) -- title beginning with fig: indicates that the image is a figure @@ -494,25 +696,30 @@ blockToOpenXML opts (Table caption aligns widths headers rows) = do let cellToOpenXML (al, cell) = withParaProp (alignmentFor al) $ blocksToOpenXML opts cell headers' <- mapM cellToOpenXML $ zip aligns headers - rows' <- mapM (\cells -> mapM cellToOpenXML $ zip aligns cells) - $ rows + rows' <- mapM (mapM cellToOpenXML . zip aligns) rows let borderProps = mknode "w:tcPr" [] [ mknode "w:tcBorders" [] $ mknode "w:bottom" [("w:val","single")] () , mknode "w:vAlign" [("w:val","bottom")] () ] + let emptyCell = [mknode "w:p" [] [mknode "w:pPr" [] + [mknode "w:pStyle" [("w:val","Compact")] ()]]] let mkcell border contents = mknode "w:tc" [] $ [ borderProps | border ] ++ if null contents - then [mknode "w:p" [] ()] + then emptyCell else contents let mkrow border cells = mknode "w:tr" [] $ map (mkcell border) cells let textwidth = 7920 -- 5.5 in in twips, 1/20 pt + let fullrow = 5000 -- 100% specified in pct + let rowwidth = fullrow * sum widths let mkgridcol w = mknode "w:gridCol" - [("w:w", show $ (floor (textwidth * w) :: Integer))] () + [("w:w", show (floor (textwidth * w) :: Integer))] () return $ - [ mknode "w:tbl" [] + caption' ++ + [mknode "w:tbl" [] ( mknode "w:tblPr" [] - ( [ mknode "w:tblStyle" [("w:val","TableNormal")] () ] ++ + ( mknode "w:tblStyle" [("w:val","TableNormal")] () : + mknode "w:tblW" [("w:type", "pct"), ("w:w", show rowwidth)] () : [ mknode "w:tblCaption" [("w:val", captionStr)] () | not (null caption) ] ) : mknode "w:tblGrid" [] @@ -521,8 +728,7 @@ blockToOpenXML opts (Table caption aligns widths headers rows) = do else map mkgridcol widths) : [ mkrow True headers' | not (all null headers) ] ++ map (mkrow False) rows' - ) - ] ++ caption' + )] blockToOpenXML opts (BulletList lst) = do let marker = BulletMarker addList marker @@ -548,17 +754,13 @@ addList :: ListMarker -> WS () addList marker = do lists <- gets stLists modify $ \st -> st{ stLists = lists ++ [marker] } - numStyles <- gets stNumStyles - case M.lookup marker numStyles of - Just _ -> return () - Nothing -> modify $ \st -> - st{ stNumStyles = M.insert marker (M.size numStyles + 1) numStyles } listItemToOpenXML :: WriterOptions -> Int -> [Block] -> WS [Element] listItemToOpenXML _ _ [] = return [] listItemToOpenXML opts numid (first:rest) = do first' <- withNumId numid $ blockToOpenXML opts first - rest' <- withNumId 1 $ blocksToOpenXML opts rest + -- baseListId is the code for no list marker: + rest' <- withNumId baseListId $ blocksToOpenXML opts rest return $ first' ++ rest' alignmentToString :: Alignment -> [Char] @@ -593,7 +795,7 @@ getTextProps = do props <- gets stTextProperties return $ if null props then [] - else [mknode "w:rPr" [] $ props] + else [mknode "w:rPr" [] props] pushTextProp :: Element -> WS () pushTextProp d = modify $ \s -> s{ stTextProperties = d : stTextProperties s } @@ -639,20 +841,49 @@ withParaProp d p = do formattedString :: String -> WS [Element] formattedString str = do props <- getTextProps + inDel <- gets stInDel return [ mknode "w:r" [] $ props ++ - [ mknode "w:t" [("xml:space","preserve")] str ] ] + [ mknode (if inDel then "w:delText" else "w:t") + [("xml:space","preserve")] str ] ] -- | Convert an inline element to OpenXML. inlineToOpenXML :: WriterOptions -> Inline -> WS [Element] inlineToOpenXML _ (Str str) = formattedString str inlineToOpenXML opts Space = inlineToOpenXML opts (Str " ") -inlineToOpenXML opts (Span (_,classes,_) ils) = do - let off x = withTextProp (mknode x [("w:val","0")] ()) - ((if "csl-no-emph" `elem` classes then off "w:i" else id) . - (if "csl-no-strong" `elem` classes then off "w:b" else id) . - (if "csl-no-smallcaps" `elem` classes then off "w:smallCaps" else id)) - $ inlinesToOpenXML opts ils +inlineToOpenXML opts (Span (_,classes,kvs) ils) + | "insertion" `elem` classes = do + defaultAuthor <- gets stChangesAuthor + defaultDate <- gets stChangesDate + let author = fromMaybe defaultAuthor (lookup "author" kvs) + date = fromMaybe defaultDate (lookup "date" kvs) + insId <- gets stInsId + modify $ \s -> s{stInsId = (insId + 1)} + x <- inlinesToOpenXML opts ils + return [ mknode "w:ins" [("w:id", (show insId)), + ("w:author", author), + ("w:date", date)] + x ] + | "deletion" `elem` classes = do + defaultAuthor <- gets stChangesAuthor + defaultDate <- gets stChangesDate + let author = fromMaybe defaultAuthor (lookup "author" kvs) + date = fromMaybe defaultDate (lookup "date" kvs) + delId <- gets stDelId + modify $ \s -> s{stDelId = (delId + 1)} + modify $ \s -> s{stInDel = True} + x <- inlinesToOpenXML opts ils + modify $ \s -> s{stInDel = False} + return [ mknode "w:del" [("w:id", (show delId)), + ("w:author", author), + ("w:date", date)] + x ] + | otherwise = do + let off x = withTextProp (mknode x [("w:val","0")] ()) + ((if "csl-no-emph" `elem` classes then off "w:i" else id) . + (if "csl-no-strong" `elem` classes then off "w:b" else id) . + (if "csl-no-smallcaps" `elem` classes then off "w:smallCaps" else id)) + $ inlinesToOpenXML opts ils inlineToOpenXML opts (Strong lst) = withTextProp (mknode "w:b" [] ()) $ inlinesToOpenXML opts lst inlineToOpenXML opts (Emph lst) = @@ -682,9 +913,9 @@ inlineToOpenXML opts (Math mathType str) = do let displayType = if mathType == DisplayMath then DisplayBlock else DisplayInline - case texMathToOMML displayType str of + case writeOMML displayType <$> readTeX str of Right r -> return [r] - Left _ -> inlinesToOpenXML opts (readTeXMath' mathType str) + Left _ -> inlinesToOpenXML opts (texMathToInlines mathType str) inlineToOpenXML opts (Cite _ lst) = inlinesToOpenXML opts lst inlineToOpenXML opts (Code attrs str) = withTextProp (rStyle "VerbatimChar") @@ -741,11 +972,13 @@ inlineToOpenXML opts (Link txt (src,_)) = do return [ mknode "w:hyperlink" [("r:id",id')] contents ] inlineToOpenXML opts (Image alt (src, tit)) = do -- first, check to see if we've already done this image + pageWidth <- gets stPrintWidth imgs <- gets stImages case M.lookup src imgs of Just (_,_,_,elt,_) -> return [elt] Nothing -> do - res <- liftIO $ fetchItem (writerSourceURL opts) src + res <- liftIO $ + fetchItem' (writerMediaBag opts) (writerSourceURL opts) src case res of Left (_ :: E.SomeException) -> do liftIO $ warn $ "Could not find image `" ++ src ++ "', skipping..." @@ -756,7 +989,7 @@ inlineToOpenXML opts (Image alt (src, tit)) = do let size = imageSize img let (xpt,ypt) = maybe (120,120) sizeInPoints size -- 12700 emu = 1 pt - let (xemu,yemu) = (xpt * 12700, ypt * 12700) + let (xemu,yemu) = fitToPage (xpt * 12700, ypt * 12700) (pageWidth * 12700) let cNvPicPr = mknode "pic:cNvPicPr" [] $ mknode "a:picLocks" [("noChangeArrowheads","1"),("noChangeAspect","1")] () let nvPicPr = mknode "pic:nvPicPr" [] @@ -814,10 +1047,20 @@ inlineToOpenXML opts (Image alt (src, tit)) = do br :: Element br = mknode "w:r" [] [mknode "w:br" [("w:type","textWrapping")] () ] -parseXml :: Archive -> String -> IO Element -parseXml refArchive relpath = - case findEntryByPath relpath refArchive of - Just e -> case parseXMLDoc $ UTF8.toStringLazy $ fromEntry e of - Just d -> return d - Nothing -> fail $ relpath ++ " corrupt in reference docx" - Nothing -> fail $ relpath ++ " missing in reference docx" +parseXml :: Archive -> Archive -> String -> IO Element +parseXml refArchive distArchive relpath = + case ((findEntryByPath relpath refArchive `mplus` + findEntryByPath relpath distArchive) + >>= parseXMLDoc . UTF8.toStringLazy . fromEntry) of + Just d -> return d + Nothing -> fail $ relpath ++ " corrupt or missing in reference docx" + +-- | Scales the image to fit the page +-- sizes are passed in emu +fitToPage :: (Integer, Integer) -> Integer -> (Integer, Integer) +fitToPage (x, y) pageWidth + -- Fixes width to the page width and scales the height + | x > pageWidth = + (pageWidth, round $ + ((fromIntegral pageWidth) / ((fromIntegral :: Integer -> Double) x)) * (fromIntegral y)) + | otherwise = (x, y) |