aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Writers/Docx.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Writers/Docx.hs')
-rw-r--r--src/Text/Pandoc/Writers/Docx.hs483
1 files changed, 363 insertions, 120 deletions
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 2a834c2da..5b9cc62ab 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -1,6 +1,6 @@
-{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE ScopedTypeVariables, PatternGuards #-}
{-
-Copyright (C) 2012 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2012-2014 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Writers.Docx
- Copyright : Copyright (C) 2012 John MacFarlane
+ Copyright : Copyright (C) 2012-2014 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -29,8 +29,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of 'Pandoc' documents to docx.
-}
module Text.Pandoc.Writers.Docx ( writeDocx ) where
-import Data.Maybe (fromMaybe)
-import Data.List ( intercalate, isPrefixOf, isSuffixOf )
+import Data.List ( intercalate, isPrefixOf, isSuffixOf, stripPrefix )
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString.Lazy.Char8 as BL8
@@ -39,6 +38,10 @@ import qualified Text.Pandoc.UTF8 as UTF8
import Text.Pandoc.Compat.Monoid ((<>))
import Codec.Archive.Zip
import Data.Time.Clock.POSIX
+import Data.Time.Clock
+import Data.Time.Format
+import System.Environment
+import System.Locale
import Text.Pandoc.Definition
import Text.Pandoc.Generic
import Text.Pandoc.ImageSize
@@ -49,7 +52,7 @@ import Text.Pandoc.Readers.TeXMath
import Text.Pandoc.Highlighting ( highlight )
import Text.Pandoc.Walk
import Text.Highlighting.Kate.Types ()
-import Text.XML.Light
+import Text.XML.Light as XML
import Text.TeXMath
import Control.Monad.State
import Text.Highlighting.Kate
@@ -57,8 +60,35 @@ import Data.Unique (hashUnique, newUnique)
import System.Random (randomRIO)
import Text.Printf (printf)
import qualified Control.Exception as E
-import Text.Pandoc.MIME (getMimeType, extensionFromMimeType)
-import Control.Applicative ((<|>))
+import Text.Pandoc.MIME (MimeType, getMimeType, getMimeTypeDef,
+ extensionFromMimeType)
+import Control.Applicative ((<$>), (<|>), (<*>))
+import Data.Maybe (fromMaybe, mapMaybe)
+import Data.Char (isDigit)
+
+data ListMarker = NoMarker
+ | BulletMarker
+ | NumberMarker ListNumberStyle ListNumberDelim Int
+ deriving (Show, Read, Eq, Ord)
+
+listMarkerToId :: ListMarker -> String
+listMarkerToId NoMarker = "990"
+listMarkerToId BulletMarker = "991"
+listMarkerToId (NumberMarker sty delim n) =
+ '9' : '9' : styNum : delimNum : show n
+ where styNum = case sty of
+ DefaultStyle -> '2'
+ Example -> '3'
+ Decimal -> '4'
+ LowerRoman -> '5'
+ UpperRoman -> '6'
+ LowerAlpha -> '7'
+ UpperAlpha -> '8'
+ delimNum = case delim of
+ DefaultDelim -> '0'
+ Period -> '1'
+ OneParen -> '2'
+ TwoParens -> '3'
data WriterState = WriterState{
stTextProperties :: [Element]
@@ -66,18 +96,19 @@ data WriterState = WriterState{
, stFootnotes :: [Element]
, stSectionIds :: [String]
, stExternalLinks :: M.Map String String
- , stImages :: M.Map FilePath (String, String, Maybe String, Element, B.ByteString)
+ , stImages :: M.Map FilePath (String, String, Maybe MimeType, Element, B.ByteString)
, stListLevel :: Int
, stListNumId :: Int
- , stNumStyles :: M.Map ListMarker Int
, stLists :: [ListMarker]
+ , stInsId :: Int
+ , stDelId :: Int
+ , stInDel :: Bool
+ , stChangesAuthor :: String
+ , stChangesDate :: String
+ , stPrintWidth :: Integer
+ , stHeadingStyles :: [(Int,String)]
}
-data ListMarker = NoMarker
- | BulletMarker
- | NumberMarker ListNumberStyle ListNumberDelim Int
- deriving (Show, Read, Eq, Ord)
-
defaultWriterState :: WriterState
defaultWriterState = WriterState{
stTextProperties = []
@@ -88,15 +119,27 @@ defaultWriterState = WriterState{
, stImages = M.empty
, stListLevel = -1
, stListNumId = 1
- , stNumStyles = M.fromList [(NoMarker, 0)]
, stLists = [NoMarker]
+ , stInsId = 1
+ , stDelId = 1
+ , stInDel = False
+ , stChangesAuthor = "unknown"
+ , stChangesDate = "1969-12-31T19:00:00Z"
+ , stPrintWidth = 1
+ , stHeadingStyles = []
}
type WS a = StateT WriterState IO a
mknode :: Node t => String -> [(String,String)] -> t -> Element
mknode s attrs =
- add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) . node (unqual s)
+ add_attrs (map (\(k,v) -> Attr (nodename k) v) attrs) . node (nodename s)
+
+nodename :: String -> QName
+nodename s = QName{ qName = name, qURI = Nothing, qPrefix = prefix }
+ where (name, prefix) = case break (==':') s of
+ (xs,[]) -> (xs, Nothing)
+ (ys, _:zs) -> (zs, Just ys)
toLazy :: B.ByteString -> BL.ByteString
toLazy = BL.fromChunks . (:[])
@@ -105,6 +148,31 @@ renderXml :: Element -> BL.ByteString
renderXml elt = BL8.pack "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" <>
UTF8.fromStringLazy (showElement elt)
+renumIdMap :: Int -> [Element] -> M.Map String String
+renumIdMap _ [] = M.empty
+renumIdMap n (e:es)
+ | Just oldId <- findAttr (QName "Id" Nothing Nothing) e =
+ M.insert oldId ("rId" ++ (show n)) (renumIdMap (n+1) es)
+ | otherwise = renumIdMap n es
+
+replaceAttr :: (QName -> Bool) -> String -> [XML.Attr] -> [XML.Attr]
+replaceAttr _ _ [] = []
+replaceAttr f val (a:as) | f (attrKey a) =
+ (XML.Attr (attrKey a) val) : (replaceAttr f val as)
+ | otherwise = a : (replaceAttr f val as)
+
+renumId :: (QName -> Bool) -> (M.Map String String) -> Element -> Element
+renumId f renumMap e
+ | Just oldId <- findAttrBy f e
+ , Just newId <- M.lookup oldId renumMap =
+ let attrs' = replaceAttr f newId (elAttribs e)
+ in
+ e { elAttribs = attrs' }
+ | otherwise = e
+
+renumIds :: (QName -> Bool) -> (M.Map String String) -> [Element] -> [Element]
+renumIds f renumMap = map (renumId f renumMap)
+
-- | Produce an Docx file from a Pandoc document.
writeDocx :: WriterOptions -- ^ Writer options
-> Pandoc -- ^ Document to convert
@@ -112,16 +180,92 @@ writeDocx :: WriterOptions -- ^ Writer options
writeDocx opts doc@(Pandoc meta _) = do
let datadir = writerUserDataDir opts
let doc' = walk fixDisplayMath doc
+ username <- lookup "USERNAME" <$> getEnvironment
+ utctime <- getCurrentTime
refArchive <- liftM (toArchive . toLazy) $
case writerReferenceDocx opts of
Just f -> B.readFile f
Nothing -> readDataFile datadir "reference.docx"
+ distArchive <- liftM (toArchive . toLazy) $ readDataFile datadir "reference.docx"
+
+ parsedDoc <- parseXml refArchive distArchive "word/document.xml"
+ let wname f qn = qPrefix qn == Just "w" && f (qName qn)
+ let mbsectpr = filterElementName (wname (=="sectPr")) parsedDoc
+
+ -- Gets the template size
+ let mbpgsz = mbsectpr >>= (filterElementName (wname (=="pgSz")))
+ let mbAttrSzWidth = (elAttribs <$> mbpgsz) >>= (lookupAttrBy ((=="w") . qName))
+
+ let mbpgmar = mbsectpr >>= (filterElementName (wname (=="pgMar")))
+ let mbAttrMarLeft = (elAttribs <$> mbpgmar) >>= (lookupAttrBy ((=="left") . qName))
+ let mbAttrMarRight = (elAttribs <$> mbpgmar) >>= (lookupAttrBy ((=="right") . qName))
+
+ -- Get the avaible area (converting the size and the margins to int and
+ -- doing the difference
+ let pgContentWidth = (-) <$> (read <$> mbAttrSzWidth ::Maybe Integer)
+ <*> (
+ (+) <$> (read <$> mbAttrMarRight ::Maybe Integer)
+ <*> (read <$> mbAttrMarLeft ::Maybe Integer)
+ )
+
+ -- styles
+ let stylepath = "word/styles.xml"
+ styledoc <- parseXml refArchive distArchive stylepath
+
+ -- parse styledoc for heading styles
+ let styleNamespaces = map ((,) <$> qName . attrKey <*> attrVal) .
+ filter ((==Just "xmlns") . qPrefix . attrKey) .
+ elAttribs $ styledoc
+ let headingStyles =
+ let
+ mywURI = lookup "w" styleNamespaces
+ myName name = QName name mywURI (Just "w")
+ getAttrStyleId = findAttr (myName "styleId")
+ getNameVal = findChild (myName "name") >=> findAttr (myName "val")
+ getNum s | not $ null s, all isDigit s = Just (read s :: Int)
+ | otherwise = Nothing
+ getEngHeader = getAttrStyleId >=> stripPrefix "Heading" >=> getNum
+ getIntHeader = getNameVal >=> stripPrefix "heading " >=> getNum
+ toTuple getF = liftM2 (,) <$> getF <*> getAttrStyleId
+ toMap getF = mapMaybe (toTuple getF) $
+ findChildren (myName "style") styledoc
+ select a b | not $ null a = a
+ | otherwise = b
+ in
+ select (toMap getEngHeader) (toMap getIntHeader)
((contents, footnotes), st) <- runStateT (writeOpenXML opts{writerWrapText = False} doc')
- defaultWriterState
- epochtime <- floor `fmap` getPOSIXTime
+ defaultWriterState{ stChangesAuthor = fromMaybe "unknown" username
+ , stChangesDate = formatTime defaultTimeLocale "%FT%XZ" utctime
+ , stPrintWidth = (maybe 420 (\x -> quot x 20) pgContentWidth)
+ , stHeadingStyles = headingStyles}
+ let epochtime = floor $ utcTimeToPOSIXSeconds utctime
let imgs = M.elems $ stImages st
+ -- create entries for images in word/media/...
+ let toImageEntry (_,path,_,_,img) = toEntry ("word/" ++ path) epochtime $ toLazy img
+ let imageEntries = map toImageEntry imgs
+
+ let stdAttributes =
+ [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main")
+ ,("xmlns:m","http://schemas.openxmlformats.org/officeDocument/2006/math")
+ ,("xmlns:r","http://schemas.openxmlformats.org/officeDocument/2006/relationships")
+ ,("xmlns:o","urn:schemas-microsoft-com:office:office")
+ ,("xmlns:v","urn:schemas-microsoft-com:vml")
+ ,("xmlns:w10","urn:schemas-microsoft-com:office:word")
+ ,("xmlns:a","http://schemas.openxmlformats.org/drawingml/2006/main")
+ ,("xmlns:pic","http://schemas.openxmlformats.org/drawingml/2006/picture")
+ ,("xmlns:wp","http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing")]
+
+
+ parsedRels <- parseXml refArchive distArchive "word/_rels/document.xml.rels"
+ let isHeaderNode e = findAttr (QName "Type" Nothing Nothing) e == Just "http://schemas.openxmlformats.org/officeDocument/2006/relationships/header"
+ let isFooterNode e = findAttr (QName "Type" Nothing Nothing) e == Just "http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer"
+ let headers = filterElements isHeaderNode parsedRels
+ let footers = filterElements isFooterNode parsedRels
+
+ let extractTarget = findAttr (QName "Target" Nothing Nothing)
+
-- we create [Content_Types].xml and word/_rels/document.xml.rels
-- from scratch rather than reading from reference.docx,
-- because Word sometimes changes these files when a reference.docx is modified,
@@ -132,9 +276,11 @@ writeDocx opts doc@(Pandoc meta _) = do
let mkOverrideNode (part', contentType') = mknode "Override"
[("PartName",part'),("ContentType",contentType')] ()
let mkImageOverride (_, imgpath, mbMimeType, _, _) =
- mkOverrideNode ("/word/" ++ imgpath,
- fromMaybe "application/octet-stream" mbMimeType)
- let overrides = map mkOverrideNode
+ mkOverrideNode ("/word/" ++ imgpath,
+ fromMaybe "application/octet-stream" mbMimeType)
+ let mkMediaOverride imgpath =
+ mkOverrideNode ('/':imgpath, getMimeTypeDef imgpath)
+ let overrides = map mkOverrideNode (
[("/word/webSettings.xml",
"application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml")
,("/word/numbering.xml",
@@ -155,7 +301,15 @@ writeDocx opts doc@(Pandoc meta _) = do
"application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml")
,("/word/footnotes.xml",
"application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml")
- ] ++ map mkImageOverride imgs
+ ] ++
+ map (\x -> (maybe "" ("/word/" ++) $ extractTarget x,
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml")) headers ++
+ map (\x -> (maybe "" ("/word/" ++) $ extractTarget x,
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml")) footers) ++
+ map mkImageOverride imgs ++
+ map mkMediaOverride [ eRelativePath e | e <- zEntries refArchive
+ , "word/media/" `isPrefixOf` eRelativePath e ]
+
let defaultnodes = [mknode "Default"
[("Extension","xml"),("ContentType","application/xml")] (),
mknode "Default"
@@ -169,7 +323,7 @@ writeDocx opts doc@(Pandoc meta _) = do
[("Type",url')
,("Id",id')
,("Target",target')] ()
- let baserels = map toBaseRel
+ let baserels' = map toBaseRel
[("http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering",
"rId1",
"numbering.xml")
@@ -190,7 +344,13 @@ writeDocx opts doc@(Pandoc meta _) = do
"theme/theme1.xml")
,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes",
"rId7",
- "footnotes.xml")]
+ "footnotes.xml")
+ ]
+
+ let idMap = renumIdMap (length baserels' + 1) (headers ++ footers)
+ let renumHeaders = renumIds (\q -> qName q == "Id") idMap headers
+ let renumFooters = renumIds (\q -> qName q == "Id") idMap footers
+ let baserels = baserels' ++ renumHeaders ++ renumFooters
let toImgRel (ident,path,_,_,_) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"),("Id",ident),("Target",path)] ()
let imgrels = map toImgRel imgs
let toLinkRel (src,ident) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"),("Id",ident),("Target",src),("TargetMode","External") ] ()
@@ -199,33 +359,55 @@ writeDocx opts doc@(Pandoc meta _) = do
let relEntry = toEntry "word/_rels/document.xml.rels" epochtime
$ renderXml reldoc
- -- create entries for images in word/media/...
- let toImageEntry (_,path,_,_,img) = toEntry ("word/" ++ path) epochtime $ toLazy img
- let imageEntries = map toImageEntry imgs
+
+ -- adjust contents to add sectPr from reference.docx
+ let sectpr = case mbsectpr of
+ Just sectpr' -> let cs = renumIds
+ (\q -> qName q == "id" && qPrefix q == Just "r")
+ idMap
+ (elChildren sectpr')
+ in
+ add_attrs (elAttribs sectpr') $ mknode "w:sectPr" [] cs
+ Nothing -> (mknode "w:sectPr" [] ())
+
+ -- let sectpr = fromMaybe (mknode "w:sectPr" [] ()) mbsectpr'
+ let contents' = contents ++ [sectpr]
+ let docContents = mknode "w:document" stdAttributes
+ $ mknode "w:body" [] contents'
+
+
-- word/document.xml
- let contentEntry = toEntry "word/document.xml" epochtime $ renderXml contents
+ let contentEntry = toEntry "word/document.xml" epochtime
+ $ renderXml docContents
-- footnotes
- let footnotesEntry = toEntry "word/footnotes.xml" epochtime $ renderXml footnotes
+ let notes = mknode "w:footnotes" stdAttributes footnotes
+ let footnotesEntry = toEntry "word/footnotes.xml" epochtime $ renderXml notes
-- footnote rels
let footnoteRelEntry = toEntry "word/_rels/footnotes.xml.rels" epochtime
$ renderXml $ mknode "Relationships" [("xmlns","http://schemas.openxmlformats.org/package/2006/relationships")]
- $ linkrels
+ linkrels
-- styles
let newstyles = styleToOpenXml $ writerHighlightStyle opts
- let stylepath = "word/styles.xml"
- styledoc <- parseXml refArchive stylepath
let styledoc' = styledoc{ elContent = elContent styledoc ++
[Elem x | x <- newstyles, writerHighlight opts] }
let styleEntry = toEntry stylepath epochtime $ renderXml styledoc'
-- construct word/numbering.xml
let numpath = "word/numbering.xml"
- numEntry <- (toEntry numpath epochtime . renderXml)
- `fmap` mkNumbering (stNumStyles st) (stLists st)
+ numbering <- parseXml refArchive distArchive numpath
+ newNumElts <- mkNumbering (stLists st)
+ let allElts = onlyElems (elContent numbering) ++ newNumElts
+ let numEntry = toEntry numpath epochtime $ renderXml numbering{ elContent =
+ -- we want all the abstractNums first, then the nums,
+ -- otherwise things break:
+ [Elem e | e <- allElts
+ , qName (elName e) == "abstractNum" ] ++
+ [Elem e | e <- allElts
+ , qName (elName e) == "num" ] }
let docPropsPath = "docProps/core.xml"
let docProps = mknode "cp:coreProperties"
[("xmlns:cp","http://schemas.openxmlformats.org/package/2006/metadata/core-properties")
@@ -236,8 +418,8 @@ writeDocx opts doc@(Pandoc meta _) = do
$ mknode "dc:title" [] (stringify $ docTitle meta)
: mknode "dc:creator" [] (intercalate "; " (map stringify $ docAuthors meta))
: maybe []
- (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] $ x
- , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] $ x
+ (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x
+ , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x
]) (normalizeDate $ stringify $ docDate meta)
let docPropsEntry = toEntry docPropsPath epochtime $ renderXml docProps
@@ -256,19 +438,27 @@ writeDocx opts doc@(Pandoc meta _) = do
]
let relsEntry = toEntry relsPath epochtime $ renderXml rels
- let entryFromArchive path = (toEntry path epochtime . renderXml) `fmap`
- parseXml refArchive path
- docPropsAppEntry <- entryFromArchive "docProps/app.xml"
- themeEntry <- entryFromArchive "word/theme/theme1.xml"
- fontTableEntry <- entryFromArchive "word/fontTable.xml"
- settingsEntry <- entryFromArchive "word/settings.xml"
- webSettingsEntry <- entryFromArchive "word/webSettings.xml"
- let miscRels = [ f | f <- filesInArchive refArchive
- , "word/_rels/" `isPrefixOf` f
- , ".xml.rels" `isSuffixOf` f
- , f /= "word/_rels/document.xml.rels"
- , f /= "word/_rels/footnotes.xml.rels" ]
- miscRelEntries <- mapM entryFromArchive miscRels
+ let entryFromArchive arch path =
+ maybe (fail $ path ++ " corrupt or missing in reference docx")
+ return
+ (findEntryByPath path arch `mplus` findEntryByPath path distArchive)
+ docPropsAppEntry <- entryFromArchive refArchive "docProps/app.xml"
+ themeEntry <- entryFromArchive refArchive "word/theme/theme1.xml"
+ fontTableEntry <- entryFromArchive refArchive "word/fontTable.xml"
+ -- we use dist archive for settings.xml, because Word sometimes
+ -- adds references to footnotes or endnotes we don't have...
+ settingsEntry <- entryFromArchive distArchive "word/settings.xml"
+ webSettingsEntry <- entryFromArchive refArchive "word/webSettings.xml"
+ headerFooterEntries <- mapM (entryFromArchive refArchive) $
+ mapMaybe (fmap ("word/" ++) . extractTarget)
+ (headers ++ footers)
+ let miscRelEntries = [ e | e <- zEntries refArchive
+ , "word/_rels/" `isPrefixOf` (eRelativePath e)
+ , ".xml.rels" `isSuffixOf` (eRelativePath e)
+ , eRelativePath e /= "word/_rels/document.xml.rels"
+ , eRelativePath e /= "word/_rels/footnotes.xml.rels" ]
+ let otherMediaEntries = [ e | e <- zEntries refArchive
+ , "word/media/" `isPrefixOf` eRelativePath e ]
-- Create archive
let archive = foldr addEntryToArchive emptyArchive $
@@ -276,7 +466,8 @@ writeDocx opts doc@(Pandoc meta _) = do
footnoteRelEntry : numEntry : styleEntry : footnotesEntry :
docPropsEntry : docPropsAppEntry : themeEntry :
fontTableEntry : settingsEntry : webSettingsEntry :
- imageEntries ++ miscRelEntries
+ imageEntries ++ headerFooterEntries ++
+ miscRelEntries ++ otherMediaEntries
return $ fromArchive archive
styleToOpenXml :: Style -> [Element]
@@ -314,29 +505,30 @@ styleToOpenXml style = parStyle : map toStyle alltoktypes
$ backgroundColor style )
]
-mkNumbering :: M.Map ListMarker Int -> [ListMarker] -> IO Element
-mkNumbering markers lists = do
- elts <- mapM mkAbstractNum (M.toList markers)
- return $ mknode "w:numbering"
- [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main")]
- $ elts ++ zipWith (mkNum markers) lists [1..(length lists)]
+-- this is the lowest number used for a list numId
+baseListId :: Int
+baseListId = 1000
+
+mkNumbering :: [ListMarker] -> IO [Element]
+mkNumbering lists = do
+ elts <- mapM mkAbstractNum (ordNub lists)
+ return $ elts ++ zipWith mkNum lists [baseListId..(baseListId + length lists - 1)]
-mkNum :: M.Map ListMarker Int -> ListMarker -> Int -> Element
-mkNum markers marker numid =
+mkNum :: ListMarker -> Int -> Element
+mkNum marker numid =
mknode "w:num" [("w:numId",show numid)]
- $ mknode "w:abstractNumId" [("w:val",show absnumid)] ()
+ $ mknode "w:abstractNumId" [("w:val",listMarkerToId marker)] ()
: case marker of
NoMarker -> []
BulletMarker -> []
NumberMarker _ _ start ->
map (\lvl -> mknode "w:lvlOverride" [("w:ilvl",show (lvl :: Int))]
$ mknode "w:startOverride" [("w:val",show start)] ()) [0..6]
- where absnumid = fromMaybe 0 $ M.lookup marker markers
-mkAbstractNum :: (ListMarker,Int) -> IO Element
-mkAbstractNum (marker,numid) = do
+mkAbstractNum :: ListMarker -> IO Element
+mkAbstractNum marker = do
nsid <- randomRIO (0x10000000 :: Integer, 0xFFFFFFFF :: Integer)
- return $ mknode "w:abstractNum" [("w:abstractNumId",show numid)]
+ return $ mknode "w:abstractNum" [("w:abstractNumId",listMarkerToId marker)]
$ mknode "w:nsid" [("w:val", printf "%8x" nsid)] ()
: mknode "w:multiLevelType" [("w:val","multilevel")] ()
: map (mkLvl marker) [0..6]
@@ -388,40 +580,42 @@ mkLvl marker lvl =
patternFor _ s = s ++ "."
getNumId :: WS Int
-getNumId = length `fmap` gets stLists
+getNumId = (((baseListId - 1) +) . length) `fmap` gets stLists
--- | Convert Pandoc document to two OpenXML elements (the main document and footnotes).
-writeOpenXML :: WriterOptions -> Pandoc -> WS (Element, Element)
+-- | Convert Pandoc document to two lists of
+-- OpenXML elements (the main document and footnotes).
+writeOpenXML :: WriterOptions -> Pandoc -> WS ([Element], [Element])
writeOpenXML opts (Pandoc meta blocks) = do
let tit = docTitle meta ++ case lookupMeta "subtitle" meta of
Just (MetaBlocks [Plain xs]) -> LineBreak : xs
_ -> []
let auths = docAuthors meta
let dat = docDate meta
+ let abstract' = case lookupMeta "abstract" meta of
+ Just (MetaBlocks bs) -> bs
+ Just (MetaInlines ils) -> [Plain ils]
+ _ -> []
+ let subtitle' = case lookupMeta "subtitle" meta of
+ Just (MetaBlocks [Plain xs]) -> xs
+ Just (MetaBlocks [Para xs]) -> xs
+ Just (MetaInlines xs) -> xs
+ _ -> []
title <- withParaProp (pStyle "Title") $ blocksToOpenXML opts [Para tit | not (null tit)]
- authors <- withParaProp (pStyle "Authors") $ blocksToOpenXML opts
- [Para (intercalate [LineBreak] auths) | not (null auths)]
+ subtitle <- withParaProp (pStyle "Subtitle") $ blocksToOpenXML opts [Para subtitle' | not (null subtitle')]
+ authors <- withParaProp (pStyle "Author") $ blocksToOpenXML opts $
+ map Para auths
date <- withParaProp (pStyle "Date") $ blocksToOpenXML opts [Para dat | not (null dat)]
+ abstract <- if null abstract'
+ then return []
+ else withParaProp (pStyle "Abstract") $ blocksToOpenXML opts abstract'
let convertSpace (Str x : Space : Str y : xs) = Str (x ++ " " ++ y) : xs
convertSpace (Str x : Str y : xs) = Str (x ++ y) : xs
convertSpace xs = xs
- let blocks' = bottomUp convertSpace $ blocks
+ let blocks' = bottomUp convertSpace blocks
doc' <- blocksToOpenXML opts blocks'
notes' <- reverse `fmap` gets stFootnotes
- let meta' = title ++ authors ++ date
- let stdAttributes =
- [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main")
- ,("xmlns:m","http://schemas.openxmlformats.org/officeDocument/2006/math")
- ,("xmlns:r","http://schemas.openxmlformats.org/officeDocument/2006/relationships")
- ,("xmlns:o","urn:schemas-microsoft-com:office:office")
- ,("xmlns:v","urn:schemas-microsoft-com:vml")
- ,("xmlns:w10","urn:schemas-microsoft-com:office:word")
- ,("xmlns:a","http://schemas.openxmlformats.org/drawingml/2006/main")
- ,("xmlns:pic","http://schemas.openxmlformats.org/drawingml/2006/picture")
- ,("xmlns:wp","http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing")]
- let doc = mknode "w:document" stdAttributes $ mknode "w:body" [] (meta' ++ doc')
- let notes = mknode "w:footnotes" stdAttributes notes'
- return (doc, notes)
+ let meta' = title ++ subtitle ++ authors ++ date ++ abstract
+ return (meta' ++ doc', notes')
-- | Convert a list of Pandoc blocks to OpenXML.
blocksToOpenXML :: WriterOptions -> [Block] -> WS [Element]
@@ -441,10 +635,18 @@ getUniqueId = liftIO $ (show . (+ 20) . hashUnique) `fmap` newUnique
-- | Convert a Pandoc block element to OpenXML.
blockToOpenXML :: WriterOptions -> Block -> WS [Element]
blockToOpenXML _ Null = return []
+blockToOpenXML opts (Div (_,["references"],_) bs) = do
+ let (hs, bs') = span isHeaderBlock bs
+ header <- blocksToOpenXML opts hs
+ -- We put the Bibliography style on paragraphs after the header
+ rest <- withParaProp (pStyle "Bibliography") $ blocksToOpenXML opts bs'
+ return (header ++ rest)
blockToOpenXML opts (Div _ bs) = blocksToOpenXML opts bs
blockToOpenXML opts (Header lev (ident,_,_) lst) = do
- contents <- withParaProp (pStyle $ "Heading" ++ show lev) $
- blockToOpenXML opts (Para lst)
+ headingStyles <- gets stHeadingStyles
+ paraProps <- maybe id (withParaProp . pStyle) (lookup lev headingStyles) $
+ getParaProps False
+ contents <- inlinesToOpenXML opts lst
usedIdents <- gets stSectionIds
let bookmarkName = if null ident
then uniqueIdent lst usedIdents
@@ -454,7 +656,7 @@ blockToOpenXML opts (Header lev (ident,_,_) lst) = do
let bookmarkStart = mknode "w:bookmarkStart" [("w:id", id')
,("w:name",bookmarkName)] ()
let bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] ()
- return $ [bookmarkStart] ++ contents ++ [bookmarkEnd]
+ return [mknode "w:p" [] (paraProps ++ [bookmarkStart, bookmarkEnd] ++ contents)]
blockToOpenXML opts (Plain lst) = withParaProp (pStyle "Compact")
$ blockToOpenXML opts (Para lst)
-- title beginning with fig: indicates that the image is a figure
@@ -494,25 +696,30 @@ blockToOpenXML opts (Table caption aligns widths headers rows) = do
let cellToOpenXML (al, cell) = withParaProp (alignmentFor al)
$ blocksToOpenXML opts cell
headers' <- mapM cellToOpenXML $ zip aligns headers
- rows' <- mapM (\cells -> mapM cellToOpenXML $ zip aligns cells)
- $ rows
+ rows' <- mapM (mapM cellToOpenXML . zip aligns) rows
let borderProps = mknode "w:tcPr" []
[ mknode "w:tcBorders" []
$ mknode "w:bottom" [("w:val","single")] ()
, mknode "w:vAlign" [("w:val","bottom")] () ]
+ let emptyCell = [mknode "w:p" [] [mknode "w:pPr" []
+ [mknode "w:pStyle" [("w:val","Compact")] ()]]]
let mkcell border contents = mknode "w:tc" []
$ [ borderProps | border ] ++
if null contents
- then [mknode "w:p" [] ()]
+ then emptyCell
else contents
let mkrow border cells = mknode "w:tr" [] $ map (mkcell border) cells
let textwidth = 7920 -- 5.5 in in twips, 1/20 pt
+ let fullrow = 5000 -- 100% specified in pct
+ let rowwidth = fullrow * sum widths
let mkgridcol w = mknode "w:gridCol"
- [("w:w", show $ (floor (textwidth * w) :: Integer))] ()
+ [("w:w", show (floor (textwidth * w) :: Integer))] ()
return $
- [ mknode "w:tbl" []
+ caption' ++
+ [mknode "w:tbl" []
( mknode "w:tblPr" []
- ( [ mknode "w:tblStyle" [("w:val","TableNormal")] () ] ++
+ ( mknode "w:tblStyle" [("w:val","TableNormal")] () :
+ mknode "w:tblW" [("w:type", "pct"), ("w:w", show rowwidth)] () :
[ mknode "w:tblCaption" [("w:val", captionStr)] ()
| not (null caption) ] )
: mknode "w:tblGrid" []
@@ -521,8 +728,7 @@ blockToOpenXML opts (Table caption aligns widths headers rows) = do
else map mkgridcol widths)
: [ mkrow True headers' | not (all null headers) ] ++
map (mkrow False) rows'
- )
- ] ++ caption'
+ )]
blockToOpenXML opts (BulletList lst) = do
let marker = BulletMarker
addList marker
@@ -548,17 +754,13 @@ addList :: ListMarker -> WS ()
addList marker = do
lists <- gets stLists
modify $ \st -> st{ stLists = lists ++ [marker] }
- numStyles <- gets stNumStyles
- case M.lookup marker numStyles of
- Just _ -> return ()
- Nothing -> modify $ \st ->
- st{ stNumStyles = M.insert marker (M.size numStyles + 1) numStyles }
listItemToOpenXML :: WriterOptions -> Int -> [Block] -> WS [Element]
listItemToOpenXML _ _ [] = return []
listItemToOpenXML opts numid (first:rest) = do
first' <- withNumId numid $ blockToOpenXML opts first
- rest' <- withNumId 1 $ blocksToOpenXML opts rest
+ -- baseListId is the code for no list marker:
+ rest' <- withNumId baseListId $ blocksToOpenXML opts rest
return $ first' ++ rest'
alignmentToString :: Alignment -> [Char]
@@ -593,7 +795,7 @@ getTextProps = do
props <- gets stTextProperties
return $ if null props
then []
- else [mknode "w:rPr" [] $ props]
+ else [mknode "w:rPr" [] props]
pushTextProp :: Element -> WS ()
pushTextProp d = modify $ \s -> s{ stTextProperties = d : stTextProperties s }
@@ -639,20 +841,49 @@ withParaProp d p = do
formattedString :: String -> WS [Element]
formattedString str = do
props <- getTextProps
+ inDel <- gets stInDel
return [ mknode "w:r" [] $
props ++
- [ mknode "w:t" [("xml:space","preserve")] str ] ]
+ [ mknode (if inDel then "w:delText" else "w:t")
+ [("xml:space","preserve")] str ] ]
-- | Convert an inline element to OpenXML.
inlineToOpenXML :: WriterOptions -> Inline -> WS [Element]
inlineToOpenXML _ (Str str) = formattedString str
inlineToOpenXML opts Space = inlineToOpenXML opts (Str " ")
-inlineToOpenXML opts (Span (_,classes,_) ils) = do
- let off x = withTextProp (mknode x [("w:val","0")] ())
- ((if "csl-no-emph" `elem` classes then off "w:i" else id) .
- (if "csl-no-strong" `elem` classes then off "w:b" else id) .
- (if "csl-no-smallcaps" `elem` classes then off "w:smallCaps" else id))
- $ inlinesToOpenXML opts ils
+inlineToOpenXML opts (Span (_,classes,kvs) ils)
+ | "insertion" `elem` classes = do
+ defaultAuthor <- gets stChangesAuthor
+ defaultDate <- gets stChangesDate
+ let author = fromMaybe defaultAuthor (lookup "author" kvs)
+ date = fromMaybe defaultDate (lookup "date" kvs)
+ insId <- gets stInsId
+ modify $ \s -> s{stInsId = (insId + 1)}
+ x <- inlinesToOpenXML opts ils
+ return [ mknode "w:ins" [("w:id", (show insId)),
+ ("w:author", author),
+ ("w:date", date)]
+ x ]
+ | "deletion" `elem` classes = do
+ defaultAuthor <- gets stChangesAuthor
+ defaultDate <- gets stChangesDate
+ let author = fromMaybe defaultAuthor (lookup "author" kvs)
+ date = fromMaybe defaultDate (lookup "date" kvs)
+ delId <- gets stDelId
+ modify $ \s -> s{stDelId = (delId + 1)}
+ modify $ \s -> s{stInDel = True}
+ x <- inlinesToOpenXML opts ils
+ modify $ \s -> s{stInDel = False}
+ return [ mknode "w:del" [("w:id", (show delId)),
+ ("w:author", author),
+ ("w:date", date)]
+ x ]
+ | otherwise = do
+ let off x = withTextProp (mknode x [("w:val","0")] ())
+ ((if "csl-no-emph" `elem` classes then off "w:i" else id) .
+ (if "csl-no-strong" `elem` classes then off "w:b" else id) .
+ (if "csl-no-smallcaps" `elem` classes then off "w:smallCaps" else id))
+ $ inlinesToOpenXML opts ils
inlineToOpenXML opts (Strong lst) =
withTextProp (mknode "w:b" [] ()) $ inlinesToOpenXML opts lst
inlineToOpenXML opts (Emph lst) =
@@ -682,9 +913,9 @@ inlineToOpenXML opts (Math mathType str) = do
let displayType = if mathType == DisplayMath
then DisplayBlock
else DisplayInline
- case texMathToOMML displayType str of
+ case writeOMML displayType <$> readTeX str of
Right r -> return [r]
- Left _ -> inlinesToOpenXML opts (readTeXMath' mathType str)
+ Left _ -> inlinesToOpenXML opts (texMathToInlines mathType str)
inlineToOpenXML opts (Cite _ lst) = inlinesToOpenXML opts lst
inlineToOpenXML opts (Code attrs str) =
withTextProp (rStyle "VerbatimChar")
@@ -741,11 +972,13 @@ inlineToOpenXML opts (Link txt (src,_)) = do
return [ mknode "w:hyperlink" [("r:id",id')] contents ]
inlineToOpenXML opts (Image alt (src, tit)) = do
-- first, check to see if we've already done this image
+ pageWidth <- gets stPrintWidth
imgs <- gets stImages
case M.lookup src imgs of
Just (_,_,_,elt,_) -> return [elt]
Nothing -> do
- res <- liftIO $ fetchItem (writerSourceURL opts) src
+ res <- liftIO $
+ fetchItem' (writerMediaBag opts) (writerSourceURL opts) src
case res of
Left (_ :: E.SomeException) -> do
liftIO $ warn $ "Could not find image `" ++ src ++ "', skipping..."
@@ -756,7 +989,7 @@ inlineToOpenXML opts (Image alt (src, tit)) = do
let size = imageSize img
let (xpt,ypt) = maybe (120,120) sizeInPoints size
-- 12700 emu = 1 pt
- let (xemu,yemu) = (xpt * 12700, ypt * 12700)
+ let (xemu,yemu) = fitToPage (xpt * 12700, ypt * 12700) (pageWidth * 12700)
let cNvPicPr = mknode "pic:cNvPicPr" [] $
mknode "a:picLocks" [("noChangeArrowheads","1"),("noChangeAspect","1")] ()
let nvPicPr = mknode "pic:nvPicPr" []
@@ -814,10 +1047,20 @@ inlineToOpenXML opts (Image alt (src, tit)) = do
br :: Element
br = mknode "w:r" [] [mknode "w:br" [("w:type","textWrapping")] () ]
-parseXml :: Archive -> String -> IO Element
-parseXml refArchive relpath =
- case findEntryByPath relpath refArchive of
- Just e -> case parseXMLDoc $ UTF8.toStringLazy $ fromEntry e of
- Just d -> return d
- Nothing -> fail $ relpath ++ " corrupt in reference docx"
- Nothing -> fail $ relpath ++ " missing in reference docx"
+parseXml :: Archive -> Archive -> String -> IO Element
+parseXml refArchive distArchive relpath =
+ case ((findEntryByPath relpath refArchive `mplus`
+ findEntryByPath relpath distArchive)
+ >>= parseXMLDoc . UTF8.toStringLazy . fromEntry) of
+ Just d -> return d
+ Nothing -> fail $ relpath ++ " corrupt or missing in reference docx"
+
+-- | Scales the image to fit the page
+-- sizes are passed in emu
+fitToPage :: (Integer, Integer) -> Integer -> (Integer, Integer)
+fitToPage (x, y) pageWidth
+ -- Fixes width to the page width and scales the height
+ | x > pageWidth =
+ (pageWidth, round $
+ ((fromIntegral pageWidth) / ((fromIntegral :: Integer -> Double) x)) * (fromIntegral y))
+ | otherwise = (x, y)