diff options
Diffstat (limited to 'src/Text/Pandoc/Writers/Docx.hs')
-rw-r--r-- | src/Text/Pandoc/Writers/Docx.hs | 93 |
1 files changed, 70 insertions, 23 deletions
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 5320a2816..f8e8cc34d 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -29,7 +29,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to docx. -} module Text.Pandoc.Writers.Docx ( writeDocx ) where -import Data.List ( intercalate, isPrefixOf, isSuffixOf ) +import Data.List ( intercalate, isPrefixOf, isSuffixOf, stripPrefix ) import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Lazy.Char8 as BL8 @@ -62,8 +62,9 @@ import Text.Printf (printf) import qualified Control.Exception as E import Text.Pandoc.MIME (MimeType, getMimeType, getMimeTypeDef, extensionFromMimeType) -import Control.Applicative ((<$>), (<|>)) +import Control.Applicative ((<$>), (<|>), (<*>)) import Data.Maybe (fromMaybe, mapMaybe) +import Data.Char (isDigit) data ListMarker = NoMarker | BulletMarker @@ -104,6 +105,8 @@ data WriterState = WriterState{ , stInDel :: Bool , stChangesAuthor :: String , stChangesDate :: String + , stPrintWidth :: Integer + , stHeadingStyles :: [(Int,String)] } defaultWriterState :: WriterState @@ -122,6 +125,8 @@ defaultWriterState = WriterState{ , stInDel = False , stChangesAuthor = "unknown" , stChangesDate = "1969-12-31T19:00:00Z" + , stPrintWidth = 1 + , stHeadingStyles = [] } type WS a = StateT WriterState IO a @@ -181,11 +186,59 @@ writeDocx opts doc@(Pandoc meta _) = do case writerReferenceDocx opts of Just f -> B.readFile f Nothing -> readDataFile datadir "reference.docx" - distArchive <- liftM (toArchive . toLazy) $ readDataFile Nothing "reference.docx" + distArchive <- liftM (toArchive . toLazy) $ readDataFile datadir "reference.docx" + + parsedDoc <- parseXml refArchive distArchive "word/document.xml" + let wname f qn = qPrefix qn == Just "w" && f (qName qn) + let mbsectpr = filterElementName (wname (=="sectPr")) parsedDoc + + -- Gets the template size + let mbpgsz = mbsectpr >>= (filterElementName (wname (=="pgSz"))) + let mbAttrSzWidth = (elAttribs <$> mbpgsz) >>= (lookupAttrBy ((=="w") . qName)) + + let mbpgmar = mbsectpr >>= (filterElementName (wname (=="pgMar"))) + let mbAttrMarLeft = (elAttribs <$> mbpgmar) >>= (lookupAttrBy ((=="left") . qName)) + let mbAttrMarRight = (elAttribs <$> mbpgmar) >>= (lookupAttrBy ((=="right") . qName)) + + -- Get the avaible area (converting the size and the margins to int and + -- doing the difference + let pgContentWidth = (-) <$> (read <$> mbAttrSzWidth ::Maybe Integer) + <*> ( + (+) <$> (read <$> mbAttrMarRight ::Maybe Integer) + <*> (read <$> mbAttrMarLeft ::Maybe Integer) + ) + + -- styles + let stylepath = "word/styles.xml" + styledoc <- parseXml refArchive distArchive stylepath + + -- parse styledoc for heading styles + let styleNamespaces = map ((,) <$> qName . attrKey <*> attrVal) . + filter ((==Just "xmlns") . qPrefix . attrKey) . + elAttribs $ styledoc + let headingStyles = + let + mywURI = lookup "w" styleNamespaces + myName name = QName name mywURI (Just "w") + getAttrStyleId = findAttr (myName "styleId") + getNameVal = findChild (myName "name") >=> findAttr (myName "val") + getNum s | not $ null s, all isDigit s = Just (read s :: Int) + | otherwise = Nothing + getEngHeader = getAttrStyleId >=> stripPrefix "Heading" >=> getNum + getIntHeader = getNameVal >=> stripPrefix "heading " >=> getNum + toTuple getF = liftM2 (,) <$> getF <*> getAttrStyleId + toMap getF = mapMaybe (toTuple getF) $ + findChildren (myName "style") styledoc + select a b | not $ null a = a + | otherwise = b + in + select (toMap getEngHeader) (toMap getIntHeader) ((contents, footnotes), st) <- runStateT (writeOpenXML opts{writerWrapText = False} doc') defaultWriterState{ stChangesAuthor = fromMaybe "unknown" username - , stChangesDate = formatTime defaultTimeLocale "%FT%XZ" utctime} + , stChangesDate = formatTime defaultTimeLocale "%FT%XZ" utctime + , stPrintWidth = (maybe 420 (\x -> quot x 20) pgContentWidth) + , stHeadingStyles = headingStyles} let epochtime = floor $ utcTimeToPOSIXSeconds utctime let imgs = M.elems $ stImages st @@ -193,9 +246,6 @@ writeDocx opts doc@(Pandoc meta _) = do let toImageEntry (_,path,_,_,img) = toEntry ("word/" ++ path) epochtime $ toLazy img let imageEntries = map toImageEntry imgs - - - let stdAttributes = [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main") ,("xmlns:m","http://schemas.openxmlformats.org/officeDocument/2006/math") @@ -310,10 +360,7 @@ writeDocx opts doc@(Pandoc meta _) = do $ renderXml reldoc - -- adjust contents to add sectPr from reference.docx - parsedDoc <- parseXml refArchive distArchive "word/document.xml" - let wname f qn = qPrefix qn == Just "w" && f (qName qn) - let mbsectpr = filterElementName (wname (=="sectPr")) parsedDoc + -- adjust contents to add sectPr from reference.docx let sectpr = case mbsectpr of Just sectpr' -> let cs = renumIds (\q -> qName q == "id" && qPrefix q == Just "r") @@ -323,8 +370,6 @@ writeDocx opts doc@(Pandoc meta _) = do add_attrs (elAttribs sectpr') $ mknode "w:sectPr" [] cs Nothing -> (mknode "w:sectPr" [] ()) - - -- let sectpr = fromMaybe (mknode "w:sectPr" [] ()) mbsectpr' let contents' = contents ++ [sectpr] let docContents = mknode "w:document" stdAttributes @@ -347,8 +392,6 @@ writeDocx opts doc@(Pandoc meta _) = do -- styles let newstyles = styleToOpenXml $ writerHighlightStyle opts - let stylepath = "word/styles.xml" - styledoc <- parseXml refArchive distArchive stylepath let styledoc' = styledoc{ elContent = elContent styledoc ++ [Elem x | x <- newstyles, writerHighlight opts] } let styleEntry = toEntry stylepath epochtime $ renderXml styledoc' @@ -600,8 +643,9 @@ blockToOpenXML opts (Div (_,["references"],_) bs) = do return (header ++ rest) blockToOpenXML opts (Div _ bs) = blocksToOpenXML opts bs blockToOpenXML opts (Header lev (ident,_,_) lst) = do - paraProps <- withParaProp (pStyle $ "Heading" ++ show lev) $ - getParaProps False + headingStyles <- gets stHeadingStyles + paraProps <- maybe id (withParaProp . pStyle) (lookup lev headingStyles) $ + getParaProps False contents <- inlinesToOpenXML opts lst usedIdents <- gets stSectionIds let bookmarkName = if null ident @@ -927,6 +971,7 @@ inlineToOpenXML opts (Link txt (src,_)) = do return [ mknode "w:hyperlink" [("r:id",id')] contents ] inlineToOpenXML opts (Image alt (src, tit)) = do -- first, check to see if we've already done this image + pageWidth <- gets stPrintWidth imgs <- gets stImages case M.lookup src imgs of Just (_,_,_,elt,_) -> return [elt] @@ -943,7 +988,7 @@ inlineToOpenXML opts (Image alt (src, tit)) = do let size = imageSize img let (xpt,ypt) = maybe (120,120) sizeInPoints size -- 12700 emu = 1 pt - let (xemu,yemu) = fitToPage (xpt * 12700, ypt * 12700) + let (xemu,yemu) = fitToPage (xpt * 12700, ypt * 12700) (pageWidth * 12700) let cNvPicPr = mknode "pic:cNvPicPr" [] $ mknode "a:picLocks" [("noChangeArrowheads","1"),("noChangeAspect","1")] () let nvPicPr = mknode "pic:nvPicPr" [] @@ -1010,9 +1055,11 @@ parseXml refArchive distArchive relpath = Nothing -> fail $ relpath ++ " corrupt or missing in reference docx" -- | Scales the image to fit the page -fitToPage :: (Integer, Integer) -> (Integer, Integer) -fitToPage (x, y) - --5440680 is the emu width size of a letter page in portrait, minus the margins - | x > 5440680 = - (5440680, round $ (5440680 / ((fromIntegral :: Integer -> Double) x)) * (fromIntegral y)) +-- sizes are passed in emu +fitToPage :: (Integer, Integer) -> Integer -> (Integer, Integer) +fitToPage (x, y) pageWidth + -- Fixes width to the page width and scales the height + | x > pageWidth = + (pageWidth, round $ + ((fromIntegral pageWidth) / ((fromIntegral :: Integer -> Double) x)) * (fromIntegral y)) | otherwise = (x, y) |