aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Writers/Docx
diff options
context:
space:
mode:
authorIgor Pashev <pashev.igor@gmail.com>2021-07-17 18:10:34 +0200
committerIgor Pashev <pashev.igor@gmail.com>2021-07-17 18:46:16 +0200
commit48459559a13a20083fc9b31eb523b8ea2bf0a63f (patch)
tree1c04e75709457403110a6f8c5c90099f22369de3 /src/Text/Pandoc/Writers/Docx
parent0c39509d9b6a58958228cebf5d643598e5c98950 (diff)
parent46099e79defe662e541b12548200caf29063c1c6 (diff)
downloadpandoc-48459559a13a20083fc9b31eb523b8ea2bf0a63f.tar.gz
Merge branch 'master' of https://github.com/jgm/pandoc
Diffstat (limited to 'src/Text/Pandoc/Writers/Docx')
-rw-r--r--src/Text/Pandoc/Writers/Docx/StyleMap.hs2
-rw-r--r--src/Text/Pandoc/Writers/Docx/Table.hs227
-rw-r--r--src/Text/Pandoc/Writers/Docx/Types.hs185
3 files changed, 413 insertions, 1 deletions
diff --git a/src/Text/Pandoc/Writers/Docx/StyleMap.hs b/src/Text/Pandoc/Writers/Docx/StyleMap.hs
index c3c54c7e5..04868eaad 100644
--- a/src/Text/Pandoc/Writers/Docx/StyleMap.hs
+++ b/src/Text/Pandoc/Writers/Docx/StyleMap.hs
@@ -2,7 +2,7 @@
{- |
Module : Text.Pandoc.Writers.Docx.StyleMap
Copyright : © 2014-2020 Jesse Rosenthal <jrosenthal@jhu.edu>,
- 2014-2020 John MacFarlane <jgm@berkeley.edu>,
+ 2014-2021 John MacFarlane <jgm@berkeley.edu>,
2015-2019 Nikolay Yakimov <root@livid.pp.ru>
License : GNU GPL, version 2 or above
diff --git a/src/Text/Pandoc/Writers/Docx/Table.hs b/src/Text/Pandoc/Writers/Docx/Table.hs
new file mode 100644
index 000000000..7a84c5278
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Docx/Table.hs
@@ -0,0 +1,227 @@
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{- |
+Module : Text.Pandoc.Writers.Docx.Table
+Copyright : Copyright (C) 2012-2021 John MacFarlane
+License : GNU GPL, version 2 or above
+Maintainer : John MacFarlane <jgm@berkeley.edu>
+
+Conversion of table blocks to docx.
+-}
+module Text.Pandoc.Writers.Docx.Table
+ ( tableToOpenXML
+ ) where
+
+import Control.Monad.State.Strict
+import Data.Array
+import Data.Text (Text)
+import Text.Pandoc.Definition
+import Text.Pandoc.Class.PandocMonad (PandocMonad, translateTerm)
+import Text.Pandoc.Writers.Docx.Types
+import Text.Pandoc.Shared
+import Text.Printf (printf)
+import Text.Pandoc.Writers.GridTable hiding (Table)
+import Text.Pandoc.Writers.OOXML
+import Text.Pandoc.XML.Light as XML hiding (Attr)
+import qualified Data.Text as T
+import qualified Text.Pandoc.Translations as Term
+import qualified Text.Pandoc.Writers.GridTable as Grid
+
+tableToOpenXML :: PandocMonad m
+ => ([Block] -> WS m [Content])
+ -> Grid.Table
+ -> WS m [Content]
+tableToOpenXML blocksToOpenXML gridTable = do
+ setFirstPara
+ let (Grid.Table (ident,_,_) caption colspecs _rowheads thead tbodies tfoot) =
+ gridTable
+ let (Caption _maybeShortCaption captionBlocks) = caption
+ tablenum <- gets stNextTableNum
+ unless (null captionBlocks) $
+ modify $ \st -> st{ stNextTableNum = tablenum + 1 }
+ let tableid = if T.null ident
+ then "table" <> tshow tablenum
+ else ident
+ tablename <- translateTerm Term.Table
+ let captionStr = stringify captionBlocks
+ let aligns = map fst $ elems colspecs
+ captionXml <- if null captionBlocks
+ then return []
+ else withParaPropM (pStyleM "Table Caption")
+ $ blocksToOpenXML
+ $ addLabel tableid tablename tablenum captionBlocks
+ -- We set "in table" after processing the caption, because we don't
+ -- want the "Table Caption" style to be overwritten with "Compact".
+ modify $ \s -> s { stInTable = True }
+ head' <- cellGridToOpenXML blocksToOpenXML HeadRow aligns thead
+ bodies <- mapM (cellGridToOpenXML blocksToOpenXML BodyRow aligns) tbodies
+ foot' <- cellGridToOpenXML blocksToOpenXML FootRow aligns tfoot
+
+ let hasHeader = not . null . indices . partRowAttrs $ thead
+ let hasFooter = not . null . indices . partRowAttrs $ tfoot
+ -- for compatibility with Word <= 2007, we include a val with a bitmask
+ -- 0×0020 Apply first row conditional formatting
+ -- 0×0040 Apply last row conditional formatting
+ -- 0×0080 Apply first column conditional formatting
+ -- 0×0100 Apply last column conditional formatting
+ -- 0×0200 Do not apply row banding conditional formatting
+ -- 0×0400 Do not apply column banding conditional formattin
+ let tblLookVal = if hasHeader then (0x20 :: Int) else 0
+ let (gridCols, tblWattr) = tableLayout (elems colspecs)
+ let tbl = mknode "w:tbl" []
+ ( mknode "w:tblPr" []
+ ( mknode "w:tblStyle" [("w:val","Table")] () :
+ mknode "w:tblW" tblWattr () :
+ mknode "w:tblLook" [("w:firstRow",if hasHeader then "1" else "0")
+ ,("w:lastRow",if hasFooter then "1" else "0")
+ ,("w:firstColumn","0")
+ ,("w:lastColumn","0")
+ ,("w:noHBand","0")
+ ,("w:noVBand","0")
+ ,("w:val", T.pack $ printf "%04x" tblLookVal)
+ ] () :
+ [ mknode "w:tblCaption" [("w:val", captionStr)] ()
+ | not (T.null captionStr) ]
+ )
+ : mknode "w:tblGrid" [] gridCols
+ : head' ++ mconcat bodies ++ foot'
+ )
+ modify $ \s -> s { stInTable = False }
+ return $ captionXml ++ [Elem tbl]
+
+addLabel :: Text -> Text -> Int -> [Block] -> [Block]
+addLabel tableid tablename tablenum bs =
+ case bs of
+ (Para ils : rest) -> Para (label : Space : ils) : rest
+ (Plain ils : rest) -> Plain (label : Space : ils) : rest
+ _ -> Para [label] : bs
+ where
+ label = Span (tableid,[],[])
+ [Str (tablename <> "\160"),
+ RawInline (Format "openxml")
+ ("<w:fldSimple w:instr=\"SEQ Table"
+ <> " \\* ARABIC \"><w:r><w:t>"
+ <> tshow tablenum
+ <> "</w:t></w:r></w:fldSimple>"),
+ Str ":"]
+
+-- | Parts of a table
+data RowType = HeadRow | BodyRow | FootRow
+
+alignmentToString :: Alignment -> Text
+alignmentToString = \case
+ AlignLeft -> "left"
+ AlignRight -> "right"
+ AlignCenter -> "center"
+ AlignDefault -> "left"
+
+tableLayout :: [ColSpec] -> ([Element], [(Text, Text)])
+tableLayout specs =
+ let
+ textwidth = 7920 -- 5.5 in in twips (1 twip == 1/20 pt)
+ fullrow = 5000 -- 100% specified in pct (1 pct == 1/50th of a percent)
+ ncols = length specs
+ getWidth = \case
+ ColWidth n -> n
+ _ -> 0
+ widths = map (getWidth . snd) specs
+ rowwidth = round (fullrow * sum widths) :: Int
+ widthToTwips w = floor (textwidth * w) :: Int
+ mkGridCol w = mknode "w:gridCol" [("w:w", tshow (widthToTwips w))] ()
+ in if all (== 0) widths
+ then ( replicate ncols $ mkGridCol (1.0 / fromIntegral ncols)
+ , [ ("w:type", "auto"), ("w:w", "0")])
+ else ( map mkGridCol widths
+ , [ ("w:type", "pct"), ("w:w", tshow rowwidth) ])
+
+cellGridToOpenXML :: PandocMonad m
+ => ([Block] -> WS m [Content])
+ -> RowType
+ -> [Alignment]
+ -> Part
+ -> WS m [Element]
+cellGridToOpenXML blocksToOpenXML rowType aligns part@(Part _ cellArray _) =
+ if null (elems cellArray)
+ then return mempty
+ else mapM (rowToOpenXML blocksToOpenXML) $
+ partToRows rowType aligns part
+
+data OOXMLCell
+ = OOXMLCell Attr Alignment RowSpan ColSpan [Block]
+ | OOXMLCellMerge ColSpan
+
+data OOXMLRow = OOXMLRow RowType Attr [OOXMLCell]
+
+partToRows :: RowType -> [Alignment] -> Part -> [OOXMLRow]
+partToRows rowType aligns part =
+ let
+ toOOXMLCell :: Alignment -> RowIndex -> ColIndex -> GridCell -> [OOXMLCell]
+ toOOXMLCell columnAlign ridx cidx = \case
+ ContentCell attr align rowspan colspan blocks ->
+ -- Respect non-default, cell specific alignment.
+ let align' = case align of
+ AlignDefault -> columnAlign
+ _ -> align
+ in [OOXMLCell attr align' rowspan colspan blocks]
+ ContinuationCell idx'@(ridx',cidx') | ridx /= ridx', cidx == cidx' ->
+ case (partCellArray part)!idx' of
+ (ContentCell _ _ _ colspan _) -> [OOXMLCellMerge colspan]
+ x -> error $ "Content cell expected, got, " ++ show x ++
+ " at index " ++ show idx'
+ _ -> mempty
+ mkRow :: (RowIndex, Attr) -> OOXMLRow
+ mkRow (ridx, attr) = OOXMLRow rowType attr
+ . mconcat
+ . zipWith (\align -> uncurry $ toOOXMLCell align ridx)
+ aligns
+ . assocs
+ . rowArray ridx
+ $ partCellArray part
+ in map mkRow $ assocs (partRowAttrs part)
+
+rowToOpenXML :: PandocMonad m
+ => ([Block] -> WS m [Content])
+ -> OOXMLRow
+ -> WS m Element
+rowToOpenXML blocksToOpenXML (OOXMLRow rowType _attr cells) = do
+ xmlcells <- mapM (ooxmlCellToOpenXML blocksToOpenXML) cells
+ let addTrPr = case rowType of
+ HeadRow -> (mknode "w:trPr" []
+ [mknode "w:tblHeader" [("w:val", "true")] ()] :)
+ BodyRow -> id
+ FootRow -> id
+ return $ mknode "w:tr" [] (addTrPr xmlcells)
+
+ooxmlCellToOpenXML :: PandocMonad m
+ => ([Block] -> WS m [Content])
+ -> OOXMLCell
+ -> WS m Element
+ooxmlCellToOpenXML blocksToOpenXML = \case
+ OOXMLCellMerge (ColSpan colspan) -> do
+ return $ mknode "w:tc" []
+ [ mknode "w:tcPr" [] [ mknode "w:gridSpan" [("w:val", tshow colspan)] ()
+ , mknode "w:vMerge" [("w:val", "continue")] () ]
+ , mknode "w:p" [] [mknode "w:pPr" [] ()]]
+ OOXMLCell _attr align rowspan (ColSpan colspan) contents -> do
+ compactStyle <- pStyleM "Compact"
+ es <- withParaProp (alignmentFor align) $ blocksToOpenXML contents
+ -- Table cells require a <w:p> element, even an empty one!
+ -- Not in the spec but in Word 2007, 2010. See #4953. And
+ -- apparently the last element must be a <w:p>, see #6983.
+ return . mknode "w:tc" [] $
+ Elem
+ (mknode "w:tcPr" [] ([ mknode "w:gridSpan" [("w:val", tshow colspan)] ()
+ | colspan > 1] ++
+ [ mknode "w:vMerge" [("w:val", "restart")] ()
+ | rowspan > RowSpan 1 ])) :
+ if null contents
+ then [Elem $ mknode "w:p" [] [mknode "w:pPr" [] [compactStyle]]]
+ else case reverse (onlyElems es) of
+ b:e:_ | qName (elName b) == "bookmarkEnd" -- y tho?
+ , qName (elName e) == "p" -> es
+ e:_ | qName (elName e) == "p" -> es
+ _ -> es ++ [Elem $ mknode "w:p" [] ()]
+
+alignmentFor :: Alignment -> Element
+alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] ()
diff --git a/src/Text/Pandoc/Writers/Docx/Types.hs b/src/Text/Pandoc/Writers/Docx/Types.hs
new file mode 100644
index 000000000..74b8d2753
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Docx/Types.hs
@@ -0,0 +1,185 @@
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+Module : Text.Pandoc.Writers.Docx
+Copyright : Copyright (C) 2012-2021 John MacFarlane
+License : GNU GPL, version 2 or above
+Maintainer : John MacFarlane <jgm@berkeley.edu>
+
+Conversion of table blocks to docx.
+-}
+module Text.Pandoc.Writers.Docx.Types
+ ( EnvProps (..)
+ , WriterEnv (..)
+ , defaultWriterEnv
+ , WriterState (..)
+ , defaultWriterState
+ , WS
+ , ListMarker (..)
+ , listMarkerToId
+ , pStyleM
+ , isStyle
+ , setFirstPara
+ , withParaProp
+ , withParaPropM
+ ) where
+
+import Control.Applicative ((<|>))
+import Control.Monad.Reader
+import Control.Monad.State.Strict
+import Data.Text (Text)
+import Text.Pandoc.Class.PandocMonad (PandocMonad)
+import Text.Pandoc.Definition
+import Text.Pandoc.MIME (MimeType)
+import Text.Pandoc.Writers.Docx.StyleMap
+import Text.Pandoc.Writers.OOXML
+import Text.Pandoc.XML.Light as XML
+import qualified Data.ByteString as B
+import qualified Data.Map as M
+import qualified Data.Set as Set
+import qualified Data.Text as T
+
+data ListMarker = NoMarker
+ | BulletMarker
+ | NumberMarker ListNumberStyle ListNumberDelim Int
+ deriving (Show, Read, Eq, Ord)
+
+listMarkerToId :: ListMarker -> Text
+listMarkerToId NoMarker = "990"
+listMarkerToId BulletMarker = "991"
+listMarkerToId (NumberMarker sty delim n) = T.pack $
+ '9' : '9' : styNum : delimNum : show n
+ where styNum = case sty of
+ DefaultStyle -> '2'
+ Example -> '3'
+ Decimal -> '4'
+ LowerRoman -> '5'
+ UpperRoman -> '6'
+ LowerAlpha -> '7'
+ UpperAlpha -> '8'
+ delimNum = case delim of
+ DefaultDelim -> '0'
+ Period -> '1'
+ OneParen -> '2'
+ TwoParens -> '3'
+
+
+data EnvProps = EnvProps{ styleElement :: Maybe Element
+ , otherElements :: [Element]
+ }
+
+instance Semigroup EnvProps where
+ EnvProps s es <> EnvProps s' es' = EnvProps (s <|> s') (es ++ es')
+
+instance Monoid EnvProps where
+ mempty = EnvProps Nothing []
+ mappend = (<>)
+
+data WriterEnv = WriterEnv
+ { envTextProperties :: EnvProps
+ , envParaProperties :: EnvProps
+ , envRTL :: Bool
+ , envListLevel :: Int
+ , envListNumId :: Int
+ , envInDel :: Bool
+ , envChangesAuthor :: Text
+ , envChangesDate :: Text
+ , envPrintWidth :: Integer
+ }
+
+defaultWriterEnv :: WriterEnv
+defaultWriterEnv = WriterEnv
+ { envTextProperties = mempty
+ , envParaProperties = mempty
+ , envRTL = False
+ , envListLevel = -1
+ , envListNumId = 1
+ , envInDel = False
+ , envChangesAuthor = "unknown"
+ , envChangesDate = "1969-12-31T19:00:00Z"
+ , envPrintWidth = 1
+ }
+
+
+data WriterState = WriterState{
+ stFootnotes :: [Element]
+ , stComments :: [([(Text, Text)], [Inline])]
+ , stSectionIds :: Set.Set Text
+ , stExternalLinks :: M.Map Text Text
+ , stImages :: M.Map FilePath (String, String, Maybe MimeType, B.ByteString)
+ , stLists :: [ListMarker]
+ , stInsId :: Int
+ , stDelId :: Int
+ , stStyleMaps :: StyleMaps
+ , stFirstPara :: Bool
+ , stInTable :: Bool
+ , stInList :: Bool
+ , stTocTitle :: [Inline]
+ , stDynamicParaProps :: Set.Set ParaStyleName
+ , stDynamicTextProps :: Set.Set CharStyleName
+ , stCurId :: Int
+ , stNextFigureNum :: Int
+ , stNextTableNum :: Int
+ }
+
+defaultWriterState :: WriterState
+defaultWriterState = WriterState{
+ stFootnotes = defaultFootnotes
+ , stComments = []
+ , stSectionIds = Set.empty
+ , stExternalLinks = M.empty
+ , stImages = M.empty
+ , stLists = [NoMarker]
+ , stInsId = 1
+ , stDelId = 1
+ , stStyleMaps = StyleMaps M.empty M.empty
+ , stFirstPara = False
+ , stInTable = False
+ , stInList = False
+ , stTocTitle = [Str "Table of Contents"]
+ , stDynamicParaProps = Set.empty
+ , stDynamicTextProps = Set.empty
+ , stCurId = 20
+ , stNextFigureNum = 1
+ , stNextTableNum = 1
+ }
+
+setFirstPara :: PandocMonad m => WS m ()
+setFirstPara = modify $ \s -> s { stFirstPara = True }
+
+type WS m = ReaderT WriterEnv (StateT WriterState m)
+
+-- Word will insert these footnotes into the settings.xml file
+-- (whether or not they're visible in the document). If they're in the
+-- file, but not in the footnotes.xml file, it will produce
+-- problems. So we want to make sure we insert them into our document.
+defaultFootnotes :: [Element]
+defaultFootnotes = [ mknode "w:footnote"
+ [("w:type", "separator"), ("w:id", "-1")]
+ [ mknode "w:p" []
+ [mknode "w:r" []
+ [ mknode "w:separator" [] ()]]]
+ , mknode "w:footnote"
+ [("w:type", "continuationSeparator"), ("w:id", "0")]
+ [ mknode "w:p" []
+ [ mknode "w:r" []
+ [ mknode "w:continuationSeparator" [] ()]]]]
+
+pStyleM :: (PandocMonad m) => ParaStyleName -> WS m XML.Element
+pStyleM styleName = do
+ pStyleMap <- gets (smParaStyle . stStyleMaps)
+ let sty' = getStyleIdFromName styleName pStyleMap
+ return $ mknode "w:pStyle" [("w:val", fromStyleId sty')] ()
+
+withParaProp :: PandocMonad m => Element -> WS m a -> WS m a
+withParaProp d p =
+ local (\env -> env {envParaProperties = ep <> envParaProperties env}) p
+ where ep = if isStyle d then EnvProps (Just d) [] else EnvProps Nothing [d]
+
+withParaPropM :: PandocMonad m => WS m Element -> WS m a -> WS m a
+withParaPropM md p = do
+ d <- md
+ withParaProp d p
+
+isStyle :: Element -> Bool
+isStyle e = isElem [] "w" "rStyle" e ||
+ isElem [] "w" "pStyle" e