diff options
author | Albert Krewinkel <albert@zeitkraut.de> | 2021-04-20 10:54:46 +0200 |
---|---|---|
committer | Albert Krewinkel <albert@zeitkraut.de> | 2021-04-20 10:57:54 +0200 |
commit | 0b74bbbdaa643a473e26ea14d0a94efac6078d8c (patch) | |
tree | f8299e8a9d52a290e6f7186a8d2dd7b31ba67ac3 /src/Text/Pandoc/Writers | |
parent | 3ab08fe2fb0ba7e786e7d2e89219d254b7753dd3 (diff) | |
download | pandoc-0b74bbbdaa643a473e26ea14d0a94efac6078d8c.tar.gz |
Docx writer: extract Table handling into separate module
Diffstat (limited to 'src/Text/Pandoc/Writers')
-rw-r--r-- | src/Text/Pandoc/Writers/Docx.hs | 226 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Docx/Table.hs | 114 |
2 files changed, 119 insertions, 221 deletions
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 749ad9a21..7064ded09 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -22,7 +22,6 @@ import Control.Applicative ((<|>)) import Control.Monad.Except (catchError, throwError) import Control.Monad.Reader import Control.Monad.State.Strict -import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL import Data.Char (isSpace, isLetter) import Data.List (intercalate, isPrefixOf, isSuffixOf) @@ -47,123 +46,24 @@ import Text.Pandoc.Highlighting (highlight) import Text.Pandoc.Error import Text.Pandoc.ImageSize import Text.Pandoc.Logging -import Text.Pandoc.MIME (MimeType, extensionFromMimeType, getMimeType, - getMimeTypeDef) +import Text.Pandoc.MIME (extensionFromMimeType, getMimeType, getMimeTypeDef) import Text.Pandoc.Options import Text.Pandoc.Writers.Docx.StyleMap +import Text.Pandoc.Writers.Docx.Table +import Text.Pandoc.Writers.Docx.Types import Text.Pandoc.Shared import Text.Pandoc.Walk import Text.Pandoc.Writers.Math import Text.Pandoc.Writers.Shared -import Text.Printf (printf) import Text.TeXMath import Text.Pandoc.Writers.OOXML import Text.Pandoc.XML.Light as XML import Data.Generics (mkT, everywhere) -data ListMarker = NoMarker - | BulletMarker - | NumberMarker ListNumberStyle ListNumberDelim Int - deriving (Show, Read, Eq, Ord) - -listMarkerToId :: ListMarker -> Text -listMarkerToId NoMarker = "990" -listMarkerToId BulletMarker = "991" -listMarkerToId (NumberMarker sty delim n) = T.pack $ - '9' : '9' : styNum : delimNum : show n - where styNum = case sty of - DefaultStyle -> '2' - Example -> '3' - Decimal -> '4' - LowerRoman -> '5' - UpperRoman -> '6' - LowerAlpha -> '7' - UpperAlpha -> '8' - delimNum = case delim of - DefaultDelim -> '0' - Period -> '1' - OneParen -> '2' - TwoParens -> '3' - -data EnvProps = EnvProps{ styleElement :: Maybe Element - , otherElements :: [Element] - } - -instance Semigroup EnvProps where - EnvProps s es <> EnvProps s' es' = EnvProps (s <|> s') (es ++ es') - -instance Monoid EnvProps where - mempty = EnvProps Nothing [] - mappend = (<>) - squashProps :: EnvProps -> [Element] squashProps (EnvProps Nothing es) = es squashProps (EnvProps (Just e) es) = e : es -data WriterEnv = WriterEnv{ envTextProperties :: EnvProps - , envParaProperties :: EnvProps - , envRTL :: Bool - , envListLevel :: Int - , envListNumId :: Int - , envInDel :: Bool - , envChangesAuthor :: Text - , envChangesDate :: Text - , envPrintWidth :: Integer - } - -defaultWriterEnv :: WriterEnv -defaultWriterEnv = WriterEnv{ envTextProperties = mempty - , envParaProperties = mempty - , envRTL = False - , envListLevel = -1 - , envListNumId = 1 - , envInDel = False - , envChangesAuthor = "unknown" - , envChangesDate = "1969-12-31T19:00:00Z" - , envPrintWidth = 1 - } - -data WriterState = WriterState{ - stFootnotes :: [Element] - , stComments :: [([(Text, Text)], [Inline])] - , stSectionIds :: Set.Set Text - , stExternalLinks :: M.Map Text Text - , stImages :: M.Map FilePath (String, String, Maybe MimeType, B.ByteString) - , stLists :: [ListMarker] - , stInsId :: Int - , stDelId :: Int - , stStyleMaps :: StyleMaps - , stFirstPara :: Bool - , stInTable :: Bool - , stInList :: Bool - , stTocTitle :: [Inline] - , stDynamicParaProps :: Set.Set ParaStyleName - , stDynamicTextProps :: Set.Set CharStyleName - , stCurId :: Int - } - -defaultWriterState :: WriterState -defaultWriterState = WriterState{ - stFootnotes = defaultFootnotes - , stComments = [] - , stSectionIds = Set.empty - , stExternalLinks = M.empty - , stImages = M.empty - , stLists = [NoMarker] - , stInsId = 1 - , stDelId = 1 - , stStyleMaps = StyleMaps M.empty M.empty - , stFirstPara = False - , stInTable = False - , stInList = False - , stTocTitle = [Str "Table of Contents"] - , stDynamicParaProps = Set.empty - , stDynamicTextProps = Set.empty - , stCurId = 20 - } - -type WS m = ReaderT WriterEnv (StateT WriterState m) - renumIdMap :: Int -> [Element] -> M.Map Text Text renumIdMap _ [] = M.empty renumIdMap n (e:es) @@ -858,12 +758,6 @@ separateTables (x@Table{}:xs@(Table{}:_)) = x : RawBlock (Format "openxml") "<w:p />" : separateTables xs separateTables (x:xs) = x : separateTables xs -pStyleM :: (PandocMonad m) => ParaStyleName -> WS m XML.Element -pStyleM styleName = do - pStyleMap <- gets (smParaStyle . stStyleMaps) - let sty' = getStyleIdFromName styleName pStyleMap - return $ mknode "w:pStyle" [("w:val", fromStyleId sty')] () - rStyleM :: (PandocMonad m) => CharStyleName -> WS m XML.Element rStyleM styleName = do cStyleMap <- gets (smCharStyle . stStyleMaps) @@ -995,78 +889,8 @@ blockToOpenXML' _ HorizontalRule = do $ mknode "v:rect" [("style","width:0;height:1.5pt"), ("o:hralign","center"), ("o:hrstd","t"),("o:hr","t")] () ] -blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = do - let (caption, aligns, widths, headers, rows) = toLegacyTable blkCapt specs thead tbody tfoot - setFirstPara - modify $ \s -> s { stInTable = True } - let captionStr = stringify caption - caption' <- if null caption - then return [] - else withParaPropM (pStyleM "Table Caption") - $ blockToOpenXML opts (Para caption) - let alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] () - -- Table cells require a <w:p> element, even an empty one! - -- Not in the spec but in Word 2007, 2010. See #4953. And - -- apparently the last element must be a <w:p>, see #6983. - let cellToOpenXML (al, cell) = do - es <- withParaProp (alignmentFor al) $ blocksToOpenXML opts cell - return $ - case reverse (onlyElems es) of - b:e:_ | qName (elName b) == "bookmarkEnd" - , qName (elName e) == "p" -> es - e:_ | qName (elName e) == "p" -> es - _ -> es ++ [Elem $ mknode "w:p" [] ()] - headers' <- mapM cellToOpenXML $ zip aligns headers - rows' <- mapM (mapM cellToOpenXML . zip aligns) rows - compactStyle <- pStyleM "Compact" - let emptyCell' = [Elem $ mknode "w:p" [] [mknode "w:pPr" [] [compactStyle]]] - let mkcell contents = mknode "w:tc" [] - $ if null contents - then emptyCell' - else contents - let mkrow cells = - mknode "w:tr" [] $ - map mkcell cells - let textwidth = 7920 -- 5.5 in in twips, 1/20 pt - let fullrow = 5000 -- 100% specified in pct - let (rowwidth :: Int) = round $ fullrow * sum widths - let mkgridcol w = mknode "w:gridCol" - [("w:w", tshow (floor (textwidth * w) :: Integer))] () - let hasHeader = not $ all null headers - modify $ \s -> s { stInTable = False } - -- for compatibility with Word <= 2007, we include a val with a bitmask - -- 0×0020 Apply first row conditional formatting - -- 0×0040 Apply last row conditional formatting - -- 0×0080 Apply first column conditional formatting - -- 0×0100 Apply last column conditional formatting - -- 0×0200 Do not apply row banding conditional formatting - -- 0×0400 Do not apply column banding conditional formattin - let tblLookVal :: Int - tblLookVal = if hasHeader then 0x20 else 0 - return $ - caption' ++ - [Elem $ - mknode "w:tbl" [] - ( mknode "w:tblPr" [] - ( mknode "w:tblStyle" [("w:val","Table")] () : - mknode "w:tblW" [("w:type", "pct"), ("w:w", tshow rowwidth)] () : - mknode "w:tblLook" [("w:firstRow",if hasHeader then "1" else "0") - ,("w:lastRow","0") - ,("w:firstColumn","0") - ,("w:lastColumn","0") - ,("w:noHBand","0") - ,("w:noVBand","0") - ,("w:val", T.pack $ printf "%04x" tblLookVal) - ] () : - [ mknode "w:tblCaption" [("w:val", captionStr)] () - | not (null caption) ] ) - : mknode "w:tblGrid" [] - (if all (==0) widths - then [] - else map mkgridcol widths) - : [ mkrow headers' | hasHeader ] ++ - map mkrow rows' - )] +blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = + tableToOpenXML (blocksToOpenXML opts) blkCapt specs thead tbody tfoot blockToOpenXML' opts el | BulletList lst <- el = addOpenXMLList BulletMarker lst | OrderedList (start, numstyle, numdelim) lst <- el @@ -1121,13 +945,6 @@ listItemToOpenXML opts numid (first:rest) = do modify $ \st -> st{ stInList = oldInList } return $ first'' ++ rest'' -alignmentToString :: Alignment -> Text -alignmentToString alignment = case alignment of - AlignLeft -> "left" - AlignRight -> "right" - AlignCenter -> "center" - AlignDefault -> "left" - -- | Convert a list of inline elements to OpenXML. inlinesToOpenXML :: PandocMonad m => WriterOptions -> [Inline] -> WS m [Content] inlinesToOpenXML opts lst = concat `fmap` mapM (inlineToOpenXML opts) lst @@ -1138,10 +955,6 @@ withNumId numid = local $ \env -> env{ envListNumId = numid } asList :: (PandocMonad m) => WS m a -> WS m a asList = local $ \env -> env{ envListLevel = envListLevel env + 1 } -isStyle :: Element -> Bool -isStyle e = isElem [] "w" "rStyle" e || - isElem [] "w" "pStyle" e - getTextProps :: (PandocMonad m) => WS m [Element] getTextProps = do props <- asks envTextProperties @@ -1170,16 +983,6 @@ getParaProps displayMathPara = do [] -> [] ps -> [mknode "w:pPr" [] ps] -withParaProp :: PandocMonad m => Element -> WS m a -> WS m a -withParaProp d p = - local (\env -> env {envParaProperties = ep <> envParaProperties env}) p - where ep = if isStyle d then EnvProps (Just d) [] else EnvProps Nothing [d] - -withParaPropM :: PandocMonad m => WS m Element -> WS m a -> WS m a -withParaPropM md p = do - d <- md - withParaProp d p - formattedString :: PandocMonad m => Text -> WS m [Element] formattedString str = -- properly handle soft hyphens @@ -1200,9 +1003,6 @@ formattedRun els = do props <- getTextProps return [ mknode "w:r" [] $ props ++ els ] -setFirstPara :: PandocMonad m => WS m () -setFirstPara = modify $ \s -> s { stFirstPara = True } - -- | Convert an inline element to OpenXML. inlineToOpenXML :: PandocMonad m => WriterOptions -> Inline -> WS m [Content] inlineToOpenXML opts il = withDirection $ inlineToOpenXML' opts il @@ -1494,22 +1294,6 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do br :: Element br = mknode "w:r" [] [mknode "w:br" [] ()] --- Word will insert these footnotes into the settings.xml file --- (whether or not they're visible in the document). If they're in the --- file, but not in the footnotes.xml file, it will produce --- problems. So we want to make sure we insert them into our document. -defaultFootnotes :: [Element] -defaultFootnotes = [ mknode "w:footnote" - [("w:type", "separator"), ("w:id", "-1")] - [ mknode "w:p" [] - [mknode "w:r" [] - [ mknode "w:separator" [] ()]]] - , mknode "w:footnote" - [("w:type", "continuationSeparator"), ("w:id", "0")] - [ mknode "w:p" [] - [ mknode "w:r" [] - [ mknode "w:continuationSeparator" [] ()]]]] - withDirection :: PandocMonad m => WS m a -> WS m a withDirection x = do diff --git a/src/Text/Pandoc/Writers/Docx/Table.hs b/src/Text/Pandoc/Writers/Docx/Table.hs new file mode 100644 index 000000000..a6b137fc4 --- /dev/null +++ b/src/Text/Pandoc/Writers/Docx/Table.hs @@ -0,0 +1,114 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} +{- | +Module : Text.Pandoc.Writers.Docx +Copyright : Copyright (C) 2012-2021 John MacFarlane +License : GNU GPL, version 2 or above +Maintainer : John MacFarlane <jgm@berkeley.edu> + +Conversion of table blocks to docx. +-} +module Text.Pandoc.Writers.Docx.Table + ( tableToOpenXML + ) where + +import Control.Monad.State.Strict +import Data.Text (Text) +import Text.Pandoc.Definition +import Text.Pandoc.Class.PandocMonad (PandocMonad) +import Text.Pandoc.Writers.Docx.Types +import Text.Pandoc.Shared +import Text.Pandoc.Writers.Shared +import Text.Printf (printf) +import Text.Pandoc.Writers.OOXML +import Text.Pandoc.XML.Light as XML +import qualified Data.Text as T + +tableToOpenXML :: PandocMonad m + => ([Block] -> WS m [Content]) + -> Caption + -> [ColSpec] + -> TableHead + -> [TableBody] + -> TableFoot + -> WS m [Content] +tableToOpenXML blocksToOpenXML blkCapt specs thead tbody tfoot = do + let (caption, aligns, widths, headers, rows) = + toLegacyTable blkCapt specs thead tbody tfoot + setFirstPara + modify $ \s -> s { stInTable = True } + let captionStr = stringify caption + caption' <- if null caption + then return [] + else withParaPropM (pStyleM "Table Caption") + $ blocksToOpenXML [Para caption] + let alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] () + -- Table cells require a <w:p> element, even an empty one! + -- Not in the spec but in Word 2007, 2010. See #4953. And + -- apparently the last element must be a <w:p>, see #6983. + let cellToOpenXML (al, cell) = do + es <- withParaProp (alignmentFor al) $ blocksToOpenXML cell + return $ + case reverse (onlyElems es) of + b:e:_ | qName (elName b) == "bookmarkEnd" + , qName (elName e) == "p" -> es + e:_ | qName (elName e) == "p" -> es + _ -> es ++ [Elem $ mknode "w:p" [] ()] + headers' <- mapM cellToOpenXML $ zip aligns headers + rows' <- mapM (mapM cellToOpenXML . zip aligns) rows + compactStyle <- pStyleM "Compact" + let emptyCell' = [Elem $ mknode "w:p" [] [mknode "w:pPr" [] [compactStyle]]] + let mkcell contents = mknode "w:tc" [] + $ if null contents + then emptyCell' + else contents + let mkrow cells = + mknode "w:tr" [] $ + map mkcell cells + let textwidth = 7920 -- 5.5 in in twips, 1/20 pt + let fullrow = 5000 -- 100% specified in pct + let (rowwidth :: Int) = round $ fullrow * sum widths + let mkgridcol w = mknode "w:gridCol" + [("w:w", tshow (floor (textwidth * w) :: Integer))] () + let hasHeader = not $ all null headers + modify $ \s -> s { stInTable = False } + -- for compatibility with Word <= 2007, we include a val with a bitmask + -- 0×0020 Apply first row conditional formatting + -- 0×0040 Apply last row conditional formatting + -- 0×0080 Apply first column conditional formatting + -- 0×0100 Apply last column conditional formatting + -- 0×0200 Do not apply row banding conditional formatting + -- 0×0400 Do not apply column banding conditional formattin + let tblLookVal :: Int + tblLookVal = if hasHeader then 0x20 else 0 + return $ + caption' ++ + [Elem $ + mknode "w:tbl" [] + ( mknode "w:tblPr" [] + ( mknode "w:tblStyle" [("w:val","Table")] () : + mknode "w:tblW" [("w:type", "pct"), ("w:w", tshow rowwidth)] () : + mknode "w:tblLook" [("w:firstRow",if hasHeader then "1" else "0") + ,("w:lastRow","0") + ,("w:firstColumn","0") + ,("w:lastColumn","0") + ,("w:noHBand","0") + ,("w:noVBand","0") + ,("w:val", T.pack $ printf "%04x" tblLookVal) + ] () : + [ mknode "w:tblCaption" [("w:val", captionStr)] () + | not (null caption) ] ) + : mknode "w:tblGrid" [] + (if all (==0) widths + then [] + else map mkgridcol widths) + : [ mkrow headers' | hasHeader ] ++ + map mkrow rows' + )] + +alignmentToString :: Alignment -> Text +alignmentToString alignment = case alignment of + AlignLeft -> "left" + AlignRight -> "right" + AlignCenter -> "center" + AlignDefault -> "left" |