diff options
author | John MacFarlane <jgm@berkeley.edu> | 2019-09-21 11:39:15 -0700 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2019-09-21 11:39:15 -0700 |
commit | 780079aaec6b82d3d235e20afda06cdfc8b486d5 (patch) | |
tree | 6994f0b88d34cdc7df9baad28d2198e0819e2f29 | |
parent | e3a6648e8f2553bb37a158729ec7cfbdd942fbcb (diff) | |
parent | 9dbfd23c566efb5bf80deaf4e34b09cf38a97197 (diff) | |
download | pandoc-780079aaec6b82d3d235e20afda06cdfc8b486d5.tar.gz |
Merge branch 'lierdakil-docx-reader-styles'
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | MANUAL.txt | 6 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx.hs | 212 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx/Lists.hs | 25 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx/Parse.hs | 315 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Docx.hs | 127 | ||||
-rw-r--r-- | test/Tests/Readers/Docx.hs | 9 | ||||
-rw-r--r-- | test/docx/0_level_headers.native | 4 | ||||
-rw-r--r-- | test/docx/adjacent_codeblocks.docx | bin | 22437 -> 22264 bytes | |||
-rw-r--r-- | test/docx/compact-style-removal.docx | bin | 0 -> 9951 bytes | |||
-rw-r--r-- | test/docx/compact-style-removal.native | 5 | ||||
-rw-r--r-- | test/docx/custom-style-with-styles.native | 6 | ||||
-rw-r--r-- | test/docx/lists-compact.docx | bin | 0 -> 9952 bytes | |||
-rw-r--r-- | test/docx/lists-compact.native | 5 | ||||
-rw-r--r-- | test/docx/lists.docx | bin | 19845 -> 9473 bytes | |||
-rw-r--r-- | test/docx/lists.native | 2 | ||||
-rw-r--r-- | test/docx/nested_anchors_in_header.native | 2 |
17 files changed, 407 insertions, 312 deletions
diff --git a/.gitignore b/.gitignore index 32ef66e8b..e8690c4eb 100644 --- a/.gitignore +++ b/.gitignore @@ -23,3 +23,4 @@ windows/*.wixobj data/reference.docx data/reference.odt .stack-work +cabal.project.local diff --git a/MANUAL.txt b/MANUAL.txt index 3c9f158ef..7e01a5002 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -5321,17 +5321,17 @@ And with the extension: $ pandoc test/docx/custom-style-reference.docx -f docx+styles -t markdown - ::: {custom-style="FirstParagraph"} + ::: {custom-style="First Paragraph"} This is some text. ::: - ::: {custom-style="BodyText"} + ::: {custom-style="Body Text"} This is text with an [emphasized]{custom-style="Emphatic"} text style. And this is text with a [strengthened]{custom-style="Strengthened"} text style. ::: - ::: {custom-style="MyBlockStyle"} + ::: {custom-style="My Block Style"} > Here is a styled paragraph that inherits from Block Text. ::: diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 4f44d18e7..9d17ab118 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -2,6 +2,8 @@ {-# LANGUAGE CPP #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternGuards #-} +{-# LANGUAGE MultiWayIf #-} +{-# LANGUAGE FlexibleContexts #-} {- | Module : Text.Pandoc.Readers.Docx Copyright : Copyright (C) 2014-2019 Jesse Rosenthal @@ -64,6 +66,7 @@ import Control.Monad.State.Strict import qualified Data.ByteString.Lazy as B import Data.Default (Default) import Data.List (delete, intersect) +import Data.Char (isSpace) import qualified Data.Map as M import Data.Maybe (isJust, fromMaybe) import Data.Sequence (ViewL (..), viewl) @@ -129,16 +132,16 @@ instance Default DEnv where type DocxContext m = ReaderT DEnv (StateT DState m) evalDocxContext :: PandocMonad m => DocxContext m a -> DEnv -> DState -> m a -evalDocxContext ctx env st = flip evalStateT st $flip runReaderT env ctx +evalDocxContext ctx env st = flip evalStateT st $ runReaderT ctx env -- This is empty, but we put it in for future-proofing. -spansToKeep :: [String] +spansToKeep :: [CharStyleName] spansToKeep = [] -divsToKeep :: [String] -divsToKeep = ["list-item", "Definition", "DefinitionTerm"] +divsToKeep :: [ParaStyleName] +divsToKeep = ["Definition", "Definition Term"] -metaStyles :: M.Map String String +metaStyles :: M.Map ParaStyleName String metaStyles = M.fromList [ ("Title", "title") , ("Subtitle", "subtitle") , ("Author", "author") @@ -150,7 +153,7 @@ sepBodyParts = span (\bp -> isMetaPar bp || isEmptyPar bp) isMetaPar :: BodyPart -> Bool isMetaPar (Paragraph pPr _) = - not $ null $ intersect (pStyle pPr) (M.keys metaStyles) + not $ null $ intersect (getStyleNames $ pStyle pPr) (M.keys metaStyles) isMetaPar _ = False isEmptyPar :: BodyPart -> Bool @@ -167,7 +170,7 @@ bodyPartsToMeta' :: PandocMonad m => [BodyPart] -> DocxContext m (M.Map String M bodyPartsToMeta' [] = return M.empty bodyPartsToMeta' (bp : bps) | (Paragraph pPr parParts) <- bp - , (c : _)<- (pStyle pPr) `intersect` (M.keys metaStyles) + , (c : _)<- getStyleNames (pStyle pPr) `intersect` M.keys metaStyles , (Just metaField) <- M.lookup c metaStyles = do inlines <- smushInlines <$> mapM parPartToInlines parParts remaining <- bodyPartsToMeta' bps @@ -197,11 +200,29 @@ fixAuthors (MetaBlocks blks) = g _ = MetaInlines [] fixAuthors mv = mv -codeStyles :: [String] -codeStyles = ["VerbatimChar"] +isInheritedFromStyles :: (Eq (StyleName s), HasStyleName s, HasParentStyle s) => [StyleName s] -> s -> Bool +isInheritedFromStyles names sty + | getStyleName sty `elem` names = True + | Just psty <- getParentStyle sty = isInheritedFromStyles names psty + | otherwise = False -codeDivs :: [String] -codeDivs = ["SourceCode"] +hasStylesInheritedFrom :: [ParaStyleName] -> ParagraphStyle -> Bool +hasStylesInheritedFrom ns s = any (isInheritedFromStyles ns) $ pStyle s + +removeStyleNamed :: ParaStyleName -> ParagraphStyle -> ParagraphStyle +removeStyleNamed sn ps = ps{pStyle = filter (\psd -> getStyleName psd /= sn) $ pStyle ps} + +isCodeCharStyle :: CharStyle -> Bool +isCodeCharStyle = isInheritedFromStyles ["Verbatim Char"] + +isCodeDiv :: ParagraphStyle -> Bool +isCodeDiv = hasStylesInheritedFrom ["Source Code"] + +isBlockQuote :: ParStyle -> Bool +isBlockQuote = + isInheritedFromStyles [ + "Quote", "Block Text", "Block Quote", "Block Quotation" + ] runElemToInlines :: RunElem -> Inlines runElemToInlines (TextRun s) = text s @@ -227,57 +248,31 @@ parPartToString (InternalHyperLink _ runs) = concatMap runToString runs parPartToString (ExternalHyperLink _ runs) = concatMap runToString runs parPartToString _ = "" -blacklistedCharStyles :: [String] +blacklistedCharStyles :: [CharStyleName] blacklistedCharStyles = ["Hyperlink"] resolveDependentRunStyle :: PandocMonad m => RunStyle -> DocxContext m RunStyle resolveDependentRunStyle rPr - | Just (s, _) <- rStyle rPr, s `elem` blacklistedCharStyles = + | Just s <- rParentStyle rPr + , getStyleName s `elem` blacklistedCharStyles = return rPr - | Just (_, cs) <- rStyle rPr = do + | Just s <- rParentStyle rPr = do opts <- asks docxOptions if isEnabled Ext_styles opts then return rPr - else do rPr' <- resolveDependentRunStyle cs - return $ - RunStyle { isBold = case isBold rPr of - Just bool -> Just bool - Nothing -> isBold rPr' - , isItalic = case isItalic rPr of - Just bool -> Just bool - Nothing -> isItalic rPr' - , isSmallCaps = case isSmallCaps rPr of - Just bool -> Just bool - Nothing -> isSmallCaps rPr' - , isStrike = case isStrike rPr of - Just bool -> Just bool - Nothing -> isStrike rPr' - , isRTL = case isRTL rPr of - Just bool -> Just bool - Nothing -> isRTL rPr' - , rVertAlign = case rVertAlign rPr of - Just valign -> Just valign - Nothing -> rVertAlign rPr' - , rUnderline = case rUnderline rPr of - Just ulstyle -> Just ulstyle - Nothing -> rUnderline rPr' - , rStyle = rStyle rPr - } + else leftBiasedMergeRunStyle rPr <$> resolveDependentRunStyle (cStyleData s) | otherwise = return rPr runStyleToTransform :: PandocMonad m => RunStyle -> DocxContext m (Inlines -> Inlines) runStyleToTransform rPr - | Just (s, _) <- rStyle rPr - , s `elem` spansToKeep = do - transform <- runStyleToTransform rPr{rStyle = Nothing} - return $ spanWith ("", [s], []) . transform - | Just (s, _) <- rStyle rPr = do - opts <- asks docxOptions - let extraInfo = if isEnabled Ext_styles opts - then spanWith ("", [], [("custom-style", s)]) - else id - transform <- runStyleToTransform rPr{rStyle = Nothing} - return $ extraInfo . transform + | Just sn <- getStyleName <$> rParentStyle rPr + , sn `elem` spansToKeep = do + transform <- runStyleToTransform rPr{rParentStyle = Nothing} + return $ spanWith ("", [normalizeToClassName sn], []) . transform + | Just s <- rParentStyle rPr = do + ei <- extraInfo spanWith s + transform <- runStyleToTransform rPr{rParentStyle = Nothing} + return $ ei . transform | Just True <- isItalic rPr = do transform <- runStyleToTransform rPr{isItalic = Nothing} return $ emph . transform @@ -309,8 +304,7 @@ runStyleToTransform rPr runToInlines :: PandocMonad m => Run -> DocxContext m Inlines runToInlines (Run rs runElems) - | Just (s, _) <- rStyle rs - , s `elem` codeStyles = do + | maybe False isCodeCharStyle $ rParentStyle rs = do rPr <- resolveDependentRunStyle rs let codeString = code $ concatMap runElemToString runElems return $ case rVertAlign rPr of @@ -525,55 +519,49 @@ trimSps (Many ils) = Many $ Seq.dropWhileL isSp $Seq.dropWhileR isSp ils isSp LineBreak = True isSp _ = False +extraInfo :: (Eq (StyleName a), PandocMonad m, HasStyleName a) + => (Attr -> i -> i) -> a -> DocxContext m (i -> i) +extraInfo f s = do + opts <- asks docxOptions + return $ if | isEnabled Ext_styles opts + -> f ("", [], [("custom-style", fromStyleName $ getStyleName s)]) + | otherwise -> id + parStyleToTransform :: PandocMonad m => ParagraphStyle -> DocxContext m (Blocks -> Blocks) parStyleToTransform pPr | (c:cs) <- pStyle pPr - , c `elem` divsToKeep = do + , getStyleName c `elem` divsToKeep = do let pPr' = pPr { pStyle = cs } transform <- parStyleToTransform pPr' - return $ divWith ("", [c], []) . transform + return $ divWith ("", [normalizeToClassName $ getStyleName c], []) . transform | (c:cs) <- pStyle pPr, - c `elem` listParagraphDivs = do + getStyleName c `elem` listParagraphStyles = do let pPr' = pPr { pStyle = cs, indentation = Nothing} transform <- parStyleToTransform pPr' - return $ divWith ("", [c], []) . transform - | (c:cs) <- pStyle pPr - , Just True <- pBlockQuote pPr = do - opts <- asks docxOptions - let pPr' = pPr { pStyle = cs } - transform <- parStyleToTransform pPr' - let extraInfo = if isEnabled Ext_styles opts - then divWith ("", [], [("custom-style", c)]) - else id - return $ extraInfo . blockQuote . transform + return $ divWith ("", [normalizeToClassName $ getStyleName c], []) . transform | (c:cs) <- pStyle pPr = do - opts <- asks docxOptions - let pPr' = pPr { pStyle = cs} + let pPr' = pPr { pStyle = cs } transform <- parStyleToTransform pPr' - let extraInfo = if isEnabled Ext_styles opts - then divWith ("", [], [("custom-style", c)]) - else id - return $ extraInfo . transform + ei <- extraInfo divWith c + return $ ei . (if isBlockQuote c then blockQuote else id) . transform | null (pStyle pPr) - , Just left <- indentation pPr >>= leftParIndent - , Just hang <- indentation pPr >>= hangingParIndent = do + , Just left <- indentation pPr >>= leftParIndent = do let pPr' = pPr { indentation = Nothing } + hang = fromMaybe 0 $ indentation pPr >>= hangingParIndent transform <- parStyleToTransform pPr' - return $ case (left - hang) > 0 of - True -> blockQuote . transform - False -> transform - | null (pStyle pPr), - Just left <- indentation pPr >>= leftParIndent = do - let pPr' = pPr { indentation = Nothing } - transform <- parStyleToTransform pPr' - return $ case left > 0 of - True -> blockQuote . transform - False -> transform + return $ if (left - hang) > 0 + then blockQuote . transform + else transform parStyleToTransform _ = return id +normalizeToClassName :: (FromStyleName a) => a -> String +normalizeToClassName = map go . fromStyleName + where go c | isSpace c = '-' + | otherwise = c + bodyPartToBlocks :: PandocMonad m => BodyPart -> DocxContext m Blocks bodyPartToBlocks (Paragraph pPr parparts) - | not $ null $ codeDivs `intersect` (pStyle pPr) = do + | isCodeDiv pPr = do transform <- parStyleToTransform pPr return $ transform $ @@ -583,62 +571,60 @@ bodyPartToBlocks (Paragraph pPr parparts) ils <-local (\s-> s{docxInHeaderBlock=True}) (smushInlines <$> mapM parPartToInlines parparts) makeHeaderAnchor $ - headerWith ("", delete style (pStyle pPr), []) n ils + headerWith ("", map normalizeToClassName . delete style $ getStyleNames (pStyle pPr), []) n ils | otherwise = do - ils <- (trimSps . smushInlines) <$> mapM parPartToInlines parparts + ils <- trimSps . smushInlines <$> mapM parPartToInlines parparts prevParaIls <- gets docxPrevPara dropIls <- gets docxDropCap let ils' = dropIls <> ils - if dropCap pPr + let (paraOrPlain, pPr') + | hasStylesInheritedFrom ["Compact"] pPr = (plain, removeStyleNamed "Compact" pPr) + | otherwise = (para, pPr) + if dropCap pPr' then do modify $ \s -> s { docxDropCap = ils' } return mempty else do modify $ \s -> s { docxDropCap = mempty } let ils'' = prevParaIls <> (if isNull prevParaIls then mempty else space) <> ils' + handleInsertion = do + modify $ \s -> s {docxPrevPara = mempty} + transform <- parStyleToTransform pPr' + return $ transform $ paraOrPlain ils'' opts <- asks docxOptions - case () of - - _ | isNull ils'' && not (isEnabled Ext_empty_paragraphs opts) -> + if | isNull ils'' && not (isEnabled Ext_empty_paragraphs opts) -> return mempty - _ | Just (TrackedChange Insertion _) <- pChange pPr - , AcceptChanges <- readerTrackChanges opts -> do - modify $ \s -> s {docxPrevPara = mempty} - transform <- parStyleToTransform pPr - return $ transform $ para ils'' - _ | Just (TrackedChange Insertion _) <- pChange pPr + | Just (TrackedChange Insertion _) <- pChange pPr' + , AcceptChanges <- readerTrackChanges opts -> + handleInsertion + | Just (TrackedChange Insertion _) <- pChange pPr' , RejectChanges <- readerTrackChanges opts -> do modify $ \s -> s {docxPrevPara = ils''} return mempty - _ | Just (TrackedChange Insertion cInfo) <- pChange pPr + | Just (TrackedChange Insertion cInfo) <- pChange pPr' , AllChanges <- readerTrackChanges opts , ChangeInfo _ cAuthor cDate <- cInfo -> do let attr = ("", ["paragraph-insertion"], [("author", cAuthor), ("date", cDate)]) insertMark = spanWith attr mempty - transform <- parStyleToTransform pPr + transform <- parStyleToTransform pPr' return $ transform $ - para $ ils'' <> insertMark - _ | Just (TrackedChange Deletion _) <- pChange pPr + paraOrPlain $ ils'' <> insertMark + | Just (TrackedChange Deletion _) <- pChange pPr' , AcceptChanges <- readerTrackChanges opts -> do modify $ \s -> s {docxPrevPara = ils''} return mempty - _ | Just (TrackedChange Deletion _) <- pChange pPr - , RejectChanges <- readerTrackChanges opts -> do - modify $ \s -> s {docxPrevPara = mempty} - transform <- parStyleToTransform pPr - return $ transform $ para ils'' - _ | Just (TrackedChange Deletion cInfo) <- pChange pPr + | Just (TrackedChange Deletion _) <- pChange pPr' + , RejectChanges <- readerTrackChanges opts -> + handleInsertion + | Just (TrackedChange Deletion cInfo) <- pChange pPr' , AllChanges <- readerTrackChanges opts , ChangeInfo _ cAuthor cDate <- cInfo -> do let attr = ("", ["paragraph-deletion"], [("author", cAuthor), ("date", cDate)]) insertMark = spanWith attr mempty - transform <- parStyleToTransform pPr + transform <- parStyleToTransform pPr' return $ transform $ - para $ ils'' <> insertMark - _ | otherwise -> do - modify $ \s -> s {docxPrevPara = mempty} - transform <- parStyleToTransform pPr - return $ transform $ para ils'' + paraOrPlain $ ils'' <> insertMark + | otherwise -> handleInsertion bodyPartToBlocks (ListItem pPr numId lvl (Just levelInfo) parparts) = do -- We check whether this current numId has previously been used, -- since Docx expects us to pick up where we left off. @@ -658,7 +644,7 @@ bodyPartToBlocks (ListItem pPr numId lvl (Just levelInfo) parparts) = do blks <- bodyPartToBlocks (Paragraph pPr parparts) return $ divWith ("", ["list-item"], kvs) blks bodyPartToBlocks (ListItem pPr _ _ _ parparts) = - let pPr' = pPr {pStyle = "ListParagraph": pStyle pPr} + let pPr' = pPr {pStyle = constructBogusParStyleData "list-paragraph": pStyle pPr} in bodyPartToBlocks $ Paragraph pPr' parparts bodyPartToBlocks (Tbl _ _ _ []) = diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs index cc390f122..eb24640c5 100644 --- a/src/Text/Pandoc/Readers/Docx/Lists.hs +++ b/src/Text/Pandoc/Readers/Docx/Lists.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Docx.Lists Copyright : Copyright (C) 2014-2019 Jesse Rosenthal @@ -14,13 +15,16 @@ Functions for converting flat docx paragraphs into nested lists. module Text.Pandoc.Readers.Docx.Lists ( blocksToBullets , blocksToDefinitions , listParagraphDivs + , listParagraphStyles ) where import Prelude import Data.List import Data.Maybe +import Data.String (fromString) import Text.Pandoc.Generic (bottomUp) import Text.Pandoc.JSON +import Text.Pandoc.Readers.Docx.Parse (ParaStyleName) import Text.Pandoc.Shared (trim, safeRead) isListItem :: Block -> Bool @@ -79,7 +83,10 @@ getListType b@(Div (_, _, kvs) _) | isListItem b = getListType _ = Nothing listParagraphDivs :: [String] -listParagraphDivs = ["ListParagraph"] +listParagraphDivs = ["list-paragraph"] + +listParagraphStyles :: [ParaStyleName] +listParagraphStyles = map fromString listParagraphDivs -- This is a first stab at going through and attaching meaning to list -- paragraphs, without an item marker, following a list item. We @@ -160,7 +167,7 @@ blocksToDefinitions' defAcc acc [] = reverse $ DefinitionList (reverse defAcc) : acc blocksToDefinitions' defAcc acc (Div (_, classes1, _) blks1 : Div (ident2, classes2, kvs2) blks2 : blks) - | "DefinitionTerm" `elem` classes1 && "Definition" `elem` classes2 = + | "Definition-Term" `elem` classes1 && "Definition" `elem` classes2 = let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2) pair = if remainingAttr2 == ("", [], []) then (concatMap plainParaInlines blks1, [blks2]) else (concatMap plainParaInlines blks1, [[Div remainingAttr2 blks2]]) in @@ -169,12 +176,12 @@ blocksToDefinitions' ((defTerm, defItems):defs) acc (Div (ident2, classes2, kvs2) blks2 : blks) | "Definition" `elem` classes2 = let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2) - defItems2 = case remainingAttr2 == ("", [], []) of - True -> blks2 - False -> [Div remainingAttr2 blks2] - defAcc' = case null defItems of - True -> (defTerm, [defItems2]) : defs - False -> (defTerm, init defItems ++ [last defItems ++ defItems2]) : defs + defItems2 = if remainingAttr2 == ("", [], []) + then blks2 + else [Div remainingAttr2 blks2] + defAcc' = if null defItems + then (defTerm, [defItems2]) : defs + else (defTerm, init defItems ++ [last defItems ++ defItems2]) : defs in blocksToDefinitions' defAcc' acc blks blocksToDefinitions' [] acc (b:blks) = @@ -198,7 +205,5 @@ removeListDivs' blk = [blk] removeListDivs :: [Block] -> [Block] removeListDivs = concatMap removeListDivs' - - blocksToDefinitions :: [Block] -> [Block] blocksToDefinitions = blocksToDefinitions' [] [] diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index f725660b9..00c5fb0be 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -1,7 +1,11 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE PatternGuards #-} {-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} {- | Module : Text.Pandoc.Readers.Docx.Parse Copyright : Copyright (C) 2014-2019 Jesse Rosenthal @@ -31,6 +35,8 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..) , VertAlign(..) , ParIndentation(..) , ParagraphStyle(..) + , ParStyle + , CharStyle(cStyleData) , Row(..) , Cell(..) , TrackedChange(..) @@ -38,8 +44,17 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..) , ChangeInfo(..) , FieldInfo(..) , Level(..) + , ParaStyleName + , CharStyleName + , FromStyleName(..) + , HasStyleName(..) + , HasParentStyle(..) , archiveToDocx , archiveToDocxWithWarnings + , getStyleNames + , pHeading + , constructBogusParStyleData + , leftBiasedMergeRunStyle ) where import Prelude import Codec.Archive.Zip @@ -49,10 +64,13 @@ import Control.Monad.Reader import Control.Monad.State.Strict import Data.Bits ((.|.)) import qualified Data.ByteString.Lazy as B -import Data.Char (chr, ord, readLitChar) +import Data.Char (chr, ord, readLitChar, toLower) import Data.List +import Data.Function (on) +import Data.String (IsString(..)) import qualified Data.Map as M import Data.Maybe +import Data.Coerce import System.FilePath import Text.Pandoc.Readers.Docx.Util import Text.Pandoc.Readers.Docx.Fields @@ -121,9 +139,9 @@ unwrap :: NameSpaces -> Content -> [Content] unwrap ns (Elem element) | isElem ns "w" "sdt" element , Just sdtContent <- findChildByName ns "w" "sdtContent" element - = concatMap ((unwrap ns) . Elem) (elChildren sdtContent) + = concatMap (unwrap ns . Elem) (elChildren sdtContent) | isElem ns "w" "smartTag" element - = concatMap ((unwrap ns) . Elem) (elChildren element) + = concatMap (unwrap ns . Elem) (elChildren element) unwrap _ content = [content] unwrapChild :: NameSpaces -> Content -> Content @@ -149,24 +167,20 @@ walkDocument ns element = _ -> Nothing -data Docx = Docx Document +newtype Docx = Docx Document deriving Show data Document = Document NameSpaces Body deriving Show -data Body = Body [BodyPart] +newtype Body = Body [BodyPart] deriving Show type Media = [(FilePath, B.ByteString)] -type CharStyle = (String, RunStyle) +type CharStyleMap = M.Map CharStyleId CharStyle -type ParStyle = (String, ParStyleData) - -type CharStyleMap = M.Map String RunStyle - -type ParStyleMap = M.Map String ParStyleData +type ParStyleMap = M.Map ParaStyleId ParStyle data Numbering = Numbering NameSpaces [Numb] [AbstractNumb] deriving Show @@ -213,12 +227,9 @@ data ChangeInfo = ChangeInfo ChangeId Author ChangeDate data TrackedChange = TrackedChange ChangeType ChangeInfo deriving Show -data ParagraphStyle = ParagraphStyle { pStyle :: [String] +data ParagraphStyle = ParagraphStyle { pStyle :: [ParStyle] , indentation :: Maybe ParIndentation , dropCap :: Bool - , pHeading :: Maybe (String, Int) - , pNumInfo :: Maybe (String, String) - , pBlockQuote :: Maybe Bool , pChange :: Maybe TrackedChange } deriving Show @@ -227,9 +238,6 @@ defaultParagraphStyle :: ParagraphStyle defaultParagraphStyle = ParagraphStyle { pStyle = [] , indentation = Nothing , dropCap = False - , pHeading = Nothing - , pNumInfo = Nothing - , pBlockQuote = Nothing , pChange = Nothing } @@ -242,18 +250,61 @@ data BodyPart = Paragraph ParagraphStyle [ParPart] type TblGrid = [Integer] -data TblLook = TblLook {firstRowFormatting::Bool} +newtype TblLook = TblLook {firstRowFormatting::Bool} deriving Show defaultTblLook :: TblLook defaultTblLook = TblLook{firstRowFormatting = False} -data Row = Row [Cell] +newtype Row = Row [Cell] deriving Show -data Cell = Cell [BodyPart] +newtype Cell = Cell [BodyPart] deriving Show +newtype CharStyleId = CharStyleId { fromCharStyleId :: String } + deriving (Show, Eq, Ord, FromStyleId) +newtype ParaStyleId = ParaStyleId { fromParaStyleId :: String } + deriving (Show, Eq, Ord, FromStyleId) + +newtype CharStyleName = CharStyleName { fromCharStyleName :: CIString } + deriving (Show, Eq, Ord, IsString, FromStyleName) +newtype ParaStyleName = ParaStyleName { fromParaStyleName :: CIString } + deriving (Show, Eq, Ord, IsString, FromStyleName) + +-- Case-insensitive comparisons +newtype CIString = CIString String deriving (Show, IsString, FromStyleName) + +class FromStyleName a where + fromStyleName :: a -> String + +instance FromStyleName String where + fromStyleName = id + +class FromStyleId a where + fromStyleId :: a -> String + +instance FromStyleId String where + fromStyleId = id + +instance Eq CIString where + (==) = (==) `on` map toLower . coerce + +instance Ord CIString where + compare = compare `on` map toLower . coerce + +leftBiasedMergeRunStyle :: RunStyle -> RunStyle -> RunStyle +leftBiasedMergeRunStyle a b = RunStyle + { isBold = isBold a <|> isBold b + , isItalic = isItalic a <|> isItalic b + , isSmallCaps = isSmallCaps a <|> isSmallCaps b + , isStrike = isStrike a <|> isStrike b + , isRTL = isRTL a <|> isRTL b + , rVertAlign = rVertAlign a <|> rVertAlign b + , rUnderline = rUnderline a <|> rUnderline b + , rParentStyle = rParentStyle a + } + -- (width, height) in EMUs type Extent = Maybe (Double, Double) @@ -285,21 +336,28 @@ data RunElem = TextRun String | LnBrk | Tab | SoftHyphen | NoBreakHyphen data VertAlign = BaseLn | SupScrpt | SubScrpt deriving Show -data RunStyle = RunStyle { isBold :: Maybe Bool - , isItalic :: Maybe Bool - , isSmallCaps :: Maybe Bool - , isStrike :: Maybe Bool - , isRTL :: Maybe Bool - , rVertAlign :: Maybe VertAlign - , rUnderline :: Maybe String - , rStyle :: Maybe CharStyle +data CharStyle = CharStyle { cStyleId :: CharStyleId + , cStyleName :: CharStyleName + , cStyleData :: RunStyle + } deriving (Show) + +data RunStyle = RunStyle { isBold :: Maybe Bool + , isItalic :: Maybe Bool + , isSmallCaps :: Maybe Bool + , isStrike :: Maybe Bool + , isRTL :: Maybe Bool + , rVertAlign :: Maybe VertAlign + , rUnderline :: Maybe String + , rParentStyle :: Maybe CharStyle } deriving Show -data ParStyleData = ParStyleData { headingLev :: Maybe (String, Int) - , isBlockQuote :: Maybe Bool - , numInfo :: Maybe (String, String) - , psStyle :: Maybe ParStyle} +data ParStyle = ParStyle { headingLev :: Maybe (ParaStyleName, Int) + , numInfo :: Maybe (String, String) + , psParentStyle :: Maybe ParStyle + , pStyleName :: ParaStyleName + , pStyleId :: ParaStyleId + } deriving Show defaultRunStyle :: RunStyle @@ -310,7 +368,7 @@ defaultRunStyle = RunStyle { isBold = Nothing , isRTL = Nothing , rVertAlign = Nothing , rUnderline = Nothing - , rStyle = Nothing + , rParentStyle = Nothing } type Target = String @@ -390,7 +448,10 @@ elemToBody ns element | isElem ns "w" "body" element = elemToBody _ _ = throwError WrongElem archiveToStyles :: Archive -> (CharStyleMap, ParStyleMap) -archiveToStyles zf = +archiveToStyles = archiveToStyles' getStyleId getStyleId +archiveToStyles' :: (Ord k1, Ord k2, ElemToStyle a1, ElemToStyle a2) => + (a1 -> k1) -> (a2 -> k2) -> Archive -> (M.Map k1 a1, M.Map k2 a2) +archiveToStyles' conv1 conv2 zf = let stylesElem = findEntryByPath "word/styles.xml" zf >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) in @@ -399,19 +460,17 @@ archiveToStyles zf = Just styElem -> let namespaces = elemToNameSpaces styElem in - ( M.fromList $ buildBasedOnList namespaces styElem - (Nothing :: Maybe CharStyle), - M.fromList $ buildBasedOnList namespaces styElem - (Nothing :: Maybe ParStyle) ) + ( M.fromList $ map (\r -> (conv1 r, r)) $ buildBasedOnList namespaces styElem Nothing, + M.fromList $ map (\p -> (conv2 p, p)) $ buildBasedOnList namespaces styElem Nothing) -isBasedOnStyle :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> Bool +isBasedOnStyle :: (ElemToStyle a, FromStyleId (StyleId a)) => NameSpaces -> Element -> Maybe a -> Bool isBasedOnStyle ns element parentStyle | isElem ns "w" "style" element , Just styleType <- findAttrByName ns "w" "type" element , styleType == cStyleType parentStyle , Just basedOnVal <- findChildByName ns "w" "basedOn" element >>= findAttrByName ns "w" "val" - , Just ps <- parentStyle = basedOnVal == getStyleId ps + , Just ps <- parentStyle = basedOnVal == fromStyleId (getStyleId ps) | isElem ns "w" "style" element , Just styleType <- findAttrByName ns "w" "type" element , styleType == cStyleType parentStyle @@ -419,30 +478,70 @@ isBasedOnStyle ns element parentStyle , Nothing <- parentStyle = True | otherwise = False -class ElemToStyle a where +class HasStyleId a => ElemToStyle a where cStyleType :: Maybe a -> String elemToStyle :: NameSpaces -> Element -> Maybe a -> Maybe a - getStyleId :: a -> String + +class FromStyleId (StyleId a) => HasStyleId a where + type StyleId a + getStyleId :: a -> StyleId a + +class FromStyleName (StyleName a) => HasStyleName a where + type StyleName a + getStyleName :: a -> StyleName a + +class HasParentStyle a where + getParentStyle :: a -> Maybe a + +instance HasParentStyle CharStyle where + getParentStyle = rParentStyle . cStyleData + +instance HasParentStyle ParStyle where + getParentStyle = psParentStyle + +getStyleNames :: (Functor t, HasStyleName a) => t a -> t (StyleName a) +getStyleNames = fmap getStyleName + +constructBogusParStyleData :: ParaStyleName -> ParStyle +constructBogusParStyleData stName = ParStyle + { headingLev = Nothing + , numInfo = Nothing + , psParentStyle = Nothing + , pStyleName = stName + , pStyleId = ParaStyleId . filter (/=' ') . fromStyleName $ stName + } instance ElemToStyle CharStyle where cStyleType _ = "character" elemToStyle ns element parentStyle | isElem ns "w" "style" element - , Just "character" <- findAttrByName ns "w" "type" element - , Just styleId <- findAttrByName ns "w" "styleId" element = - Just (styleId, elemToRunStyle ns element parentStyle) + , Just "character" <- findAttrByName ns "w" "type" element = + elemToCharStyle ns element parentStyle | otherwise = Nothing - getStyleId s = fst s + +instance HasStyleId CharStyle where + type StyleId CharStyle = CharStyleId + getStyleId = cStyleId + +instance HasStyleName CharStyle where + type StyleName CharStyle = CharStyleName + getStyleName = cStyleName instance ElemToStyle ParStyle where cStyleType _ = "paragraph" elemToStyle ns element parentStyle | isElem ns "w" "style" element , Just "paragraph" <- findAttrByName ns "w" "type" element - , Just styleId <- findAttrByName ns "w" "styleId" element = - Just (styleId, elemToParStyleData ns element parentStyle) + = elemToParStyleData ns element parentStyle | otherwise = Nothing - getStyleId s = fst s + +instance HasStyleId ParStyle where + type StyleId ParStyle = ParaStyleId + getStyleId = pStyleId + +instance HasStyleName ParStyle where + type StyleName ParStyle = ParaStyleName + getStyleName = pStyleName getStyleChildren :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> [a] getStyleChildren ns element parentStyle @@ -495,7 +594,7 @@ filePathToRelType "word/_rels/endnotes.xml.rels" _ = Just InEndnote -- -- to see if it's a documentPath, we have to check against the dynamic -- -- docPath specified in "_rels/.rels" filePathToRelType path docXmlPath = - if path == "word/_rels/" ++ (takeFileName docXmlPath) ++ ".rels" + if path == "word/_rels/" ++ takeFileName docXmlPath ++ ".rels" then Just InDocument else Nothing @@ -537,7 +636,7 @@ lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do case lvlOverride of Just (LevelOverride _ _ (Just lvl')) -> Just lvl' Just (LevelOverride _ (Just strt) _) -> - lookup ilvl $ map (\(Level i fmt s _) -> (i, (Level i fmt s (Just strt)))) lvls + lookup ilvl $ map (\(Level i fmt s _) -> (i, Level i fmt s (Just strt))) lvls _ -> lookup ilvl $ map (\l@(Level i _ _ _) -> (i, l)) lvls @@ -693,6 +792,12 @@ testBitMask bitMaskS n = stringToInteger :: String -> Maybe Integer stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)]) +pHeading :: ParagraphStyle -> Maybe (ParaStyleName, Int) +pHeading = getParStyleField headingLev . pStyle + +pNumInfo :: ParagraphStyle -> Maybe (String, String) +pNumInfo = getParStyleField numInfo . pStyle + elemToBodyPart :: NameSpaces -> Element -> D BodyPart elemToBodyPart ns element | isElem ns "w" "p" element @@ -703,23 +808,19 @@ elemToBodyPart ns element elemToBodyPart ns element | isElem ns "w" "p" element , Just (numId, lvl) <- getNumInfo ns element = do - sty <- asks envParStyles - let parstyle = elemToParagraphStyle ns element sty + parstyle <- elemToParagraphStyle ns element <$> asks envParStyles parparts <- mapD (elemToParPart ns) (elChildren element) - num <- asks envNumbering - let levelInfo = lookupLevel numId lvl num + levelInfo <- lookupLevel numId lvl <$> asks envNumbering return $ ListItem parstyle numId lvl levelInfo parparts elemToBodyPart ns element | isElem ns "w" "p" element = do - sty <- asks envParStyles - let parstyle = elemToParagraphStyle ns element sty + parstyle <- elemToParagraphStyle ns element <$> asks envParStyles parparts <- mapD (elemToParPart ns) (elChildren element) -- Word uses list enumeration for numbered headings, so we only -- want to infer a list from the styles if it is NOT a heading. case pHeading parstyle of Nothing | Just (numId, lvl) <- pNumInfo parstyle -> do - num <- asks envNumbering - let levelInfo = lookupLevel numId lvl num + levelInfo <- lookupLevel numId lvl <$> asks envNumbering return $ ListItem parstyle numId lvl levelInfo parparts _ -> return $ Paragraph parstyle parparts elemToBodyPart ns element @@ -727,7 +828,7 @@ elemToBodyPart ns element let caption' = findChildByName ns "w" "tblPr" element >>= findChildByName ns "w" "tblCaption" >>= findAttrByName ns "w" "val" - caption = (fromMaybe "" caption') + caption = fromMaybe "" caption' grid' = case findChildByName ns "w" "tblGrid" element of Just g -> elemToTblGrid ns g Nothing -> return [] @@ -1007,20 +1108,18 @@ elemToRun ns element return $ Run runStyle runElems elemToRun _ _ = throwError WrongElem -getParentStyleValue :: (ParStyleData -> Maybe a) -> ParStyleData -> Maybe a +getParentStyleValue :: (ParStyle -> Maybe a) -> ParStyle -> Maybe a getParentStyleValue field style | Just value <- field style = Just value - | Just parentStyle <- psStyle style - = getParentStyleValue field (snd parentStyle) + | Just parentStyle <- psParentStyle style + = getParentStyleValue field parentStyle getParentStyleValue _ _ = Nothing -getParStyleField :: (ParStyleData -> Maybe a) -> ParStyleMap -> [String] -> - Maybe a -getParStyleField field stylemap styles - | x <- mapMaybe (\x -> M.lookup x stylemap) styles - , (y:_) <- mapMaybe (getParentStyleValue field) x +getParStyleField :: (ParStyle -> Maybe a) -> [ParStyle] -> Maybe a +getParStyleField field styles + | (y:_) <- mapMaybe (getParentStyleValue field) styles = Just y -getParStyleField _ _ _ = Nothing +getParStyleField _ _ = Nothing getTrackedChange :: NameSpaces -> Element -> Maybe TrackedChange getTrackedChange ns element @@ -1042,10 +1141,10 @@ elemToParagraphStyle ns element sty | Just pPr <- findChildByName ns "w" "pPr" element = let style = mapMaybe - (findAttrByName ns "w" "val") + (fmap ParaStyleId . findAttrByName ns "w" "val") (findChildrenByName ns "w" "pStyle" pPr) in ParagraphStyle - {pStyle = style + {pStyle = mapMaybe (`M.lookup` sty) style , indentation = findChildByName ns "w" "ind" pPr >>= elemToParIndentation ns @@ -1057,9 +1156,6 @@ elemToParagraphStyle ns element sty Just "none" -> False Just _ -> True Nothing -> False - , pHeading = getParStyleField headingLev sty style - , pNumInfo = getParStyleField numInfo sty style - , pBlockQuote = getParStyleField isBlockQuote sty style , pChange = findChildByName ns "w" "rPr" pPr >>= filterChild (\e -> isElem ns "w" "ins" e || isElem ns "w" "moveTo" e || @@ -1089,16 +1185,20 @@ elemToRunStyleD :: NameSpaces -> Element -> D RunStyle elemToRunStyleD ns element | Just rPr <- findChildByName ns "w" "rPr" element = do charStyles <- asks envCharStyles - let parentSty = case + let parentSty = findChildByName ns "w" "rStyle" rPr >>= - findAttrByName ns "w" "val" - of - Just styName | Just style <- M.lookup styName charStyles -> - Just (styName, style) - _ -> Nothing + findAttrByName ns "w" "val" >>= + flip M.lookup charStyles . CharStyleId return $ elemToRunStyle ns element parentSty elemToRunStyleD _ _ = return defaultRunStyle +elemToCharStyle :: NameSpaces + -> Element -> Maybe CharStyle -> Maybe CharStyle +elemToCharStyle ns element parentStyle + = CharStyle <$> (CharStyleId <$> findAttrByName ns "w" "styleId" element) + <*> getElementStyleName ns element + <*> (Just $ elemToRunStyle ns element parentStyle) + elemToRunStyle :: NameSpaces -> Element -> Maybe CharStyle -> RunStyle elemToRunStyle ns element parentStyle | Just rPr <- findChildByName ns "w" "rPr" element = @@ -1121,38 +1221,23 @@ elemToRunStyle ns element parentStyle , rUnderline = findChildByName ns "w" "u" rPr >>= findAttrByName ns "w" "val" - , rStyle = parentStyle + , rParentStyle = parentStyle } elemToRunStyle _ _ _ = defaultRunStyle -getHeaderLevel :: NameSpaces -> Element -> Maybe (String,Int) +getHeaderLevel :: NameSpaces -> Element -> Maybe (ParaStyleName, Int) getHeaderLevel ns element - | Just styleId <- findAttrByName ns "w" "styleId" element - , Just index <- stripPrefix "Heading" styleId - , Just n <- stringToInteger index - , n > 0 = Just (styleId, fromInteger n) - | Just styleId <- findAttrByName ns "w" "styleId" element - , Just index <- findChildByName ns "w" "name" element >>= - findAttrByName ns "w" "val" >>= - stripPrefix "heading " - , Just n <- stringToInteger index - , n > 0 = Just (styleId, fromInteger n) + | Just styleName <- getElementStyleName ns element + , Just n <- stringToInteger =<< + (stripPrefix "heading " . map toLower $ + fromStyleName styleName) + , n > 0 = Just (styleName, fromInteger n) getHeaderLevel _ _ = Nothing -blockQuoteStyleIds :: [String] -blockQuoteStyleIds = ["Quote", "BlockQuote", "BlockQuotation"] - -blockQuoteStyleNames :: [String] -blockQuoteStyleNames = ["Quote", "Block Text"] - -getBlockQuote :: NameSpaces -> Element -> Maybe Bool -getBlockQuote ns element - | Just styleId <- findAttrByName ns "w" "styleId" element - , styleId `elem` blockQuoteStyleIds = Just True - | Just styleName <- findChildByName ns "w" "name" element >>= - findAttrByName ns "w" "val" - , styleName `elem` blockQuoteStyleNames = Just True -getBlockQuote _ _ = Nothing +getElementStyleName :: Coercible String a => NameSpaces -> Element -> Maybe a +getElementStyleName ns el = coerce <$> + ((findChildByName ns "w" "name" el >>= findAttrByName ns "w" "val") + <|> findAttrByName ns "w" "styleId" el) getNumInfo :: NameSpaces -> Element -> Maybe (String, String) getNumInfo ns element = do @@ -1167,15 +1252,19 @@ getNumInfo ns element = do return (numId, lvl) -elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> ParStyleData -elemToParStyleData ns element parentStyle = - ParStyleData +elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> Maybe ParStyle +elemToParStyleData ns element parentStyle + | Just styleId <- findAttrByName ns "w" "styleId" element + , Just styleName <- getElementStyleName ns element + = Just $ ParStyle { headingLev = getHeaderLevel ns element - , isBlockQuote = getBlockQuote ns element , numInfo = getNumInfo ns element - , psStyle = parentStyle - } + , psParentStyle = parentStyle + , pStyleName = styleName + , pStyleId = ParaStyleId styleId + } +elemToParStyleData _ _ _ = Nothing elemToRunElem :: NameSpaces -> Element -> D RunElem elemToRunElem ns element diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 02db23db5..d62dbeedb 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -208,7 +208,7 @@ writeDocx opts doc@(Pandoc meta _) = do let doc' = walk fixDisplayMath doc username <- P.lookupEnv "USERNAME" utctime <- P.getCurrentTime - distArchive <- (toArchive . BL.fromStrict) <$> do + distArchive <- toArchive . BL.fromStrict <$> do oldUserDataDir <- P.getUserDataDir P.setUserDataDir Nothing res <- P.readDefaultDataFile "reference.docx" @@ -216,7 +216,7 @@ writeDocx opts doc@(Pandoc meta _) = do return res refArchive <- case writerReferenceDoc opts of Just f -> toArchive <$> P.readFileLazy f - Nothing -> (toArchive . BL.fromStrict) <$> + Nothing -> toArchive . BL.fromStrict <$> P.readDataFile "reference.docx" parsedDoc <- parseXml refArchive distArchive "word/document.xml" @@ -237,7 +237,7 @@ writeDocx opts doc@(Pandoc meta _) = do >>= subtrct mbAttrMarRight >>= subtrct mbAttrMarLeft where - subtrct mbStr = \x -> mbStr >>= safeRead >>= (\y -> Just $ x - y) + subtrct mbStr x = mbStr >>= safeRead >>= (\y -> Just $ x - y) -- styles mblang <- toLang $ getLang opts meta @@ -285,7 +285,7 @@ writeDocx opts doc@(Pandoc meta _) = do envRTL = isRTLmeta , envChangesAuthor = fromMaybe "unknown" username , envChangesDate = formatTime defaultTimeLocale "%FT%XZ" utctime - , envPrintWidth = maybe 420 (\x -> quot x 20) pgContentWidth + , envPrintWidth = maybe 420 (`quot` 20) pgContentWidth } @@ -366,7 +366,7 @@ writeDocx opts doc@(Pandoc meta _) = do map (\x -> (maybe "" ("/word/" ++) $ extractTarget x, "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml")) footers) ++ map mkImageOverride imgs ++ - map mkMediaOverride [ eRelativePath e | e <- zEntries refArchive + [ mkMediaOverride (eRelativePath e) | e <- zEntries refArchive , "word/media/" `isPrefixOf` eRelativePath e ] let defaultnodes = [mknode "Default" @@ -589,8 +589,8 @@ writeDocx opts doc@(Pandoc meta _) = do mapMaybe (fmap ("word/" ++) . extractTarget) (headers ++ footers) let miscRelEntries = [ e | e <- zEntries refArchive - , "word/_rels/" `isPrefixOf` (eRelativePath e) - , ".xml.rels" `isSuffixOf` (eRelativePath e) + , "word/_rels/" `isPrefixOf` eRelativePath e + , ".xml.rels" `isSuffixOf` eRelativePath e , eRelativePath e /= "word/_rels/document.xml.rels" , eRelativePath e /= "word/_rels/footnotes.xml.rels" ] let otherMediaEntries = [ e | e <- zEntries refArchive @@ -778,24 +778,24 @@ makeTOC opts = do tocTitle <- gets stTocTitle title <- withParaPropM (pStyleM "TOC Heading") (blocksToOpenXML opts [Para tocTitle]) return - [mknode "w:sdt" [] ([ + [mknode "w:sdt" [] [ mknode "w:sdtPr" [] ( - mknode "w:docPartObj" [] ( + mknode "w:docPartObj" [] [mknode "w:docPartGallery" [("w:val","Table of Contents")] (), mknode "w:docPartUnique" [] ()] - ) -- w:docPartObj + -- w:docPartObj ), -- w:sdtPr mknode "w:sdtContent" [] (title++[ mknode "w:p" [] ( - mknode "w:r" [] ([ + mknode "w:r" [] [ mknode "w:fldChar" [("w:fldCharType","begin"),("w:dirty","true")] (), mknode "w:instrText" [("xml:space","preserve")] tocCmd, mknode "w:fldChar" [("w:fldCharType","separate")] (), mknode "w:fldChar" [("w:fldCharType","end")] () - ]) -- w:r + ] -- w:r ) -- w:p ]) - ])] -- w:sdt + ]] -- w:sdt -- | Convert Pandoc document to two lists of -- OpenXML elements (the main document and footnotes). @@ -809,12 +809,12 @@ writeOpenXML opts (Pandoc meta blocks) = do let includeTOC = writerTableOfContents opts || lookupMetaBool "toc" meta title <- withParaPropM (pStyleM "Title") $ blocksToOpenXML opts [Para tit | not (null tit)] subtitle <- withParaPropM (pStyleM "Subtitle") $ blocksToOpenXML opts [Para subtitle' | not (null subtitle')] - authors <- withParaProp (pCustomStyle "Author") $ blocksToOpenXML opts $ + authors <- withParaPropM (pStyleM "Author") $ blocksToOpenXML opts $ map Para auths date <- withParaPropM (pStyleM "Date") $ blocksToOpenXML opts [Para dat | not (null dat)] abstract <- if null abstract' then return [] - else withParaProp (pCustomStyle "Abstract") $ blocksToOpenXML opts abstract' + else withParaPropM (pStyleM "Abstract") $ blocksToOpenXML opts abstract' let convertSpace (Str x : Space : Str y : xs) = Str (x ++ " " ++ y) : xs convertSpace (Str x : Str y : xs) = Str (x ++ y) : xs convertSpace xs = xs @@ -848,18 +848,12 @@ writeOpenXML opts (Pandoc meta blocks) = do blocksToOpenXML :: (PandocMonad m) => WriterOptions -> [Block] -> WS m [Element] blocksToOpenXML opts bls = concat `fmap` mapM (blockToOpenXML opts) bls -pCustomStyle :: String -> Element -pCustomStyle sty = mknode "w:pStyle" [("w:val",sty)] () - pStyleM :: (PandocMonad m) => String -> WS m XML.Element pStyleM styleName = do styleMaps <- gets stStyleMaps let sty' = getStyleId styleName $ sParaStyleMap styleMaps return $ mknode "w:pStyle" [("w:val",sty')] () -rCustomStyle :: String -> Element -rCustomStyle sty = mknode "w:rStyle" [("w:val",sty)] () - rStyleM :: (PandocMonad m) => String -> WS m XML.Element rStyleM styleName = do styleMaps <- gets stStyleMaps @@ -921,19 +915,19 @@ blockToOpenXML' opts (Header lev (ident,_,_) lst) = do blockToOpenXML' opts (Plain lst) = do isInTable <- gets stInTable let block = blockToOpenXML opts (Para lst) - para <- if isInTable then withParaProp (pCustomStyle "Compact") block else block + prop <- pStyleM "Compact" + para <- if isInTable then withParaProp prop block else block return $ para - -- title beginning with fig: indicates that the image is a figure blockToOpenXML' opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) = do setFirstPara - let prop = pCustomStyle $ + prop <- pStyleM $ if null alt then "Figure" - else "CaptionedFigure" + else "Captioned Figure" paraProps <- local (\env -> env { envParaProperties = EnvProps (Just prop) [] <> envParaProperties env }) (getParaProps False) contents <- inlinesToOpenXML opts [Image attr alt (src,tit)] - captionNode <- withParaProp (pCustomStyle "ImageCaption") + captionNode <- withParaPropM (pStyleM "Image Caption") $ blockToOpenXML opts (Para alt) return $ mknode "w:p" [] (paraProps ++ contents) : captionNode blockToOpenXML' opts (Para lst) @@ -944,10 +938,10 @@ blockToOpenXML' opts (Para lst) [x] -> isDisplayMath x _ -> False paraProps <- getParaProps displayMathPara - bodyTextStyle <- pStyleM "Body Text" + bodyTextStyle <- if isFirstPara + then pStyleM "First Paragraph" + else pStyleM "Body Text" let paraProps' = case paraProps of - [] | isFirstPara -> [mknode "w:pPr" [] - [pCustomStyle "FirstParagraph"]] [] -> [mknode "w:pPr" [] [bodyTextStyle]] ps -> ps modify $ \s -> s { stFirstPara = False } @@ -965,7 +959,7 @@ blockToOpenXML' opts (BlockQuote blocks) = do setFirstPara return p blockToOpenXML' opts (CodeBlock attrs@(ident, _, _) str) = do - p <- withParaProp (pCustomStyle "SourceCode") (blockToOpenXML opts $ Para [Code attrs str]) + p <- withParaPropM (pStyleM "Source Code") (blockToOpenXML opts $ Para [Code attrs str]) setFirstPara wrapBookmark ident p blockToOpenXML' _ HorizontalRule = do @@ -981,7 +975,7 @@ blockToOpenXML' opts (Table caption aligns widths headers rows) = do let captionStr = stringify caption caption' <- if null caption then return [] - else withParaProp (pCustomStyle "TableCaption") + else withParaPropM (pStyleM "Table Caption") $ blockToOpenXML opts (Para caption) let alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] () -- Table cells require a <w:p> element, even an empty one! @@ -997,7 +991,8 @@ blockToOpenXML' opts (Table caption aligns widths headers rows) = do [ mknode "w:tcBorders" [] $ mknode "w:bottom" [("w:val","single")] () , mknode "w:vAlign" [("w:val","bottom")] () ] - let emptyCell = [mknode "w:p" [] [mknode "w:pPr" [] [pCustomStyle "Compact"]]] + compactStyle <- pStyleM "Compact" + let emptyCell = [mknode "w:p" [] [mknode "w:pPr" [] [compactStyle]]] let mkcell border contents = mknode "w:tc" [] $ [ borderProps | border ] ++ if null contents @@ -1030,20 +1025,17 @@ blockToOpenXML' opts (Table caption aligns widths headers rows) = do : [ mkrow True headers' | hasHeader ] ++ map (mkrow False) rows' )] -blockToOpenXML' opts (BulletList lst) = do - let marker = BulletMarker - addList marker - numid <- getNumId - l <- asList $ concat `fmap` mapM (listItemToOpenXML opts numid) lst - setFirstPara - return l -blockToOpenXML' opts (OrderedList (start, numstyle, numdelim) lst) = do - let marker = NumberMarker numstyle numdelim start - addList marker - numid <- getNumId - l <- asList $ concat `fmap` mapM (listItemToOpenXML opts numid) lst - setFirstPara - return l +blockToOpenXML' opts el + | BulletList lst <- el = addOpenXMLList BulletMarker lst + | OrderedList (start, numstyle, numdelim) lst <- el + = addOpenXMLList (NumberMarker numstyle numdelim start) lst + where + addOpenXMLList marker lst = do + addList marker + numid <- getNumId + l <- asList $ concat `fmap` mapM (listItemToOpenXML opts numid) lst + setFirstPara + return l blockToOpenXML' opts (DefinitionList items) = do l <- concat `fmap` mapM (definitionListItemToOpenXML opts) items setFirstPara @@ -1051,9 +1043,9 @@ blockToOpenXML' opts (DefinitionList items) = do definitionListItemToOpenXML :: (PandocMonad m) => WriterOptions -> ([Inline],[[Block]]) -> WS m [Element] definitionListItemToOpenXML opts (term,defs) = do - term' <- withParaProp (pCustomStyle "DefinitionTerm") + term' <- withParaPropM (pStyleM "Definition Term") $ blockToOpenXML opts (Para term) - defs' <- withParaProp (pCustomStyle "Definition") + defs' <- withParaPropM (pStyleM "Definition") $ concat `fmap` mapM (blocksToOpenXML opts) defs return $ term' ++ defs' @@ -1159,7 +1151,7 @@ inlineToOpenXML' _ (Str str) = formattedString str inlineToOpenXML' opts Space = inlineToOpenXML opts (Str " ") inlineToOpenXML' opts SoftBreak = inlineToOpenXML opts (Str " ") -inlineToOpenXML' opts (Span (_,["underline"],_) ils) = do +inlineToOpenXML' opts (Span (_,["underline"],_) ils) = withTextProp (mknode "w:u" [("w:val","single")] ()) $ inlinesToOpenXML opts ils inlineToOpenXML' _ (Span (ident,["comment-start"],kvs) ils) = do @@ -1192,18 +1184,21 @@ inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do Just "rtl" -> local (\env -> env { envRTL = True }) Just "ltr" -> local (\env -> env { envRTL = False }) _ -> id - let off x = withTextProp (mknode x [("w:val","0")] ()) - let pmod = (if "csl-no-emph" `elem` classes then off "w:i" else id) . + off x = withTextProp (mknode x [("w:val","0")] ()) + pmod = (if "csl-no-emph" `elem` classes then off "w:i" else id) . (if "csl-no-strong" `elem` classes then off "w:b" else id) . (if "csl-no-smallcaps" `elem` classes then off "w:smallCaps" else id) + getChangeAuthorDate = do + defaultAuthor <- asks envChangesAuthor + defaultDate <- asks envChangesDate + let author = fromMaybe defaultAuthor (lookup "author" kvs) + date = fromMaybe defaultDate (lookup "date" kvs) + return (author, date) insmod <- if "insertion" `elem` classes then do - defaultAuthor <- asks envChangesAuthor - defaultDate <- asks envChangesDate - let author = fromMaybe defaultAuthor (lookup "author" kvs) - date = fromMaybe defaultDate (lookup "date" kvs) + (author, date) <- getChangeAuthorDate insId <- gets stInsId modify $ \s -> s{stInsId = insId + 1} return $ \f -> do @@ -1215,10 +1210,7 @@ inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do else return id delmod <- if "deletion" `elem` classes then do - defaultAuthor <- asks envChangesAuthor - defaultDate <- asks envChangesDate - let author = fromMaybe defaultAuthor (lookup "author" kvs) - date = fromMaybe defaultDate (lookup "date" kvs) + (author, date) <- getChangeAuthorDate delId <- gets stDelId modify $ \s -> s{stDelId = delId + 1} return $ \f -> local (\env->env{envInDel=True}) $ do @@ -1266,14 +1258,17 @@ inlineToOpenXML' opts (Math mathType str) = do Left il -> inlineToOpenXML' opts il inlineToOpenXML' opts (Cite _ lst) = inlinesToOpenXML opts lst inlineToOpenXML' opts (Code attrs str) = do + let alltoktypes = [KeywordTok ..] + tokTypesMap <- mapM (\tt -> (,) tt <$> rStyleM (show tt)) alltoktypes let unhighlighted = intercalate [br] `fmap` mapM formattedString (lines str) formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok) - toHlTok (toktype,tok) = mknode "w:r" [] - [ mknode "w:rPr" [] - [ rCustomStyle (show toktype) ] - , mknode "w:t" [("xml:space","preserve")] (T.unpack tok) ] - withTextProp (rCustomStyle "VerbatimChar") + toHlTok (toktype,tok) = + mknode "w:r" [] + [ mknode "w:rPr" [] $ + maybeToList (lookup toktype tokTypesMap) + , mknode "w:t" [("xml:space","preserve")] (T.unpack tok) ] + withTextPropM (rStyleM "Verbatim Char") $ if isNothing (writerHighlightStyle opts) then unhighlighted else case highlight (writerSyntaxMap opts) @@ -1431,12 +1426,12 @@ defaultFootnotes :: [Element] defaultFootnotes = [ mknode "w:footnote" [("w:type", "separator"), ("w:id", "-1")] [ mknode "w:p" [] - [mknode "w:r" [] $ + [mknode "w:r" [] [ mknode "w:separator" [] ()]]] , mknode "w:footnote" [("w:type", "continuationSeparator"), ("w:id", "0")] [ mknode "w:p" [] - [ mknode "w:r" [] $ + [ mknode "w:r" [] [ mknode "w:continuationSeparator" [] ()]]]] diff --git a/test/Tests/Readers/Docx.hs b/test/Tests/Readers/Docx.hs index 9d0913e55..583a6ec18 100644 --- a/test/Tests/Readers/Docx.hs +++ b/test/Tests/Readers/Docx.hs @@ -256,6 +256,10 @@ tests = [ testGroup "document" "docx/lists.docx" "docx/lists.native" , testCompare + "compact lists" + "docx/lists-compact.docx" + "docx/lists-compact.native" + , testCompare "lists with level overrides" "docx/lists_level_override.docx" "docx/lists_level_override.native" @@ -425,6 +429,11 @@ tests = [ testGroup "document" "custom styles (`+styles`) enabled" "docx/custom-style-reference.docx" "docx/custom-style-with-styles.native" + , testCompareWithOpts + def{readerExtensions=extensionsFromList [Ext_styles]} + "custom styles (`+styles`): Compact style is removed from output" + "docx/compact-style-removal.docx" + "docx/compact-style-removal.native" ] , testGroup "metadata" [ testCompareWithOpts def{readerStandalone=True} diff --git a/test/docx/0_level_headers.native b/test/docx/0_level_headers.native index 804ad8732..6d8269b21 100644 --- a/test/docx/0_level_headers.native +++ b/test/docx/0_level_headers.native @@ -15,10 +15,10 @@ ,Para [Str "FIGURES",Space,Str "iv"] ,Para [Str "TABLES",Space,Str "v"] ,Para [Str "SECTION",Space,Str "1",Space,Str "Introduction",Space,Str "2"] -,Header 1 ("figures",["Heading0"],[]) [Str "FIGURES"] +,Header 1 ("figures",["Heading-0"],[]) [Str "FIGURES"] ,Para [Strong [Str "Figure",Space,Str "Page"]] ,Para [Strong [Str "No",Space,Str "table",Space,Str "of",Space,Str "figures",Space,Str "entries",Space,Str "found."]] -,Header 1 ("tables",["Heading0"],[]) [Str "TABLES"] +,Header 1 ("tables",["Heading-0"],[]) [Str "TABLES"] ,Para [Strong [Str "Table",Space,Str "Page"]] ,Para [Strong [Str "No",Space,Str "table",Space,Str "of",Space,Str "figures",Space,Str "entries",Space,Str "found."]] ,Header 1 ("introduction",[],[]) [Str "Introduction"] diff --git a/test/docx/adjacent_codeblocks.docx b/test/docx/adjacent_codeblocks.docx Binary files differindex d61fb45d5..0fd44a183 100644 --- a/test/docx/adjacent_codeblocks.docx +++ b/test/docx/adjacent_codeblocks.docx diff --git a/test/docx/compact-style-removal.docx b/test/docx/compact-style-removal.docx Binary files differnew file mode 100644 index 000000000..fde0064db --- /dev/null +++ b/test/docx/compact-style-removal.docx diff --git a/test/docx/compact-style-removal.native b/test/docx/compact-style-removal.native new file mode 100644 index 000000000..340878ba0 --- /dev/null +++ b/test/docx/compact-style-removal.native @@ -0,0 +1,5 @@ +[OrderedList (1,Decimal,Period) + [[Plain [Str "One"]] + ,[Plain [Str "Two"]] + ,[Plain [Str "Three"]] + ,[Plain [Str "Four"]]]] diff --git a/test/docx/custom-style-with-styles.native b/test/docx/custom-style-with-styles.native index 61f11911d..1ad7d88cc 100644 --- a/test/docx/custom-style-with-styles.native +++ b/test/docx/custom-style-with-styles.native @@ -1,7 +1,7 @@ -[Div ("",[],[("custom-style","FirstParagraph")]) +[Div ("",[],[("custom-style","First Paragraph")]) [Para [Str "This",Space,Str "is",Space,Str "some",Space,Str "text."]] -,Div ("",[],[("custom-style","BodyText")]) +,Div ("",[],[("custom-style","Body Text")]) [Para [Str "This",Space,Str "is",Space,Str "text",Space,Str "with",Space,Str "an",Space,Span ("",[],[("custom-style","Emphatic")]) [Str "emphasized"],Space,Str "text",Space,Str "style.",Space,Str "And",Space,Str "this",Space,Str "is",Space,Str "text",Space,Str "with",Space,Str "a",Space,Span ("",[],[("custom-style","Strengthened")]) [Str "strengthened"],Space,Str "text",Space,Str "style."]] -,Div ("",[],[("custom-style","MyBlockStyle")]) +,Div ("",[],[("custom-style","My Block Style")]) [BlockQuote [Para [Str "Here",Space,Str "is",Space,Str "a",Space,Str "styled",Space,Str "paragraph",Space,Str "that",Space,Str "inherits",Space,Str "from",Space,Str "Block",Space,Str "Text."]]]] diff --git a/test/docx/lists-compact.docx b/test/docx/lists-compact.docx Binary files differnew file mode 100644 index 000000000..d7f9e4a06 --- /dev/null +++ b/test/docx/lists-compact.docx diff --git a/test/docx/lists-compact.native b/test/docx/lists-compact.native new file mode 100644 index 000000000..340878ba0 --- /dev/null +++ b/test/docx/lists-compact.native @@ -0,0 +1,5 @@ +[OrderedList (1,Decimal,Period) + [[Plain [Str "One"]] + ,[Plain [Str "Two"]] + ,[Plain [Str "Three"]] + ,[Plain [Str "Four"]]]] diff --git a/test/docx/lists.docx b/test/docx/lists.docx Binary files differindex 8b46351d9..356dc1ea9 100644 --- a/test/docx/lists.docx +++ b/test/docx/lists.docx diff --git a/test/docx/lists.native b/test/docx/lists.native index af922b335..1192da709 100644 --- a/test/docx/lists.native +++ b/test/docx/lists.native @@ -15,4 +15,4 @@ ,Para [Str "Sub",Space,Str "paragraph"]]]]]] ,[Para [Str "Same",Space,Str "list"]]] ,BulletList - [[Para [Str "Different",Space,Str "list",Space,Str "adjacent",Space,Str "to",Space,Str "the",Space,Str "one",Space,Str "above."]]]] + [[Plain [Str "Different",Space,Str "list",Space,Str "adjacent",Space,Str "to",Space,Str "the",Space,Str "one",Space,Str "above."]]]] diff --git a/test/docx/nested_anchors_in_header.native b/test/docx/nested_anchors_in_header.native index 562f60215..314b31663 100644 --- a/test/docx/nested_anchors_in_header.native +++ b/test/docx/nested_anchors_in_header.native @@ -1,4 +1,4 @@ -[Header 1 ("\1086\1075\1083\1072\1074\1083\1077\1085\1080\1077",["TOCHeading"],[]) [Str "\1054\1075\1083\1072\1074\1083\1077\1085\1080\1077"] +[Header 1 ("\1086\1075\1083\1072\1074\1083\1077\1085\1080\1077",["TOC-Heading"],[]) [Str "\1054\1075\1083\1072\1074\1083\1077\1085\1080\1077"] ,Para [Link ("",[],[]) [Str "Short",Space,Str "instructions",Space,Str "1"] ("#short-instructions","")] ,Para [Link ("",[],[]) [Str "Some",Space,Str "instructions",Space,Str "1"] ("#some-instructions","")] ,Para [Link ("",[],[]) [Str "Remote",Space,Str "folder",Space,Str "or",Space,Str "longlonglonglonglong",Space,Str "file",Space,Str "with",Space,Str "manymanymanymany",Space,Str "letters",Space,Str "inside",Space,Str "opening",Space,Str "2"] ("#remote-folder-or-longlonglonglonglong-file-with-manymanymanymany-letters-inside-opening","")] |