aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/Text/Pandoc/Readers/Docx.hs162
-rw-r--r--src/Text/Pandoc/Readers/Docx/Lists.hs25
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse.hs283
-rw-r--r--test/Tests/Readers/Docx.hs9
-rw-r--r--test/docx/compact-style-removal.docxbin0 -> 9951 bytes
-rw-r--r--test/docx/compact-style-removal.native5
-rw-r--r--test/docx/lists-compact.docxbin0 -> 9952 bytes
-rw-r--r--test/docx/lists-compact.native5
8 files changed, 306 insertions, 183 deletions
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index a26986af2..9d17ab118 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -3,6 +3,7 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE MultiWayIf #-}
+{-# LANGUAGE FlexibleContexts #-}
{- |
Module : Text.Pandoc.Readers.Docx
Copyright : Copyright (C) 2014-2019 Jesse Rosenthal
@@ -65,6 +66,7 @@ import Control.Monad.State.Strict
import qualified Data.ByteString.Lazy as B
import Data.Default (Default)
import Data.List (delete, intersect)
+import Data.Char (isSpace)
import qualified Data.Map as M
import Data.Maybe (isJust, fromMaybe)
import Data.Sequence (ViewL (..), viewl)
@@ -133,13 +135,13 @@ evalDocxContext :: PandocMonad m => DocxContext m a -> DEnv -> DState -> m a
evalDocxContext ctx env st = flip evalStateT st $ runReaderT ctx env
-- This is empty, but we put it in for future-proofing.
-spansToKeep :: [String]
+spansToKeep :: [CharStyleName]
spansToKeep = []
-divsToKeep :: [String]
-divsToKeep = ["list-item", "Definition", "DefinitionTerm"]
+divsToKeep :: [ParaStyleName]
+divsToKeep = ["Definition", "Definition Term"]
-metaStyles :: M.Map String String
+metaStyles :: M.Map ParaStyleName String
metaStyles = M.fromList [ ("Title", "title")
, ("Subtitle", "subtitle")
, ("Author", "author")
@@ -151,7 +153,7 @@ sepBodyParts = span (\bp -> isMetaPar bp || isEmptyPar bp)
isMetaPar :: BodyPart -> Bool
isMetaPar (Paragraph pPr _) =
- not $ null $ intersect (pStyle pPr) (M.keys metaStyles)
+ not $ null $ intersect (getStyleNames $ pStyle pPr) (M.keys metaStyles)
isMetaPar _ = False
isEmptyPar :: BodyPart -> Bool
@@ -168,7 +170,7 @@ bodyPartsToMeta' :: PandocMonad m => [BodyPart] -> DocxContext m (M.Map String M
bodyPartsToMeta' [] = return M.empty
bodyPartsToMeta' (bp : bps)
| (Paragraph pPr parParts) <- bp
- , (c : _)<- (pStyle pPr) `intersect` (M.keys metaStyles)
+ , (c : _)<- getStyleNames (pStyle pPr) `intersect` M.keys metaStyles
, (Just metaField) <- M.lookup c metaStyles = do
inlines <- smushInlines <$> mapM parPartToInlines parParts
remaining <- bodyPartsToMeta' bps
@@ -198,11 +200,29 @@ fixAuthors (MetaBlocks blks) =
g _ = MetaInlines []
fixAuthors mv = mv
-codeStyles :: [String]
-codeStyles = ["VerbatimChar"]
+isInheritedFromStyles :: (Eq (StyleName s), HasStyleName s, HasParentStyle s) => [StyleName s] -> s -> Bool
+isInheritedFromStyles names sty
+ | getStyleName sty `elem` names = True
+ | Just psty <- getParentStyle sty = isInheritedFromStyles names psty
+ | otherwise = False
-codeDivs :: [String]
-codeDivs = ["SourceCode"]
+hasStylesInheritedFrom :: [ParaStyleName] -> ParagraphStyle -> Bool
+hasStylesInheritedFrom ns s = any (isInheritedFromStyles ns) $ pStyle s
+
+removeStyleNamed :: ParaStyleName -> ParagraphStyle -> ParagraphStyle
+removeStyleNamed sn ps = ps{pStyle = filter (\psd -> getStyleName psd /= sn) $ pStyle ps}
+
+isCodeCharStyle :: CharStyle -> Bool
+isCodeCharStyle = isInheritedFromStyles ["Verbatim Char"]
+
+isCodeDiv :: ParagraphStyle -> Bool
+isCodeDiv = hasStylesInheritedFrom ["Source Code"]
+
+isBlockQuote :: ParStyle -> Bool
+isBlockQuote =
+ isInheritedFromStyles [
+ "Quote", "Block Text", "Block Quote", "Block Quotation"
+ ]
runElemToInlines :: RunElem -> Inlines
runElemToInlines (TextRun s) = text s
@@ -228,57 +248,31 @@ parPartToString (InternalHyperLink _ runs) = concatMap runToString runs
parPartToString (ExternalHyperLink _ runs) = concatMap runToString runs
parPartToString _ = ""
-blacklistedCharStyles :: [String]
+blacklistedCharStyles :: [CharStyleName]
blacklistedCharStyles = ["Hyperlink"]
resolveDependentRunStyle :: PandocMonad m => RunStyle -> DocxContext m RunStyle
resolveDependentRunStyle rPr
- | Just (s, _) <- rStyle rPr, s `elem` blacklistedCharStyles =
+ | Just s <- rParentStyle rPr
+ , getStyleName s `elem` blacklistedCharStyles =
return rPr
- | Just (_, cs) <- rStyle rPr = do
+ | Just s <- rParentStyle rPr = do
opts <- asks docxOptions
if isEnabled Ext_styles opts
then return rPr
- else do rPr' <- resolveDependentRunStyle cs
- return $
- RunStyle { isBold = case isBold rPr of
- Just bool -> Just bool
- Nothing -> isBold rPr'
- , isItalic = case isItalic rPr of
- Just bool -> Just bool
- Nothing -> isItalic rPr'
- , isSmallCaps = case isSmallCaps rPr of
- Just bool -> Just bool
- Nothing -> isSmallCaps rPr'
- , isStrike = case isStrike rPr of
- Just bool -> Just bool
- Nothing -> isStrike rPr'
- , isRTL = case isRTL rPr of
- Just bool -> Just bool
- Nothing -> isRTL rPr'
- , rVertAlign = case rVertAlign rPr of
- Just valign -> Just valign
- Nothing -> rVertAlign rPr'
- , rUnderline = case rUnderline rPr of
- Just ulstyle -> Just ulstyle
- Nothing -> rUnderline rPr'
- , rStyle = rStyle rPr
- }
+ else leftBiasedMergeRunStyle rPr <$> resolveDependentRunStyle (cStyleData s)
| otherwise = return rPr
runStyleToTransform :: PandocMonad m => RunStyle -> DocxContext m (Inlines -> Inlines)
runStyleToTransform rPr
- | Just (s, _) <- rStyle rPr
- , s `elem` spansToKeep = do
- transform <- runStyleToTransform rPr{rStyle = Nothing}
- return $ spanWith ("", [s], []) . transform
- | Just (s, _) <- rStyle rPr = do
- opts <- asks docxOptions
- let extraInfo = if isEnabled Ext_styles opts
- then spanWith ("", [], [("custom-style", s)])
- else id
- transform <- runStyleToTransform rPr{rStyle = Nothing}
- return $ extraInfo . transform
+ | Just sn <- getStyleName <$> rParentStyle rPr
+ , sn `elem` spansToKeep = do
+ transform <- runStyleToTransform rPr{rParentStyle = Nothing}
+ return $ spanWith ("", [normalizeToClassName sn], []) . transform
+ | Just s <- rParentStyle rPr = do
+ ei <- extraInfo spanWith s
+ transform <- runStyleToTransform rPr{rParentStyle = Nothing}
+ return $ ei . transform
| Just True <- isItalic rPr = do
transform <- runStyleToTransform rPr{isItalic = Nothing}
return $ emph . transform
@@ -310,8 +304,7 @@ runStyleToTransform rPr
runToInlines :: PandocMonad m => Run -> DocxContext m Inlines
runToInlines (Run rs runElems)
- | Just (s, _) <- rStyle rs
- , s `elem` codeStyles = do
+ | maybe False isCodeCharStyle $ rParentStyle rs = do
rPr <- resolveDependentRunStyle rs
let codeString = code $ concatMap runElemToString runElems
return $ case rVertAlign rPr of
@@ -526,39 +519,49 @@ trimSps (Many ils) = Many $ Seq.dropWhileL isSp $Seq.dropWhileR isSp ils
isSp LineBreak = True
isSp _ = False
+extraInfo :: (Eq (StyleName a), PandocMonad m, HasStyleName a)
+ => (Attr -> i -> i) -> a -> DocxContext m (i -> i)
+extraInfo f s = do
+ opts <- asks docxOptions
+ return $ if | isEnabled Ext_styles opts
+ -> f ("", [], [("custom-style", fromStyleName $ getStyleName s)])
+ | otherwise -> id
+
parStyleToTransform :: PandocMonad m => ParagraphStyle -> DocxContext m (Blocks -> Blocks)
parStyleToTransform pPr
| (c:cs) <- pStyle pPr
- , c `elem` divsToKeep = do
+ , getStyleName c `elem` divsToKeep = do
let pPr' = pPr { pStyle = cs }
transform <- parStyleToTransform pPr'
- return $ divWith ("", [c], []) . transform
+ return $ divWith ("", [normalizeToClassName $ getStyleName c], []) . transform
| (c:cs) <- pStyle pPr,
- c `elem` listParagraphDivs = do
+ getStyleName c `elem` listParagraphStyles = do
let pPr' = pPr { pStyle = cs, indentation = Nothing}
transform <- parStyleToTransform pPr'
- return $ divWith ("", [c], []) . transform
+ return $ divWith ("", [normalizeToClassName $ getStyleName c], []) . transform
| (c:cs) <- pStyle pPr = do
- opts <- asks docxOptions
- let pPr' = pPr { pStyle = cs}
+ let pPr' = pPr { pStyle = cs }
transform <- parStyleToTransform pPr'
- let extraInfo = if isEnabled Ext_styles opts
- then divWith ("", [], [("custom-style", c)])
- else id
- return $ extraInfo . (if fromMaybe False (pBlockQuote pPr) then blockQuote else id) . transform
+ ei <- extraInfo divWith c
+ return $ ei . (if isBlockQuote c then blockQuote else id) . transform
| null (pStyle pPr)
, Just left <- indentation pPr >>= leftParIndent = do
let pPr' = pPr { indentation = Nothing }
hang = fromMaybe 0 $ indentation pPr >>= hangingParIndent
transform <- parStyleToTransform pPr'
- return $ if (left - hang) > 0
+ return $ if (left - hang) > 0
then blockQuote . transform
else transform
parStyleToTransform _ = return id
+normalizeToClassName :: (FromStyleName a) => a -> String
+normalizeToClassName = map go . fromStyleName
+ where go c | isSpace c = '-'
+ | otherwise = c
+
bodyPartToBlocks :: PandocMonad m => BodyPart -> DocxContext m Blocks
bodyPartToBlocks (Paragraph pPr parparts)
- | not $ null $ codeDivs `intersect` (pStyle pPr) = do
+ | isCodeDiv pPr = do
transform <- parStyleToTransform pPr
return $
transform $
@@ -568,13 +571,16 @@ bodyPartToBlocks (Paragraph pPr parparts)
ils <-local (\s-> s{docxInHeaderBlock=True})
(smushInlines <$> mapM parPartToInlines parparts)
makeHeaderAnchor $
- headerWith ("", delete style (pStyle pPr), []) n ils
+ headerWith ("", map normalizeToClassName . delete style $ getStyleNames (pStyle pPr), []) n ils
| otherwise = do
ils <- trimSps . smushInlines <$> mapM parPartToInlines parparts
prevParaIls <- gets docxPrevPara
dropIls <- gets docxDropCap
let ils' = dropIls <> ils
- if dropCap pPr
+ let (paraOrPlain, pPr')
+ | hasStylesInheritedFrom ["Compact"] pPr = (plain, removeStyleNamed "Compact" pPr)
+ | otherwise = (para, pPr)
+ if dropCap pPr'
then do modify $ \s -> s { docxDropCap = ils' }
return mempty
else do modify $ \s -> s { docxDropCap = mempty }
@@ -583,41 +589,41 @@ bodyPartToBlocks (Paragraph pPr parparts)
ils'
handleInsertion = do
modify $ \s -> s {docxPrevPara = mempty}
- transform <- parStyleToTransform pPr
- return $ transform $ para ils''
+ transform <- parStyleToTransform pPr'
+ return $ transform $ paraOrPlain ils''
opts <- asks docxOptions
if | isNull ils'' && not (isEnabled Ext_empty_paragraphs opts) ->
return mempty
- | Just (TrackedChange Insertion _) <- pChange pPr
+ | Just (TrackedChange Insertion _) <- pChange pPr'
, AcceptChanges <- readerTrackChanges opts ->
handleInsertion
- | Just (TrackedChange Insertion _) <- pChange pPr
+ | Just (TrackedChange Insertion _) <- pChange pPr'
, RejectChanges <- readerTrackChanges opts -> do
modify $ \s -> s {docxPrevPara = ils''}
return mempty
- | Just (TrackedChange Insertion cInfo) <- pChange pPr
+ | Just (TrackedChange Insertion cInfo) <- pChange pPr'
, AllChanges <- readerTrackChanges opts
, ChangeInfo _ cAuthor cDate <- cInfo -> do
let attr = ("", ["paragraph-insertion"], [("author", cAuthor), ("date", cDate)])
insertMark = spanWith attr mempty
- transform <- parStyleToTransform pPr
+ transform <- parStyleToTransform pPr'
return $ transform $
- para $ ils'' <> insertMark
- | Just (TrackedChange Deletion _) <- pChange pPr
+ paraOrPlain $ ils'' <> insertMark
+ | Just (TrackedChange Deletion _) <- pChange pPr'
, AcceptChanges <- readerTrackChanges opts -> do
modify $ \s -> s {docxPrevPara = ils''}
return mempty
- | Just (TrackedChange Deletion _) <- pChange pPr
+ | Just (TrackedChange Deletion _) <- pChange pPr'
, RejectChanges <- readerTrackChanges opts ->
handleInsertion
- | Just (TrackedChange Deletion cInfo) <- pChange pPr
+ | Just (TrackedChange Deletion cInfo) <- pChange pPr'
, AllChanges <- readerTrackChanges opts
, ChangeInfo _ cAuthor cDate <- cInfo -> do
let attr = ("", ["paragraph-deletion"], [("author", cAuthor), ("date", cDate)])
insertMark = spanWith attr mempty
- transform <- parStyleToTransform pPr
+ transform <- parStyleToTransform pPr'
return $ transform $
- para $ ils'' <> insertMark
+ paraOrPlain $ ils'' <> insertMark
| otherwise -> handleInsertion
bodyPartToBlocks (ListItem pPr numId lvl (Just levelInfo) parparts) = do
-- We check whether this current numId has previously been used,
@@ -638,7 +644,7 @@ bodyPartToBlocks (ListItem pPr numId lvl (Just levelInfo) parparts) = do
blks <- bodyPartToBlocks (Paragraph pPr parparts)
return $ divWith ("", ["list-item"], kvs) blks
bodyPartToBlocks (ListItem pPr _ _ _ parparts) =
- let pPr' = pPr {pStyle = "ListParagraph": pStyle pPr}
+ let pPr' = pPr {pStyle = constructBogusParStyleData "list-paragraph": pStyle pPr}
in
bodyPartToBlocks $ Paragraph pPr' parparts
bodyPartToBlocks (Tbl _ _ _ []) =
diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs
index cc390f122..eb24640c5 100644
--- a/src/Text/Pandoc/Readers/Docx/Lists.hs
+++ b/src/Text/Pandoc/Readers/Docx/Lists.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Docx.Lists
Copyright : Copyright (C) 2014-2019 Jesse Rosenthal
@@ -14,13 +15,16 @@ Functions for converting flat docx paragraphs into nested lists.
module Text.Pandoc.Readers.Docx.Lists ( blocksToBullets
, blocksToDefinitions
, listParagraphDivs
+ , listParagraphStyles
) where
import Prelude
import Data.List
import Data.Maybe
+import Data.String (fromString)
import Text.Pandoc.Generic (bottomUp)
import Text.Pandoc.JSON
+import Text.Pandoc.Readers.Docx.Parse (ParaStyleName)
import Text.Pandoc.Shared (trim, safeRead)
isListItem :: Block -> Bool
@@ -79,7 +83,10 @@ getListType b@(Div (_, _, kvs) _) | isListItem b =
getListType _ = Nothing
listParagraphDivs :: [String]
-listParagraphDivs = ["ListParagraph"]
+listParagraphDivs = ["list-paragraph"]
+
+listParagraphStyles :: [ParaStyleName]
+listParagraphStyles = map fromString listParagraphDivs
-- This is a first stab at going through and attaching meaning to list
-- paragraphs, without an item marker, following a list item. We
@@ -160,7 +167,7 @@ blocksToDefinitions' defAcc acc [] =
reverse $ DefinitionList (reverse defAcc) : acc
blocksToDefinitions' defAcc acc
(Div (_, classes1, _) blks1 : Div (ident2, classes2, kvs2) blks2 : blks)
- | "DefinitionTerm" `elem` classes1 && "Definition" `elem` classes2 =
+ | "Definition-Term" `elem` classes1 && "Definition" `elem` classes2 =
let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2)
pair = if remainingAttr2 == ("", [], []) then (concatMap plainParaInlines blks1, [blks2]) else (concatMap plainParaInlines blks1, [[Div remainingAttr2 blks2]])
in
@@ -169,12 +176,12 @@ blocksToDefinitions' ((defTerm, defItems):defs) acc
(Div (ident2, classes2, kvs2) blks2 : blks)
| "Definition" `elem` classes2 =
let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2)
- defItems2 = case remainingAttr2 == ("", [], []) of
- True -> blks2
- False -> [Div remainingAttr2 blks2]
- defAcc' = case null defItems of
- True -> (defTerm, [defItems2]) : defs
- False -> (defTerm, init defItems ++ [last defItems ++ defItems2]) : defs
+ defItems2 = if remainingAttr2 == ("", [], [])
+ then blks2
+ else [Div remainingAttr2 blks2]
+ defAcc' = if null defItems
+ then (defTerm, [defItems2]) : defs
+ else (defTerm, init defItems ++ [last defItems ++ defItems2]) : defs
in
blocksToDefinitions' defAcc' acc blks
blocksToDefinitions' [] acc (b:blks) =
@@ -198,7 +205,5 @@ removeListDivs' blk = [blk]
removeListDivs :: [Block] -> [Block]
removeListDivs = concatMap removeListDivs'
-
-
blocksToDefinitions :: [Block] -> [Block]
blocksToDefinitions = blocksToDefinitions' [] []
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 330c9208f..00c5fb0be 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -1,7 +1,11 @@
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{- |
Module : Text.Pandoc.Readers.Docx.Parse
Copyright : Copyright (C) 2014-2019 Jesse Rosenthal
@@ -31,6 +35,8 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..)
, VertAlign(..)
, ParIndentation(..)
, ParagraphStyle(..)
+ , ParStyle
+ , CharStyle(cStyleData)
, Row(..)
, Cell(..)
, TrackedChange(..)
@@ -38,8 +44,17 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..)
, ChangeInfo(..)
, FieldInfo(..)
, Level(..)
+ , ParaStyleName
+ , CharStyleName
+ , FromStyleName(..)
+ , HasStyleName(..)
+ , HasParentStyle(..)
, archiveToDocx
, archiveToDocxWithWarnings
+ , getStyleNames
+ , pHeading
+ , constructBogusParStyleData
+ , leftBiasedMergeRunStyle
) where
import Prelude
import Codec.Archive.Zip
@@ -49,10 +64,13 @@ import Control.Monad.Reader
import Control.Monad.State.Strict
import Data.Bits ((.|.))
import qualified Data.ByteString.Lazy as B
-import Data.Char (chr, ord, readLitChar)
+import Data.Char (chr, ord, readLitChar, toLower)
import Data.List
+import Data.Function (on)
+import Data.String (IsString(..))
import qualified Data.Map as M
import Data.Maybe
+import Data.Coerce
import System.FilePath
import Text.Pandoc.Readers.Docx.Util
import Text.Pandoc.Readers.Docx.Fields
@@ -160,13 +178,9 @@ newtype Body = Body [BodyPart]
type Media = [(FilePath, B.ByteString)]
-type CharStyle = (String, RunStyle)
+type CharStyleMap = M.Map CharStyleId CharStyle
-type ParStyle = (String, ParStyleData)
-
-type CharStyleMap = M.Map String RunStyle
-
-type ParStyleMap = M.Map String ParStyleData
+type ParStyleMap = M.Map ParaStyleId ParStyle
data Numbering = Numbering NameSpaces [Numb] [AbstractNumb]
deriving Show
@@ -213,12 +227,9 @@ data ChangeInfo = ChangeInfo ChangeId Author ChangeDate
data TrackedChange = TrackedChange ChangeType ChangeInfo
deriving Show
-data ParagraphStyle = ParagraphStyle { pStyle :: [String]
+data ParagraphStyle = ParagraphStyle { pStyle :: [ParStyle]
, indentation :: Maybe ParIndentation
, dropCap :: Bool
- , pHeading :: Maybe (String, Int)
- , pNumInfo :: Maybe (String, String)
- , pBlockQuote :: Maybe Bool
, pChange :: Maybe TrackedChange
}
deriving Show
@@ -227,9 +238,6 @@ defaultParagraphStyle :: ParagraphStyle
defaultParagraphStyle = ParagraphStyle { pStyle = []
, indentation = Nothing
, dropCap = False
- , pHeading = Nothing
- , pNumInfo = Nothing
- , pBlockQuote = Nothing
, pChange = Nothing
}
@@ -254,6 +262,49 @@ newtype Row = Row [Cell]
newtype Cell = Cell [BodyPart]
deriving Show
+newtype CharStyleId = CharStyleId { fromCharStyleId :: String }
+ deriving (Show, Eq, Ord, FromStyleId)
+newtype ParaStyleId = ParaStyleId { fromParaStyleId :: String }
+ deriving (Show, Eq, Ord, FromStyleId)
+
+newtype CharStyleName = CharStyleName { fromCharStyleName :: CIString }
+ deriving (Show, Eq, Ord, IsString, FromStyleName)
+newtype ParaStyleName = ParaStyleName { fromParaStyleName :: CIString }
+ deriving (Show, Eq, Ord, IsString, FromStyleName)
+
+-- Case-insensitive comparisons
+newtype CIString = CIString String deriving (Show, IsString, FromStyleName)
+
+class FromStyleName a where
+ fromStyleName :: a -> String
+
+instance FromStyleName String where
+ fromStyleName = id
+
+class FromStyleId a where
+ fromStyleId :: a -> String
+
+instance FromStyleId String where
+ fromStyleId = id
+
+instance Eq CIString where
+ (==) = (==) `on` map toLower . coerce
+
+instance Ord CIString where
+ compare = compare `on` map toLower . coerce
+
+leftBiasedMergeRunStyle :: RunStyle -> RunStyle -> RunStyle
+leftBiasedMergeRunStyle a b = RunStyle
+ { isBold = isBold a <|> isBold b
+ , isItalic = isItalic a <|> isItalic b
+ , isSmallCaps = isSmallCaps a <|> isSmallCaps b
+ , isStrike = isStrike a <|> isStrike b
+ , isRTL = isRTL a <|> isRTL b
+ , rVertAlign = rVertAlign a <|> rVertAlign b
+ , rUnderline = rUnderline a <|> rUnderline b
+ , rParentStyle = rParentStyle a
+ }
+
-- (width, height) in EMUs
type Extent = Maybe (Double, Double)
@@ -285,21 +336,28 @@ data RunElem = TextRun String | LnBrk | Tab | SoftHyphen | NoBreakHyphen
data VertAlign = BaseLn | SupScrpt | SubScrpt
deriving Show
-data RunStyle = RunStyle { isBold :: Maybe Bool
- , isItalic :: Maybe Bool
- , isSmallCaps :: Maybe Bool
- , isStrike :: Maybe Bool
- , isRTL :: Maybe Bool
- , rVertAlign :: Maybe VertAlign
- , rUnderline :: Maybe String
- , rStyle :: Maybe CharStyle
+data CharStyle = CharStyle { cStyleId :: CharStyleId
+ , cStyleName :: CharStyleName
+ , cStyleData :: RunStyle
+ } deriving (Show)
+
+data RunStyle = RunStyle { isBold :: Maybe Bool
+ , isItalic :: Maybe Bool
+ , isSmallCaps :: Maybe Bool
+ , isStrike :: Maybe Bool
+ , isRTL :: Maybe Bool
+ , rVertAlign :: Maybe VertAlign
+ , rUnderline :: Maybe String
+ , rParentStyle :: Maybe CharStyle
}
deriving Show
-data ParStyleData = ParStyleData { headingLev :: Maybe (String, Int)
- , isBlockQuote :: Maybe Bool
- , numInfo :: Maybe (String, String)
- , psStyle :: Maybe ParStyle}
+data ParStyle = ParStyle { headingLev :: Maybe (ParaStyleName, Int)
+ , numInfo :: Maybe (String, String)
+ , psParentStyle :: Maybe ParStyle
+ , pStyleName :: ParaStyleName
+ , pStyleId :: ParaStyleId
+ }
deriving Show
defaultRunStyle :: RunStyle
@@ -310,7 +368,7 @@ defaultRunStyle = RunStyle { isBold = Nothing
, isRTL = Nothing
, rVertAlign = Nothing
, rUnderline = Nothing
- , rStyle = Nothing
+ , rParentStyle = Nothing
}
type Target = String
@@ -390,7 +448,10 @@ elemToBody ns element | isElem ns "w" "body" element =
elemToBody _ _ = throwError WrongElem
archiveToStyles :: Archive -> (CharStyleMap, ParStyleMap)
-archiveToStyles zf =
+archiveToStyles = archiveToStyles' getStyleId getStyleId
+archiveToStyles' :: (Ord k1, Ord k2, ElemToStyle a1, ElemToStyle a2) =>
+ (a1 -> k1) -> (a2 -> k2) -> Archive -> (M.Map k1 a1, M.Map k2 a2)
+archiveToStyles' conv1 conv2 zf =
let stylesElem = findEntryByPath "word/styles.xml" zf >>=
(parseXMLDoc . UTF8.toStringLazy . fromEntry)
in
@@ -399,19 +460,17 @@ archiveToStyles zf =
Just styElem ->
let namespaces = elemToNameSpaces styElem
in
- ( M.fromList $ buildBasedOnList namespaces styElem
- (Nothing :: Maybe CharStyle),
- M.fromList $ buildBasedOnList namespaces styElem
- (Nothing :: Maybe ParStyle) )
+ ( M.fromList $ map (\r -> (conv1 r, r)) $ buildBasedOnList namespaces styElem Nothing,
+ M.fromList $ map (\p -> (conv2 p, p)) $ buildBasedOnList namespaces styElem Nothing)
-isBasedOnStyle :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> Bool
+isBasedOnStyle :: (ElemToStyle a, FromStyleId (StyleId a)) => NameSpaces -> Element -> Maybe a -> Bool
isBasedOnStyle ns element parentStyle
| isElem ns "w" "style" element
, Just styleType <- findAttrByName ns "w" "type" element
, styleType == cStyleType parentStyle
, Just basedOnVal <- findChildByName ns "w" "basedOn" element >>=
findAttrByName ns "w" "val"
- , Just ps <- parentStyle = basedOnVal == getStyleId ps
+ , Just ps <- parentStyle = basedOnVal == fromStyleId (getStyleId ps)
| isElem ns "w" "style" element
, Just styleType <- findAttrByName ns "w" "type" element
, styleType == cStyleType parentStyle
@@ -419,30 +478,70 @@ isBasedOnStyle ns element parentStyle
, Nothing <- parentStyle = True
| otherwise = False
-class ElemToStyle a where
+class HasStyleId a => ElemToStyle a where
cStyleType :: Maybe a -> String
elemToStyle :: NameSpaces -> Element -> Maybe a -> Maybe a
- getStyleId :: a -> String
+
+class FromStyleId (StyleId a) => HasStyleId a where
+ type StyleId a
+ getStyleId :: a -> StyleId a
+
+class FromStyleName (StyleName a) => HasStyleName a where
+ type StyleName a
+ getStyleName :: a -> StyleName a
+
+class HasParentStyle a where
+ getParentStyle :: a -> Maybe a
+
+instance HasParentStyle CharStyle where
+ getParentStyle = rParentStyle . cStyleData
+
+instance HasParentStyle ParStyle where
+ getParentStyle = psParentStyle
+
+getStyleNames :: (Functor t, HasStyleName a) => t a -> t (StyleName a)
+getStyleNames = fmap getStyleName
+
+constructBogusParStyleData :: ParaStyleName -> ParStyle
+constructBogusParStyleData stName = ParStyle
+ { headingLev = Nothing
+ , numInfo = Nothing
+ , psParentStyle = Nothing
+ , pStyleName = stName
+ , pStyleId = ParaStyleId . filter (/=' ') . fromStyleName $ stName
+ }
instance ElemToStyle CharStyle where
cStyleType _ = "character"
elemToStyle ns element parentStyle
| isElem ns "w" "style" element
- , Just "character" <- findAttrByName ns "w" "type" element
- , Just styleId <- findAttrByName ns "w" "styleId" element =
- Just (styleId, elemToRunStyle ns element parentStyle)
+ , Just "character" <- findAttrByName ns "w" "type" element =
+ elemToCharStyle ns element parentStyle
| otherwise = Nothing
- getStyleId s = fst s
+
+instance HasStyleId CharStyle where
+ type StyleId CharStyle = CharStyleId
+ getStyleId = cStyleId
+
+instance HasStyleName CharStyle where
+ type StyleName CharStyle = CharStyleName
+ getStyleName = cStyleName
instance ElemToStyle ParStyle where
cStyleType _ = "paragraph"
elemToStyle ns element parentStyle
| isElem ns "w" "style" element
, Just "paragraph" <- findAttrByName ns "w" "type" element
- , Just styleId <- findAttrByName ns "w" "styleId" element =
- Just (styleId, elemToParStyleData ns element parentStyle)
+ = elemToParStyleData ns element parentStyle
| otherwise = Nothing
- getStyleId s = fst s
+
+instance HasStyleId ParStyle where
+ type StyleId ParStyle = ParaStyleId
+ getStyleId = pStyleId
+
+instance HasStyleName ParStyle where
+ type StyleName ParStyle = ParaStyleName
+ getStyleName = pStyleName
getStyleChildren :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> [a]
getStyleChildren ns element parentStyle
@@ -693,6 +792,12 @@ testBitMask bitMaskS n =
stringToInteger :: String -> Maybe Integer
stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)])
+pHeading :: ParagraphStyle -> Maybe (ParaStyleName, Int)
+pHeading = getParStyleField headingLev . pStyle
+
+pNumInfo :: ParagraphStyle -> Maybe (String, String)
+pNumInfo = getParStyleField numInfo . pStyle
+
elemToBodyPart :: NameSpaces -> Element -> D BodyPart
elemToBodyPart ns element
| isElem ns "w" "p" element
@@ -1003,20 +1108,18 @@ elemToRun ns element
return $ Run runStyle runElems
elemToRun _ _ = throwError WrongElem
-getParentStyleValue :: (ParStyleData -> Maybe a) -> ParStyleData -> Maybe a
+getParentStyleValue :: (ParStyle -> Maybe a) -> ParStyle -> Maybe a
getParentStyleValue field style
| Just value <- field style = Just value
- | Just parentStyle <- psStyle style
- = getParentStyleValue field (snd parentStyle)
+ | Just parentStyle <- psParentStyle style
+ = getParentStyleValue field parentStyle
getParentStyleValue _ _ = Nothing
-getParStyleField :: (ParStyleData -> Maybe a) -> ParStyleMap -> [String] ->
- Maybe a
-getParStyleField field stylemap styles
- | x <- mapMaybe (\x -> M.lookup x stylemap) styles
- , (y:_) <- mapMaybe (getParentStyleValue field) x
+getParStyleField :: (ParStyle -> Maybe a) -> [ParStyle] -> Maybe a
+getParStyleField field styles
+ | (y:_) <- mapMaybe (getParentStyleValue field) styles
= Just y
-getParStyleField _ _ _ = Nothing
+getParStyleField _ _ = Nothing
getTrackedChange :: NameSpaces -> Element -> Maybe TrackedChange
getTrackedChange ns element
@@ -1038,10 +1141,10 @@ elemToParagraphStyle ns element sty
| Just pPr <- findChildByName ns "w" "pPr" element =
let style =
mapMaybe
- (findAttrByName ns "w" "val")
+ (fmap ParaStyleId . findAttrByName ns "w" "val")
(findChildrenByName ns "w" "pStyle" pPr)
in ParagraphStyle
- {pStyle = style
+ {pStyle = mapMaybe (`M.lookup` sty) style
, indentation =
findChildByName ns "w" "ind" pPr >>=
elemToParIndentation ns
@@ -1053,9 +1156,6 @@ elemToParagraphStyle ns element sty
Just "none" -> False
Just _ -> True
Nothing -> False
- , pHeading = getParStyleField headingLev sty style
- , pNumInfo = getParStyleField numInfo sty style
- , pBlockQuote = getParStyleField isBlockQuote sty style
, pChange = findChildByName ns "w" "rPr" pPr >>=
filterChild (\e -> isElem ns "w" "ins" e ||
isElem ns "w" "moveTo" e ||
@@ -1085,16 +1185,20 @@ elemToRunStyleD :: NameSpaces -> Element -> D RunStyle
elemToRunStyleD ns element
| Just rPr <- findChildByName ns "w" "rPr" element = do
charStyles <- asks envCharStyles
- let parentSty = case
+ let parentSty =
findChildByName ns "w" "rStyle" rPr >>=
- findAttrByName ns "w" "val"
- of
- Just styName | Just style <- M.lookup styName charStyles ->
- Just (styName, style)
- _ -> Nothing
+ findAttrByName ns "w" "val" >>=
+ flip M.lookup charStyles . CharStyleId
return $ elemToRunStyle ns element parentSty
elemToRunStyleD _ _ = return defaultRunStyle
+elemToCharStyle :: NameSpaces
+ -> Element -> Maybe CharStyle -> Maybe CharStyle
+elemToCharStyle ns element parentStyle
+ = CharStyle <$> (CharStyleId <$> findAttrByName ns "w" "styleId" element)
+ <*> getElementStyleName ns element
+ <*> (Just $ elemToRunStyle ns element parentStyle)
+
elemToRunStyle :: NameSpaces -> Element -> Maybe CharStyle -> RunStyle
elemToRunStyle ns element parentStyle
| Just rPr <- findChildByName ns "w" "rPr" element =
@@ -1117,38 +1221,23 @@ elemToRunStyle ns element parentStyle
, rUnderline =
findChildByName ns "w" "u" rPr >>=
findAttrByName ns "w" "val"
- , rStyle = parentStyle
+ , rParentStyle = parentStyle
}
elemToRunStyle _ _ _ = defaultRunStyle
-getHeaderLevel :: NameSpaces -> Element -> Maybe (String,Int)
+getHeaderLevel :: NameSpaces -> Element -> Maybe (ParaStyleName, Int)
getHeaderLevel ns element
- | Just styleId <- findAttrByName ns "w" "styleId" element
- , Just index <- stripPrefix "Heading" styleId
- , Just n <- stringToInteger index
- , n > 0 = Just (styleId, fromInteger n)
- | Just styleId <- findAttrByName ns "w" "styleId" element
- , Just index <- findChildByName ns "w" "name" element >>=
- findAttrByName ns "w" "val" >>=
- stripPrefix "heading "
- , Just n <- stringToInteger index
- , n > 0 = Just (styleId, fromInteger n)
+ | Just styleName <- getElementStyleName ns element
+ , Just n <- stringToInteger =<<
+ (stripPrefix "heading " . map toLower $
+ fromStyleName styleName)
+ , n > 0 = Just (styleName, fromInteger n)
getHeaderLevel _ _ = Nothing
-blockQuoteStyleIds :: [String]
-blockQuoteStyleIds = ["Quote", "BlockQuote", "BlockQuotation"]
-
-blockQuoteStyleNames :: [String]
-blockQuoteStyleNames = ["Quote", "Block Text"]
-
-getBlockQuote :: NameSpaces -> Element -> Maybe Bool
-getBlockQuote ns element
- | Just styleId <- findAttrByName ns "w" "styleId" element
- , styleId `elem` blockQuoteStyleIds = Just True
- | Just styleName <- findChildByName ns "w" "name" element >>=
- findAttrByName ns "w" "val"
- , styleName `elem` blockQuoteStyleNames = Just True
-getBlockQuote _ _ = Nothing
+getElementStyleName :: Coercible String a => NameSpaces -> Element -> Maybe a
+getElementStyleName ns el = coerce <$>
+ ((findChildByName ns "w" "name" el >>= findAttrByName ns "w" "val")
+ <|> findAttrByName ns "w" "styleId" el)
getNumInfo :: NameSpaces -> Element -> Maybe (String, String)
getNumInfo ns element = do
@@ -1163,15 +1252,19 @@ getNumInfo ns element = do
return (numId, lvl)
-elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> ParStyleData
-elemToParStyleData ns element parentStyle =
- ParStyleData
+elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> Maybe ParStyle
+elemToParStyleData ns element parentStyle
+ | Just styleId <- findAttrByName ns "w" "styleId" element
+ , Just styleName <- getElementStyleName ns element
+ = Just $ ParStyle
{
headingLev = getHeaderLevel ns element
- , isBlockQuote = getBlockQuote ns element
, numInfo = getNumInfo ns element
- , psStyle = parentStyle
- }
+ , psParentStyle = parentStyle
+ , pStyleName = styleName
+ , pStyleId = ParaStyleId styleId
+ }
+elemToParStyleData _ _ _ = Nothing
elemToRunElem :: NameSpaces -> Element -> D RunElem
elemToRunElem ns element
diff --git a/test/Tests/Readers/Docx.hs b/test/Tests/Readers/Docx.hs
index 9d0913e55..583a6ec18 100644
--- a/test/Tests/Readers/Docx.hs
+++ b/test/Tests/Readers/Docx.hs
@@ -256,6 +256,10 @@ tests = [ testGroup "document"
"docx/lists.docx"
"docx/lists.native"
, testCompare
+ "compact lists"
+ "docx/lists-compact.docx"
+ "docx/lists-compact.native"
+ , testCompare
"lists with level overrides"
"docx/lists_level_override.docx"
"docx/lists_level_override.native"
@@ -425,6 +429,11 @@ tests = [ testGroup "document"
"custom styles (`+styles`) enabled"
"docx/custom-style-reference.docx"
"docx/custom-style-with-styles.native"
+ , testCompareWithOpts
+ def{readerExtensions=extensionsFromList [Ext_styles]}
+ "custom styles (`+styles`): Compact style is removed from output"
+ "docx/compact-style-removal.docx"
+ "docx/compact-style-removal.native"
]
, testGroup "metadata"
[ testCompareWithOpts def{readerStandalone=True}
diff --git a/test/docx/compact-style-removal.docx b/test/docx/compact-style-removal.docx
new file mode 100644
index 000000000..fde0064db
--- /dev/null
+++ b/test/docx/compact-style-removal.docx
Binary files differ
diff --git a/test/docx/compact-style-removal.native b/test/docx/compact-style-removal.native
new file mode 100644
index 000000000..340878ba0
--- /dev/null
+++ b/test/docx/compact-style-removal.native
@@ -0,0 +1,5 @@
+[OrderedList (1,Decimal,Period)
+ [[Plain [Str "One"]]
+ ,[Plain [Str "Two"]]
+ ,[Plain [Str "Three"]]
+ ,[Plain [Str "Four"]]]]
diff --git a/test/docx/lists-compact.docx b/test/docx/lists-compact.docx
new file mode 100644
index 000000000..d7f9e4a06
--- /dev/null
+++ b/test/docx/lists-compact.docx
Binary files differ
diff --git a/test/docx/lists-compact.native b/test/docx/lists-compact.native
new file mode 100644
index 000000000..340878ba0
--- /dev/null
+++ b/test/docx/lists-compact.native
@@ -0,0 +1,5 @@
+[OrderedList (1,Decimal,Period)
+ [[Plain [Str "One"]]
+ ,[Plain [Str "Two"]]
+ ,[Plain [Str "Three"]]
+ ,[Plain [Str "Four"]]]]