diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/Text/Pandoc/Readers/Docx.hs | 162 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx/Lists.hs | 25 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx/Parse.hs | 283 |
3 files changed, 287 insertions, 183 deletions
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index a26986af2..9d17ab118 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -3,6 +3,7 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternGuards #-} {-# LANGUAGE MultiWayIf #-} +{-# LANGUAGE FlexibleContexts #-} {- | Module : Text.Pandoc.Readers.Docx Copyright : Copyright (C) 2014-2019 Jesse Rosenthal @@ -65,6 +66,7 @@ import Control.Monad.State.Strict import qualified Data.ByteString.Lazy as B import Data.Default (Default) import Data.List (delete, intersect) +import Data.Char (isSpace) import qualified Data.Map as M import Data.Maybe (isJust, fromMaybe) import Data.Sequence (ViewL (..), viewl) @@ -133,13 +135,13 @@ evalDocxContext :: PandocMonad m => DocxContext m a -> DEnv -> DState -> m a evalDocxContext ctx env st = flip evalStateT st $ runReaderT ctx env -- This is empty, but we put it in for future-proofing. -spansToKeep :: [String] +spansToKeep :: [CharStyleName] spansToKeep = [] -divsToKeep :: [String] -divsToKeep = ["list-item", "Definition", "DefinitionTerm"] +divsToKeep :: [ParaStyleName] +divsToKeep = ["Definition", "Definition Term"] -metaStyles :: M.Map String String +metaStyles :: M.Map ParaStyleName String metaStyles = M.fromList [ ("Title", "title") , ("Subtitle", "subtitle") , ("Author", "author") @@ -151,7 +153,7 @@ sepBodyParts = span (\bp -> isMetaPar bp || isEmptyPar bp) isMetaPar :: BodyPart -> Bool isMetaPar (Paragraph pPr _) = - not $ null $ intersect (pStyle pPr) (M.keys metaStyles) + not $ null $ intersect (getStyleNames $ pStyle pPr) (M.keys metaStyles) isMetaPar _ = False isEmptyPar :: BodyPart -> Bool @@ -168,7 +170,7 @@ bodyPartsToMeta' :: PandocMonad m => [BodyPart] -> DocxContext m (M.Map String M bodyPartsToMeta' [] = return M.empty bodyPartsToMeta' (bp : bps) | (Paragraph pPr parParts) <- bp - , (c : _)<- (pStyle pPr) `intersect` (M.keys metaStyles) + , (c : _)<- getStyleNames (pStyle pPr) `intersect` M.keys metaStyles , (Just metaField) <- M.lookup c metaStyles = do inlines <- smushInlines <$> mapM parPartToInlines parParts remaining <- bodyPartsToMeta' bps @@ -198,11 +200,29 @@ fixAuthors (MetaBlocks blks) = g _ = MetaInlines [] fixAuthors mv = mv -codeStyles :: [String] -codeStyles = ["VerbatimChar"] +isInheritedFromStyles :: (Eq (StyleName s), HasStyleName s, HasParentStyle s) => [StyleName s] -> s -> Bool +isInheritedFromStyles names sty + | getStyleName sty `elem` names = True + | Just psty <- getParentStyle sty = isInheritedFromStyles names psty + | otherwise = False -codeDivs :: [String] -codeDivs = ["SourceCode"] +hasStylesInheritedFrom :: [ParaStyleName] -> ParagraphStyle -> Bool +hasStylesInheritedFrom ns s = any (isInheritedFromStyles ns) $ pStyle s + +removeStyleNamed :: ParaStyleName -> ParagraphStyle -> ParagraphStyle +removeStyleNamed sn ps = ps{pStyle = filter (\psd -> getStyleName psd /= sn) $ pStyle ps} + +isCodeCharStyle :: CharStyle -> Bool +isCodeCharStyle = isInheritedFromStyles ["Verbatim Char"] + +isCodeDiv :: ParagraphStyle -> Bool +isCodeDiv = hasStylesInheritedFrom ["Source Code"] + +isBlockQuote :: ParStyle -> Bool +isBlockQuote = + isInheritedFromStyles [ + "Quote", "Block Text", "Block Quote", "Block Quotation" + ] runElemToInlines :: RunElem -> Inlines runElemToInlines (TextRun s) = text s @@ -228,57 +248,31 @@ parPartToString (InternalHyperLink _ runs) = concatMap runToString runs parPartToString (ExternalHyperLink _ runs) = concatMap runToString runs parPartToString _ = "" -blacklistedCharStyles :: [String] +blacklistedCharStyles :: [CharStyleName] blacklistedCharStyles = ["Hyperlink"] resolveDependentRunStyle :: PandocMonad m => RunStyle -> DocxContext m RunStyle resolveDependentRunStyle rPr - | Just (s, _) <- rStyle rPr, s `elem` blacklistedCharStyles = + | Just s <- rParentStyle rPr + , getStyleName s `elem` blacklistedCharStyles = return rPr - | Just (_, cs) <- rStyle rPr = do + | Just s <- rParentStyle rPr = do opts <- asks docxOptions if isEnabled Ext_styles opts then return rPr - else do rPr' <- resolveDependentRunStyle cs - return $ - RunStyle { isBold = case isBold rPr of - Just bool -> Just bool - Nothing -> isBold rPr' - , isItalic = case isItalic rPr of - Just bool -> Just bool - Nothing -> isItalic rPr' - , isSmallCaps = case isSmallCaps rPr of - Just bool -> Just bool - Nothing -> isSmallCaps rPr' - , isStrike = case isStrike rPr of - Just bool -> Just bool - Nothing -> isStrike rPr' - , isRTL = case isRTL rPr of - Just bool -> Just bool - Nothing -> isRTL rPr' - , rVertAlign = case rVertAlign rPr of - Just valign -> Just valign - Nothing -> rVertAlign rPr' - , rUnderline = case rUnderline rPr of - Just ulstyle -> Just ulstyle - Nothing -> rUnderline rPr' - , rStyle = rStyle rPr - } + else leftBiasedMergeRunStyle rPr <$> resolveDependentRunStyle (cStyleData s) | otherwise = return rPr runStyleToTransform :: PandocMonad m => RunStyle -> DocxContext m (Inlines -> Inlines) runStyleToTransform rPr - | Just (s, _) <- rStyle rPr - , s `elem` spansToKeep = do - transform <- runStyleToTransform rPr{rStyle = Nothing} - return $ spanWith ("", [s], []) . transform - | Just (s, _) <- rStyle rPr = do - opts <- asks docxOptions - let extraInfo = if isEnabled Ext_styles opts - then spanWith ("", [], [("custom-style", s)]) - else id - transform <- runStyleToTransform rPr{rStyle = Nothing} - return $ extraInfo . transform + | Just sn <- getStyleName <$> rParentStyle rPr + , sn `elem` spansToKeep = do + transform <- runStyleToTransform rPr{rParentStyle = Nothing} + return $ spanWith ("", [normalizeToClassName sn], []) . transform + | Just s <- rParentStyle rPr = do + ei <- extraInfo spanWith s + transform <- runStyleToTransform rPr{rParentStyle = Nothing} + return $ ei . transform | Just True <- isItalic rPr = do transform <- runStyleToTransform rPr{isItalic = Nothing} return $ emph . transform @@ -310,8 +304,7 @@ runStyleToTransform rPr runToInlines :: PandocMonad m => Run -> DocxContext m Inlines runToInlines (Run rs runElems) - | Just (s, _) <- rStyle rs - , s `elem` codeStyles = do + | maybe False isCodeCharStyle $ rParentStyle rs = do rPr <- resolveDependentRunStyle rs let codeString = code $ concatMap runElemToString runElems return $ case rVertAlign rPr of @@ -526,39 +519,49 @@ trimSps (Many ils) = Many $ Seq.dropWhileL isSp $Seq.dropWhileR isSp ils isSp LineBreak = True isSp _ = False +extraInfo :: (Eq (StyleName a), PandocMonad m, HasStyleName a) + => (Attr -> i -> i) -> a -> DocxContext m (i -> i) +extraInfo f s = do + opts <- asks docxOptions + return $ if | isEnabled Ext_styles opts + -> f ("", [], [("custom-style", fromStyleName $ getStyleName s)]) + | otherwise -> id + parStyleToTransform :: PandocMonad m => ParagraphStyle -> DocxContext m (Blocks -> Blocks) parStyleToTransform pPr | (c:cs) <- pStyle pPr - , c `elem` divsToKeep = do + , getStyleName c `elem` divsToKeep = do let pPr' = pPr { pStyle = cs } transform <- parStyleToTransform pPr' - return $ divWith ("", [c], []) . transform + return $ divWith ("", [normalizeToClassName $ getStyleName c], []) . transform | (c:cs) <- pStyle pPr, - c `elem` listParagraphDivs = do + getStyleName c `elem` listParagraphStyles = do let pPr' = pPr { pStyle = cs, indentation = Nothing} transform <- parStyleToTransform pPr' - return $ divWith ("", [c], []) . transform + return $ divWith ("", [normalizeToClassName $ getStyleName c], []) . transform | (c:cs) <- pStyle pPr = do - opts <- asks docxOptions - let pPr' = pPr { pStyle = cs} + let pPr' = pPr { pStyle = cs } transform <- parStyleToTransform pPr' - let extraInfo = if isEnabled Ext_styles opts - then divWith ("", [], [("custom-style", c)]) - else id - return $ extraInfo . (if fromMaybe False (pBlockQuote pPr) then blockQuote else id) . transform + ei <- extraInfo divWith c + return $ ei . (if isBlockQuote c then blockQuote else id) . transform | null (pStyle pPr) , Just left <- indentation pPr >>= leftParIndent = do let pPr' = pPr { indentation = Nothing } hang = fromMaybe 0 $ indentation pPr >>= hangingParIndent transform <- parStyleToTransform pPr' - return $ if (left - hang) > 0 + return $ if (left - hang) > 0 then blockQuote . transform else transform parStyleToTransform _ = return id +normalizeToClassName :: (FromStyleName a) => a -> String +normalizeToClassName = map go . fromStyleName + where go c | isSpace c = '-' + | otherwise = c + bodyPartToBlocks :: PandocMonad m => BodyPart -> DocxContext m Blocks bodyPartToBlocks (Paragraph pPr parparts) - | not $ null $ codeDivs `intersect` (pStyle pPr) = do + | isCodeDiv pPr = do transform <- parStyleToTransform pPr return $ transform $ @@ -568,13 +571,16 @@ bodyPartToBlocks (Paragraph pPr parparts) ils <-local (\s-> s{docxInHeaderBlock=True}) (smushInlines <$> mapM parPartToInlines parparts) makeHeaderAnchor $ - headerWith ("", delete style (pStyle pPr), []) n ils + headerWith ("", map normalizeToClassName . delete style $ getStyleNames (pStyle pPr), []) n ils | otherwise = do ils <- trimSps . smushInlines <$> mapM parPartToInlines parparts prevParaIls <- gets docxPrevPara dropIls <- gets docxDropCap let ils' = dropIls <> ils - if dropCap pPr + let (paraOrPlain, pPr') + | hasStylesInheritedFrom ["Compact"] pPr = (plain, removeStyleNamed "Compact" pPr) + | otherwise = (para, pPr) + if dropCap pPr' then do modify $ \s -> s { docxDropCap = ils' } return mempty else do modify $ \s -> s { docxDropCap = mempty } @@ -583,41 +589,41 @@ bodyPartToBlocks (Paragraph pPr parparts) ils' handleInsertion = do modify $ \s -> s {docxPrevPara = mempty} - transform <- parStyleToTransform pPr - return $ transform $ para ils'' + transform <- parStyleToTransform pPr' + return $ transform $ paraOrPlain ils'' opts <- asks docxOptions if | isNull ils'' && not (isEnabled Ext_empty_paragraphs opts) -> return mempty - | Just (TrackedChange Insertion _) <- pChange pPr + | Just (TrackedChange Insertion _) <- pChange pPr' , AcceptChanges <- readerTrackChanges opts -> handleInsertion - | Just (TrackedChange Insertion _) <- pChange pPr + | Just (TrackedChange Insertion _) <- pChange pPr' , RejectChanges <- readerTrackChanges opts -> do modify $ \s -> s {docxPrevPara = ils''} return mempty - | Just (TrackedChange Insertion cInfo) <- pChange pPr + | Just (TrackedChange Insertion cInfo) <- pChange pPr' , AllChanges <- readerTrackChanges opts , ChangeInfo _ cAuthor cDate <- cInfo -> do let attr = ("", ["paragraph-insertion"], [("author", cAuthor), ("date", cDate)]) insertMark = spanWith attr mempty - transform <- parStyleToTransform pPr + transform <- parStyleToTransform pPr' return $ transform $ - para $ ils'' <> insertMark - | Just (TrackedChange Deletion _) <- pChange pPr + paraOrPlain $ ils'' <> insertMark + | Just (TrackedChange Deletion _) <- pChange pPr' , AcceptChanges <- readerTrackChanges opts -> do modify $ \s -> s {docxPrevPara = ils''} return mempty - | Just (TrackedChange Deletion _) <- pChange pPr + | Just (TrackedChange Deletion _) <- pChange pPr' , RejectChanges <- readerTrackChanges opts -> handleInsertion - | Just (TrackedChange Deletion cInfo) <- pChange pPr + | Just (TrackedChange Deletion cInfo) <- pChange pPr' , AllChanges <- readerTrackChanges opts , ChangeInfo _ cAuthor cDate <- cInfo -> do let attr = ("", ["paragraph-deletion"], [("author", cAuthor), ("date", cDate)]) insertMark = spanWith attr mempty - transform <- parStyleToTransform pPr + transform <- parStyleToTransform pPr' return $ transform $ - para $ ils'' <> insertMark + paraOrPlain $ ils'' <> insertMark | otherwise -> handleInsertion bodyPartToBlocks (ListItem pPr numId lvl (Just levelInfo) parparts) = do -- We check whether this current numId has previously been used, @@ -638,7 +644,7 @@ bodyPartToBlocks (ListItem pPr numId lvl (Just levelInfo) parparts) = do blks <- bodyPartToBlocks (Paragraph pPr parparts) return $ divWith ("", ["list-item"], kvs) blks bodyPartToBlocks (ListItem pPr _ _ _ parparts) = - let pPr' = pPr {pStyle = "ListParagraph": pStyle pPr} + let pPr' = pPr {pStyle = constructBogusParStyleData "list-paragraph": pStyle pPr} in bodyPartToBlocks $ Paragraph pPr' parparts bodyPartToBlocks (Tbl _ _ _ []) = diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs index cc390f122..eb24640c5 100644 --- a/src/Text/Pandoc/Readers/Docx/Lists.hs +++ b/src/Text/Pandoc/Readers/Docx/Lists.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Docx.Lists Copyright : Copyright (C) 2014-2019 Jesse Rosenthal @@ -14,13 +15,16 @@ Functions for converting flat docx paragraphs into nested lists. module Text.Pandoc.Readers.Docx.Lists ( blocksToBullets , blocksToDefinitions , listParagraphDivs + , listParagraphStyles ) where import Prelude import Data.List import Data.Maybe +import Data.String (fromString) import Text.Pandoc.Generic (bottomUp) import Text.Pandoc.JSON +import Text.Pandoc.Readers.Docx.Parse (ParaStyleName) import Text.Pandoc.Shared (trim, safeRead) isListItem :: Block -> Bool @@ -79,7 +83,10 @@ getListType b@(Div (_, _, kvs) _) | isListItem b = getListType _ = Nothing listParagraphDivs :: [String] -listParagraphDivs = ["ListParagraph"] +listParagraphDivs = ["list-paragraph"] + +listParagraphStyles :: [ParaStyleName] +listParagraphStyles = map fromString listParagraphDivs -- This is a first stab at going through and attaching meaning to list -- paragraphs, without an item marker, following a list item. We @@ -160,7 +167,7 @@ blocksToDefinitions' defAcc acc [] = reverse $ DefinitionList (reverse defAcc) : acc blocksToDefinitions' defAcc acc (Div (_, classes1, _) blks1 : Div (ident2, classes2, kvs2) blks2 : blks) - | "DefinitionTerm" `elem` classes1 && "Definition" `elem` classes2 = + | "Definition-Term" `elem` classes1 && "Definition" `elem` classes2 = let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2) pair = if remainingAttr2 == ("", [], []) then (concatMap plainParaInlines blks1, [blks2]) else (concatMap plainParaInlines blks1, [[Div remainingAttr2 blks2]]) in @@ -169,12 +176,12 @@ blocksToDefinitions' ((defTerm, defItems):defs) acc (Div (ident2, classes2, kvs2) blks2 : blks) | "Definition" `elem` classes2 = let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2) - defItems2 = case remainingAttr2 == ("", [], []) of - True -> blks2 - False -> [Div remainingAttr2 blks2] - defAcc' = case null defItems of - True -> (defTerm, [defItems2]) : defs - False -> (defTerm, init defItems ++ [last defItems ++ defItems2]) : defs + defItems2 = if remainingAttr2 == ("", [], []) + then blks2 + else [Div remainingAttr2 blks2] + defAcc' = if null defItems + then (defTerm, [defItems2]) : defs + else (defTerm, init defItems ++ [last defItems ++ defItems2]) : defs in blocksToDefinitions' defAcc' acc blks blocksToDefinitions' [] acc (b:blks) = @@ -198,7 +205,5 @@ removeListDivs' blk = [blk] removeListDivs :: [Block] -> [Block] removeListDivs = concatMap removeListDivs' - - blocksToDefinitions :: [Block] -> [Block] blocksToDefinitions = blocksToDefinitions' [] [] diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 330c9208f..00c5fb0be 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -1,7 +1,11 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE PatternGuards #-} {-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} {- | Module : Text.Pandoc.Readers.Docx.Parse Copyright : Copyright (C) 2014-2019 Jesse Rosenthal @@ -31,6 +35,8 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..) , VertAlign(..) , ParIndentation(..) , ParagraphStyle(..) + , ParStyle + , CharStyle(cStyleData) , Row(..) , Cell(..) , TrackedChange(..) @@ -38,8 +44,17 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..) , ChangeInfo(..) , FieldInfo(..) , Level(..) + , ParaStyleName + , CharStyleName + , FromStyleName(..) + , HasStyleName(..) + , HasParentStyle(..) , archiveToDocx , archiveToDocxWithWarnings + , getStyleNames + , pHeading + , constructBogusParStyleData + , leftBiasedMergeRunStyle ) where import Prelude import Codec.Archive.Zip @@ -49,10 +64,13 @@ import Control.Monad.Reader import Control.Monad.State.Strict import Data.Bits ((.|.)) import qualified Data.ByteString.Lazy as B -import Data.Char (chr, ord, readLitChar) +import Data.Char (chr, ord, readLitChar, toLower) import Data.List +import Data.Function (on) +import Data.String (IsString(..)) import qualified Data.Map as M import Data.Maybe +import Data.Coerce import System.FilePath import Text.Pandoc.Readers.Docx.Util import Text.Pandoc.Readers.Docx.Fields @@ -160,13 +178,9 @@ newtype Body = Body [BodyPart] type Media = [(FilePath, B.ByteString)] -type CharStyle = (String, RunStyle) +type CharStyleMap = M.Map CharStyleId CharStyle -type ParStyle = (String, ParStyleData) - -type CharStyleMap = M.Map String RunStyle - -type ParStyleMap = M.Map String ParStyleData +type ParStyleMap = M.Map ParaStyleId ParStyle data Numbering = Numbering NameSpaces [Numb] [AbstractNumb] deriving Show @@ -213,12 +227,9 @@ data ChangeInfo = ChangeInfo ChangeId Author ChangeDate data TrackedChange = TrackedChange ChangeType ChangeInfo deriving Show -data ParagraphStyle = ParagraphStyle { pStyle :: [String] +data ParagraphStyle = ParagraphStyle { pStyle :: [ParStyle] , indentation :: Maybe ParIndentation , dropCap :: Bool - , pHeading :: Maybe (String, Int) - , pNumInfo :: Maybe (String, String) - , pBlockQuote :: Maybe Bool , pChange :: Maybe TrackedChange } deriving Show @@ -227,9 +238,6 @@ defaultParagraphStyle :: ParagraphStyle defaultParagraphStyle = ParagraphStyle { pStyle = [] , indentation = Nothing , dropCap = False - , pHeading = Nothing - , pNumInfo = Nothing - , pBlockQuote = Nothing , pChange = Nothing } @@ -254,6 +262,49 @@ newtype Row = Row [Cell] newtype Cell = Cell [BodyPart] deriving Show +newtype CharStyleId = CharStyleId { fromCharStyleId :: String } + deriving (Show, Eq, Ord, FromStyleId) +newtype ParaStyleId = ParaStyleId { fromParaStyleId :: String } + deriving (Show, Eq, Ord, FromStyleId) + +newtype CharStyleName = CharStyleName { fromCharStyleName :: CIString } + deriving (Show, Eq, Ord, IsString, FromStyleName) +newtype ParaStyleName = ParaStyleName { fromParaStyleName :: CIString } + deriving (Show, Eq, Ord, IsString, FromStyleName) + +-- Case-insensitive comparisons +newtype CIString = CIString String deriving (Show, IsString, FromStyleName) + +class FromStyleName a where + fromStyleName :: a -> String + +instance FromStyleName String where + fromStyleName = id + +class FromStyleId a where + fromStyleId :: a -> String + +instance FromStyleId String where + fromStyleId = id + +instance Eq CIString where + (==) = (==) `on` map toLower . coerce + +instance Ord CIString where + compare = compare `on` map toLower . coerce + +leftBiasedMergeRunStyle :: RunStyle -> RunStyle -> RunStyle +leftBiasedMergeRunStyle a b = RunStyle + { isBold = isBold a <|> isBold b + , isItalic = isItalic a <|> isItalic b + , isSmallCaps = isSmallCaps a <|> isSmallCaps b + , isStrike = isStrike a <|> isStrike b + , isRTL = isRTL a <|> isRTL b + , rVertAlign = rVertAlign a <|> rVertAlign b + , rUnderline = rUnderline a <|> rUnderline b + , rParentStyle = rParentStyle a + } + -- (width, height) in EMUs type Extent = Maybe (Double, Double) @@ -285,21 +336,28 @@ data RunElem = TextRun String | LnBrk | Tab | SoftHyphen | NoBreakHyphen data VertAlign = BaseLn | SupScrpt | SubScrpt deriving Show -data RunStyle = RunStyle { isBold :: Maybe Bool - , isItalic :: Maybe Bool - , isSmallCaps :: Maybe Bool - , isStrike :: Maybe Bool - , isRTL :: Maybe Bool - , rVertAlign :: Maybe VertAlign - , rUnderline :: Maybe String - , rStyle :: Maybe CharStyle +data CharStyle = CharStyle { cStyleId :: CharStyleId + , cStyleName :: CharStyleName + , cStyleData :: RunStyle + } deriving (Show) + +data RunStyle = RunStyle { isBold :: Maybe Bool + , isItalic :: Maybe Bool + , isSmallCaps :: Maybe Bool + , isStrike :: Maybe Bool + , isRTL :: Maybe Bool + , rVertAlign :: Maybe VertAlign + , rUnderline :: Maybe String + , rParentStyle :: Maybe CharStyle } deriving Show -data ParStyleData = ParStyleData { headingLev :: Maybe (String, Int) - , isBlockQuote :: Maybe Bool - , numInfo :: Maybe (String, String) - , psStyle :: Maybe ParStyle} +data ParStyle = ParStyle { headingLev :: Maybe (ParaStyleName, Int) + , numInfo :: Maybe (String, String) + , psParentStyle :: Maybe ParStyle + , pStyleName :: ParaStyleName + , pStyleId :: ParaStyleId + } deriving Show defaultRunStyle :: RunStyle @@ -310,7 +368,7 @@ defaultRunStyle = RunStyle { isBold = Nothing , isRTL = Nothing , rVertAlign = Nothing , rUnderline = Nothing - , rStyle = Nothing + , rParentStyle = Nothing } type Target = String @@ -390,7 +448,10 @@ elemToBody ns element | isElem ns "w" "body" element = elemToBody _ _ = throwError WrongElem archiveToStyles :: Archive -> (CharStyleMap, ParStyleMap) -archiveToStyles zf = +archiveToStyles = archiveToStyles' getStyleId getStyleId +archiveToStyles' :: (Ord k1, Ord k2, ElemToStyle a1, ElemToStyle a2) => + (a1 -> k1) -> (a2 -> k2) -> Archive -> (M.Map k1 a1, M.Map k2 a2) +archiveToStyles' conv1 conv2 zf = let stylesElem = findEntryByPath "word/styles.xml" zf >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) in @@ -399,19 +460,17 @@ archiveToStyles zf = Just styElem -> let namespaces = elemToNameSpaces styElem in - ( M.fromList $ buildBasedOnList namespaces styElem - (Nothing :: Maybe CharStyle), - M.fromList $ buildBasedOnList namespaces styElem - (Nothing :: Maybe ParStyle) ) + ( M.fromList $ map (\r -> (conv1 r, r)) $ buildBasedOnList namespaces styElem Nothing, + M.fromList $ map (\p -> (conv2 p, p)) $ buildBasedOnList namespaces styElem Nothing) -isBasedOnStyle :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> Bool +isBasedOnStyle :: (ElemToStyle a, FromStyleId (StyleId a)) => NameSpaces -> Element -> Maybe a -> Bool isBasedOnStyle ns element parentStyle | isElem ns "w" "style" element , Just styleType <- findAttrByName ns "w" "type" element , styleType == cStyleType parentStyle , Just basedOnVal <- findChildByName ns "w" "basedOn" element >>= findAttrByName ns "w" "val" - , Just ps <- parentStyle = basedOnVal == getStyleId ps + , Just ps <- parentStyle = basedOnVal == fromStyleId (getStyleId ps) | isElem ns "w" "style" element , Just styleType <- findAttrByName ns "w" "type" element , styleType == cStyleType parentStyle @@ -419,30 +478,70 @@ isBasedOnStyle ns element parentStyle , Nothing <- parentStyle = True | otherwise = False -class ElemToStyle a where +class HasStyleId a => ElemToStyle a where cStyleType :: Maybe a -> String elemToStyle :: NameSpaces -> Element -> Maybe a -> Maybe a - getStyleId :: a -> String + +class FromStyleId (StyleId a) => HasStyleId a where + type StyleId a + getStyleId :: a -> StyleId a + +class FromStyleName (StyleName a) => HasStyleName a where + type StyleName a + getStyleName :: a -> StyleName a + +class HasParentStyle a where + getParentStyle :: a -> Maybe a + +instance HasParentStyle CharStyle where + getParentStyle = rParentStyle . cStyleData + +instance HasParentStyle ParStyle where + getParentStyle = psParentStyle + +getStyleNames :: (Functor t, HasStyleName a) => t a -> t (StyleName a) +getStyleNames = fmap getStyleName + +constructBogusParStyleData :: ParaStyleName -> ParStyle +constructBogusParStyleData stName = ParStyle + { headingLev = Nothing + , numInfo = Nothing + , psParentStyle = Nothing + , pStyleName = stName + , pStyleId = ParaStyleId . filter (/=' ') . fromStyleName $ stName + } instance ElemToStyle CharStyle where cStyleType _ = "character" elemToStyle ns element parentStyle | isElem ns "w" "style" element - , Just "character" <- findAttrByName ns "w" "type" element - , Just styleId <- findAttrByName ns "w" "styleId" element = - Just (styleId, elemToRunStyle ns element parentStyle) + , Just "character" <- findAttrByName ns "w" "type" element = + elemToCharStyle ns element parentStyle | otherwise = Nothing - getStyleId s = fst s + +instance HasStyleId CharStyle where + type StyleId CharStyle = CharStyleId + getStyleId = cStyleId + +instance HasStyleName CharStyle where + type StyleName CharStyle = CharStyleName + getStyleName = cStyleName instance ElemToStyle ParStyle where cStyleType _ = "paragraph" elemToStyle ns element parentStyle | isElem ns "w" "style" element , Just "paragraph" <- findAttrByName ns "w" "type" element - , Just styleId <- findAttrByName ns "w" "styleId" element = - Just (styleId, elemToParStyleData ns element parentStyle) + = elemToParStyleData ns element parentStyle | otherwise = Nothing - getStyleId s = fst s + +instance HasStyleId ParStyle where + type StyleId ParStyle = ParaStyleId + getStyleId = pStyleId + +instance HasStyleName ParStyle where + type StyleName ParStyle = ParaStyleName + getStyleName = pStyleName getStyleChildren :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> [a] getStyleChildren ns element parentStyle @@ -693,6 +792,12 @@ testBitMask bitMaskS n = stringToInteger :: String -> Maybe Integer stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)]) +pHeading :: ParagraphStyle -> Maybe (ParaStyleName, Int) +pHeading = getParStyleField headingLev . pStyle + +pNumInfo :: ParagraphStyle -> Maybe (String, String) +pNumInfo = getParStyleField numInfo . pStyle + elemToBodyPart :: NameSpaces -> Element -> D BodyPart elemToBodyPart ns element | isElem ns "w" "p" element @@ -1003,20 +1108,18 @@ elemToRun ns element return $ Run runStyle runElems elemToRun _ _ = throwError WrongElem -getParentStyleValue :: (ParStyleData -> Maybe a) -> ParStyleData -> Maybe a +getParentStyleValue :: (ParStyle -> Maybe a) -> ParStyle -> Maybe a getParentStyleValue field style | Just value <- field style = Just value - | Just parentStyle <- psStyle style - = getParentStyleValue field (snd parentStyle) + | Just parentStyle <- psParentStyle style + = getParentStyleValue field parentStyle getParentStyleValue _ _ = Nothing -getParStyleField :: (ParStyleData -> Maybe a) -> ParStyleMap -> [String] -> - Maybe a -getParStyleField field stylemap styles - | x <- mapMaybe (\x -> M.lookup x stylemap) styles - , (y:_) <- mapMaybe (getParentStyleValue field) x +getParStyleField :: (ParStyle -> Maybe a) -> [ParStyle] -> Maybe a +getParStyleField field styles + | (y:_) <- mapMaybe (getParentStyleValue field) styles = Just y -getParStyleField _ _ _ = Nothing +getParStyleField _ _ = Nothing getTrackedChange :: NameSpaces -> Element -> Maybe TrackedChange getTrackedChange ns element @@ -1038,10 +1141,10 @@ elemToParagraphStyle ns element sty | Just pPr <- findChildByName ns "w" "pPr" element = let style = mapMaybe - (findAttrByName ns "w" "val") + (fmap ParaStyleId . findAttrByName ns "w" "val") (findChildrenByName ns "w" "pStyle" pPr) in ParagraphStyle - {pStyle = style + {pStyle = mapMaybe (`M.lookup` sty) style , indentation = findChildByName ns "w" "ind" pPr >>= elemToParIndentation ns @@ -1053,9 +1156,6 @@ elemToParagraphStyle ns element sty Just "none" -> False Just _ -> True Nothing -> False - , pHeading = getParStyleField headingLev sty style - , pNumInfo = getParStyleField numInfo sty style - , pBlockQuote = getParStyleField isBlockQuote sty style , pChange = findChildByName ns "w" "rPr" pPr >>= filterChild (\e -> isElem ns "w" "ins" e || isElem ns "w" "moveTo" e || @@ -1085,16 +1185,20 @@ elemToRunStyleD :: NameSpaces -> Element -> D RunStyle elemToRunStyleD ns element | Just rPr <- findChildByName ns "w" "rPr" element = do charStyles <- asks envCharStyles - let parentSty = case + let parentSty = findChildByName ns "w" "rStyle" rPr >>= - findAttrByName ns "w" "val" - of - Just styName | Just style <- M.lookup styName charStyles -> - Just (styName, style) - _ -> Nothing + findAttrByName ns "w" "val" >>= + flip M.lookup charStyles . CharStyleId return $ elemToRunStyle ns element parentSty elemToRunStyleD _ _ = return defaultRunStyle +elemToCharStyle :: NameSpaces + -> Element -> Maybe CharStyle -> Maybe CharStyle +elemToCharStyle ns element parentStyle + = CharStyle <$> (CharStyleId <$> findAttrByName ns "w" "styleId" element) + <*> getElementStyleName ns element + <*> (Just $ elemToRunStyle ns element parentStyle) + elemToRunStyle :: NameSpaces -> Element -> Maybe CharStyle -> RunStyle elemToRunStyle ns element parentStyle | Just rPr <- findChildByName ns "w" "rPr" element = @@ -1117,38 +1221,23 @@ elemToRunStyle ns element parentStyle , rUnderline = findChildByName ns "w" "u" rPr >>= findAttrByName ns "w" "val" - , rStyle = parentStyle + , rParentStyle = parentStyle } elemToRunStyle _ _ _ = defaultRunStyle -getHeaderLevel :: NameSpaces -> Element -> Maybe (String,Int) +getHeaderLevel :: NameSpaces -> Element -> Maybe (ParaStyleName, Int) getHeaderLevel ns element - | Just styleId <- findAttrByName ns "w" "styleId" element - , Just index <- stripPrefix "Heading" styleId - , Just n <- stringToInteger index - , n > 0 = Just (styleId, fromInteger n) - | Just styleId <- findAttrByName ns "w" "styleId" element - , Just index <- findChildByName ns "w" "name" element >>= - findAttrByName ns "w" "val" >>= - stripPrefix "heading " - , Just n <- stringToInteger index - , n > 0 = Just (styleId, fromInteger n) + | Just styleName <- getElementStyleName ns element + , Just n <- stringToInteger =<< + (stripPrefix "heading " . map toLower $ + fromStyleName styleName) + , n > 0 = Just (styleName, fromInteger n) getHeaderLevel _ _ = Nothing -blockQuoteStyleIds :: [String] -blockQuoteStyleIds = ["Quote", "BlockQuote", "BlockQuotation"] - -blockQuoteStyleNames :: [String] -blockQuoteStyleNames = ["Quote", "Block Text"] - -getBlockQuote :: NameSpaces -> Element -> Maybe Bool -getBlockQuote ns element - | Just styleId <- findAttrByName ns "w" "styleId" element - , styleId `elem` blockQuoteStyleIds = Just True - | Just styleName <- findChildByName ns "w" "name" element >>= - findAttrByName ns "w" "val" - , styleName `elem` blockQuoteStyleNames = Just True -getBlockQuote _ _ = Nothing +getElementStyleName :: Coercible String a => NameSpaces -> Element -> Maybe a +getElementStyleName ns el = coerce <$> + ((findChildByName ns "w" "name" el >>= findAttrByName ns "w" "val") + <|> findAttrByName ns "w" "styleId" el) getNumInfo :: NameSpaces -> Element -> Maybe (String, String) getNumInfo ns element = do @@ -1163,15 +1252,19 @@ getNumInfo ns element = do return (numId, lvl) -elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> ParStyleData -elemToParStyleData ns element parentStyle = - ParStyleData +elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> Maybe ParStyle +elemToParStyleData ns element parentStyle + | Just styleId <- findAttrByName ns "w" "styleId" element + , Just styleName <- getElementStyleName ns element + = Just $ ParStyle { headingLev = getHeaderLevel ns element - , isBlockQuote = getBlockQuote ns element , numInfo = getNumInfo ns element - , psStyle = parentStyle - } + , psParentStyle = parentStyle + , pStyleName = styleName + , pStyleId = ParaStyleId styleId + } +elemToParStyleData _ _ _ = Nothing elemToRunElem :: NameSpaces -> Element -> D RunElem elemToRunElem ns element |