diff options
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r-- | src/Text/Pandoc/Readers/Docx/Parse.hs | 260 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx/Parse/Styles.hs | 304 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx/StyleMap.hs | 123 |
3 files changed, 307 insertions, 380 deletions
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 00c5fb0be..8c5c94bb9 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -1,14 +1,9 @@ {-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE PatternGuards #-} {-# LANGUAGE ViewPatterns #-} -{-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE TypeFamilies #-} -{-# LANGUAGE GeneralizedNewtypeDeriving #-} {- | Module : Text.Pandoc.Readers.Docx.Parse Copyright : Copyright (C) 2014-2019 Jesse Rosenthal + 2019 Nikolay Yakimov <root@livid.pp.ru> License : GNU GPL, version 2 or above Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu> @@ -57,6 +52,7 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..) , leftBiasedMergeRunStyle ) where import Prelude +import Text.Pandoc.Readers.Docx.Parse.Styles import Codec.Archive.Zip import Control.Applicative ((<|>)) import Control.Monad.Except @@ -64,13 +60,10 @@ import Control.Monad.Reader import Control.Monad.State.Strict import Data.Bits ((.|.)) import qualified Data.ByteString.Lazy as B -import Data.Char (chr, ord, readLitChar, toLower) +import Data.Char (chr, ord, readLitChar) import Data.List -import Data.Function (on) -import Data.String (IsString(..)) import qualified Data.Map as M import Data.Maybe -import Data.Coerce import System.FilePath import Text.Pandoc.Readers.Docx.Util import Text.Pandoc.Readers.Docx.Fields @@ -262,37 +255,6 @@ newtype Row = Row [Cell] newtype Cell = Cell [BodyPart] deriving Show -newtype CharStyleId = CharStyleId { fromCharStyleId :: String } - deriving (Show, Eq, Ord, FromStyleId) -newtype ParaStyleId = ParaStyleId { fromParaStyleId :: String } - deriving (Show, Eq, Ord, FromStyleId) - -newtype CharStyleName = CharStyleName { fromCharStyleName :: CIString } - deriving (Show, Eq, Ord, IsString, FromStyleName) -newtype ParaStyleName = ParaStyleName { fromParaStyleName :: CIString } - deriving (Show, Eq, Ord, IsString, FromStyleName) - --- Case-insensitive comparisons -newtype CIString = CIString String deriving (Show, IsString, FromStyleName) - -class FromStyleName a where - fromStyleName :: a -> String - -instance FromStyleName String where - fromStyleName = id - -class FromStyleId a where - fromStyleId :: a -> String - -instance FromStyleId String where - fromStyleId = id - -instance Eq CIString where - (==) = (==) `on` map toLower . coerce - -instance Ord CIString where - compare = compare `on` map toLower . coerce - leftBiasedMergeRunStyle :: RunStyle -> RunStyle -> RunStyle leftBiasedMergeRunStyle a b = RunStyle { isBold = isBold a <|> isBold b @@ -333,44 +295,6 @@ data Run = Run RunStyle [RunElem] data RunElem = TextRun String | LnBrk | Tab | SoftHyphen | NoBreakHyphen deriving Show -data VertAlign = BaseLn | SupScrpt | SubScrpt - deriving Show - -data CharStyle = CharStyle { cStyleId :: CharStyleId - , cStyleName :: CharStyleName - , cStyleData :: RunStyle - } deriving (Show) - -data RunStyle = RunStyle { isBold :: Maybe Bool - , isItalic :: Maybe Bool - , isSmallCaps :: Maybe Bool - , isStrike :: Maybe Bool - , isRTL :: Maybe Bool - , rVertAlign :: Maybe VertAlign - , rUnderline :: Maybe String - , rParentStyle :: Maybe CharStyle - } - deriving Show - -data ParStyle = ParStyle { headingLev :: Maybe (ParaStyleName, Int) - , numInfo :: Maybe (String, String) - , psParentStyle :: Maybe ParStyle - , pStyleName :: ParaStyleName - , pStyleId :: ParaStyleId - } - deriving Show - -defaultRunStyle :: RunStyle -defaultRunStyle = RunStyle { isBold = Nothing - , isItalic = Nothing - , isSmallCaps = Nothing - , isStrike = Nothing - , isRTL = Nothing - , rVertAlign = Nothing - , rUnderline = Nothing - , rParentStyle = Nothing - } - type Target = String type Anchor = String type URL = String @@ -449,46 +373,6 @@ elemToBody _ _ = throwError WrongElem archiveToStyles :: Archive -> (CharStyleMap, ParStyleMap) archiveToStyles = archiveToStyles' getStyleId getStyleId -archiveToStyles' :: (Ord k1, Ord k2, ElemToStyle a1, ElemToStyle a2) => - (a1 -> k1) -> (a2 -> k2) -> Archive -> (M.Map k1 a1, M.Map k2 a2) -archiveToStyles' conv1 conv2 zf = - let stylesElem = findEntryByPath "word/styles.xml" zf >>= - (parseXMLDoc . UTF8.toStringLazy . fromEntry) - in - case stylesElem of - Nothing -> (M.empty, M.empty) - Just styElem -> - let namespaces = elemToNameSpaces styElem - in - ( M.fromList $ map (\r -> (conv1 r, r)) $ buildBasedOnList namespaces styElem Nothing, - M.fromList $ map (\p -> (conv2 p, p)) $ buildBasedOnList namespaces styElem Nothing) - -isBasedOnStyle :: (ElemToStyle a, FromStyleId (StyleId a)) => NameSpaces -> Element -> Maybe a -> Bool -isBasedOnStyle ns element parentStyle - | isElem ns "w" "style" element - , Just styleType <- findAttrByName ns "w" "type" element - , styleType == cStyleType parentStyle - , Just basedOnVal <- findChildByName ns "w" "basedOn" element >>= - findAttrByName ns "w" "val" - , Just ps <- parentStyle = basedOnVal == fromStyleId (getStyleId ps) - | isElem ns "w" "style" element - , Just styleType <- findAttrByName ns "w" "type" element - , styleType == cStyleType parentStyle - , Nothing <- findChildByName ns "w" "basedOn" element - , Nothing <- parentStyle = True - | otherwise = False - -class HasStyleId a => ElemToStyle a where - cStyleType :: Maybe a -> String - elemToStyle :: NameSpaces -> Element -> Maybe a -> Maybe a - -class FromStyleId (StyleId a) => HasStyleId a where - type StyleId a - getStyleId :: a -> StyleId a - -class FromStyleName (StyleName a) => HasStyleName a where - type StyleName a - getStyleName :: a -> StyleName a class HasParentStyle a where getParentStyle :: a -> Maybe a @@ -511,52 +395,6 @@ constructBogusParStyleData stName = ParStyle , pStyleId = ParaStyleId . filter (/=' ') . fromStyleName $ stName } -instance ElemToStyle CharStyle where - cStyleType _ = "character" - elemToStyle ns element parentStyle - | isElem ns "w" "style" element - , Just "character" <- findAttrByName ns "w" "type" element = - elemToCharStyle ns element parentStyle - | otherwise = Nothing - -instance HasStyleId CharStyle where - type StyleId CharStyle = CharStyleId - getStyleId = cStyleId - -instance HasStyleName CharStyle where - type StyleName CharStyle = CharStyleName - getStyleName = cStyleName - -instance ElemToStyle ParStyle where - cStyleType _ = "paragraph" - elemToStyle ns element parentStyle - | isElem ns "w" "style" element - , Just "paragraph" <- findAttrByName ns "w" "type" element - = elemToParStyleData ns element parentStyle - | otherwise = Nothing - -instance HasStyleId ParStyle where - type StyleId ParStyle = ParaStyleId - getStyleId = pStyleId - -instance HasStyleName ParStyle where - type StyleName ParStyle = ParaStyleName - getStyleName = pStyleName - -getStyleChildren :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> [a] -getStyleChildren ns element parentStyle - | isElem ns "w" "styles" element = - mapMaybe (\e -> elemToStyle ns e parentStyle) $ - filterChildren (\e' -> isBasedOnStyle ns e' parentStyle) element - | otherwise = [] - -buildBasedOnList :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> [a] -buildBasedOnList ns element rootStyle = - case getStyleChildren ns element rootStyle of - [] -> [] - stys -> stys ++ - concatMap (buildBasedOnList ns element . Just) stys - archiveToNotes :: Archive -> Notes archiveToNotes zf = let fnElem = findEntryByPath "word/footnotes.xml" zf @@ -789,9 +627,6 @@ testBitMask bitMaskS n = [] -> False ((n', _) : _) -> (n' .|. n) /= 0 -stringToInteger :: String -> Maybe Integer -stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)]) - pHeading :: ParagraphStyle -> Maybe (ParaStyleName, Int) pHeading = getParStyleField headingLev . pStyle @@ -1166,21 +1001,6 @@ elemToParagraphStyle ns element sty } elemToParagraphStyle _ _ _ = defaultParagraphStyle -checkOnOff :: NameSpaces -> Element -> QName -> Maybe Bool -checkOnOff ns rPr tag - | Just t <- findChild tag rPr - , Just val <- findAttrByName ns "w" "val" t = - Just $ case val of - "true" -> True - "false" -> False - "on" -> True - "off" -> False - "1" -> True - "0" -> False - _ -> False - | Just _ <- findChild tag rPr = Just True -checkOnOff _ _ _ = Nothing - elemToRunStyleD :: NameSpaces -> Element -> D RunStyle elemToRunStyleD ns element | Just rPr <- findChildByName ns "w" "rPr" element = do @@ -1192,80 +1012,6 @@ elemToRunStyleD ns element return $ elemToRunStyle ns element parentSty elemToRunStyleD _ _ = return defaultRunStyle -elemToCharStyle :: NameSpaces - -> Element -> Maybe CharStyle -> Maybe CharStyle -elemToCharStyle ns element parentStyle - = CharStyle <$> (CharStyleId <$> findAttrByName ns "w" "styleId" element) - <*> getElementStyleName ns element - <*> (Just $ elemToRunStyle ns element parentStyle) - -elemToRunStyle :: NameSpaces -> Element -> Maybe CharStyle -> RunStyle -elemToRunStyle ns element parentStyle - | Just rPr <- findChildByName ns "w" "rPr" element = - RunStyle - { - isBold = checkOnOff ns rPr (elemName ns "w" "b") `mplus` - checkOnOff ns rPr (elemName ns "w" "bCs") - , isItalic = checkOnOff ns rPr (elemName ns "w" "i") `mplus` - checkOnOff ns rPr (elemName ns "w" "iCs") - , isSmallCaps = checkOnOff ns rPr (elemName ns "w" "smallCaps") - , isStrike = checkOnOff ns rPr (elemName ns "w" "strike") - , isRTL = checkOnOff ns rPr (elemName ns "w" "rtl") - , rVertAlign = - findChildByName ns "w" "vertAlign" rPr >>= - findAttrByName ns "w" "val" >>= - \v -> Just $ case v of - "superscript" -> SupScrpt - "subscript" -> SubScrpt - _ -> BaseLn - , rUnderline = - findChildByName ns "w" "u" rPr >>= - findAttrByName ns "w" "val" - , rParentStyle = parentStyle - } -elemToRunStyle _ _ _ = defaultRunStyle - -getHeaderLevel :: NameSpaces -> Element -> Maybe (ParaStyleName, Int) -getHeaderLevel ns element - | Just styleName <- getElementStyleName ns element - , Just n <- stringToInteger =<< - (stripPrefix "heading " . map toLower $ - fromStyleName styleName) - , n > 0 = Just (styleName, fromInteger n) -getHeaderLevel _ _ = Nothing - -getElementStyleName :: Coercible String a => NameSpaces -> Element -> Maybe a -getElementStyleName ns el = coerce <$> - ((findChildByName ns "w" "name" el >>= findAttrByName ns "w" "val") - <|> findAttrByName ns "w" "styleId" el) - -getNumInfo :: NameSpaces -> Element -> Maybe (String, String) -getNumInfo ns element = do - let numPr = findChildByName ns "w" "pPr" element >>= - findChildByName ns "w" "numPr" - lvl = fromMaybe "0" (numPr >>= - findChildByName ns "w" "ilvl" >>= - findAttrByName ns "w" "val") - numId <- numPr >>= - findChildByName ns "w" "numId" >>= - findAttrByName ns "w" "val" - return (numId, lvl) - - -elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> Maybe ParStyle -elemToParStyleData ns element parentStyle - | Just styleId <- findAttrByName ns "w" "styleId" element - , Just styleName <- getElementStyleName ns element - = Just $ ParStyle - { - headingLev = getHeaderLevel ns element - , numInfo = getNumInfo ns element - , psParentStyle = parentStyle - , pStyleName = styleName - , pStyleId = ParaStyleId styleId - } -elemToParStyleData _ _ _ = Nothing - elemToRunElem :: NameSpaces -> Element -> D RunElem elemToRunElem ns element | isElem ns "w" "t" element diff --git a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs new file mode 100644 index 000000000..c2d27e7fb --- /dev/null +++ b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs @@ -0,0 +1,304 @@ +{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{- | + Module : Text.Pandoc.Readers.Docx.Parse.Styles + Copyright : Copyright (C) 2014-2019 Jesse Rosenthal + 2019 Nikolay Yakimov <root@livid.pp.ru> + License : GNU GPL, version 2 or above + + Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu> + Stability : alpha + Portability : portable + +Type machinery and code for extraction and manipulation of docx styles +-} + +module Text.Pandoc.Readers.Docx.Parse.Styles ( + CharStyleId(..) + , CharStyle + , ParaStyleId(..) + , ParStyle(..) + , RunStyle(..) + , HasStyleName + , StyleName + , ParaStyleName + , CharStyleName + , FromStyleName + , VertAlign(..) + , StyleId + , HasStyleId + , archiveToStyles' + , getStyleId + , getStyleName + , cStyleData + , fromStyleName + , fromStyleId + , stringToInteger + , getNumInfo + , elemToRunStyle + , defaultRunStyle + ) where +import Prelude +import Codec.Archive.Zip +import Control.Applicative ((<|>)) +import Control.Monad.Except +import Data.Char (toLower) +import Data.List +import Data.Function (on) +import Data.String (IsString(..)) +import qualified Data.Map as M +import Data.Maybe +import Data.Coerce +import Text.Pandoc.Readers.Docx.Util +import qualified Text.Pandoc.UTF8 as UTF8 +import Text.XML.Light + +newtype CharStyleId = CharStyleId String + deriving (Show, Eq, Ord, IsString, FromStyleId) +newtype ParaStyleId = ParaStyleId String + deriving (Show, Eq, Ord, IsString, FromStyleId) + +newtype CharStyleName = CharStyleName CIString + deriving (Show, Eq, Ord, IsString, FromStyleName) +newtype ParaStyleName = ParaStyleName CIString + deriving (Show, Eq, Ord, IsString, FromStyleName) + +-- Case-insensitive comparisons +newtype CIString = CIString String deriving (Show, IsString, FromStyleName) + +class FromStyleName a where + fromStyleName :: a -> String + +instance FromStyleName String where + fromStyleName = id + +class FromStyleId a where + fromStyleId :: a -> String + +instance FromStyleId String where + fromStyleId = id + +instance Eq CIString where + (==) = (==) `on` map toLower . coerce + +instance Ord CIString where + compare = compare `on` map toLower . coerce + +data VertAlign = BaseLn | SupScrpt | SubScrpt + deriving Show + +data CharStyle = CharStyle { cStyleId :: CharStyleId + , cStyleName :: CharStyleName + , cStyleData :: RunStyle + } deriving (Show) + +data RunStyle = RunStyle { isBold :: Maybe Bool + , isItalic :: Maybe Bool + , isSmallCaps :: Maybe Bool + , isStrike :: Maybe Bool + , isRTL :: Maybe Bool + , rVertAlign :: Maybe VertAlign + , rUnderline :: Maybe String + , rParentStyle :: Maybe CharStyle + } + deriving Show + +data ParStyle = ParStyle { headingLev :: Maybe (ParaStyleName, Int) + , numInfo :: Maybe (String, String) + , psParentStyle :: Maybe ParStyle + , pStyleName :: ParaStyleName + , pStyleId :: ParaStyleId + } + deriving Show + +defaultRunStyle :: RunStyle +defaultRunStyle = RunStyle { isBold = Nothing + , isItalic = Nothing + , isSmallCaps = Nothing + , isStrike = Nothing + , isRTL = Nothing + , rVertAlign = Nothing + , rUnderline = Nothing + , rParentStyle = Nothing + } + +archiveToStyles' :: (Ord k1, Ord k2, ElemToStyle a1, ElemToStyle a2) => + (a1 -> k1) -> (a2 -> k2) -> Archive -> (M.Map k1 a1, M.Map k2 a2) +archiveToStyles' conv1 conv2 zf = + let stylesElem = findEntryByPath "word/styles.xml" zf >>= + (parseXMLDoc . UTF8.toStringLazy . fromEntry) + in + case stylesElem of + Nothing -> (M.empty, M.empty) + Just styElem -> + let namespaces = elemToNameSpaces styElem + in + ( M.fromList $ map (\r -> (conv1 r, r)) $ buildBasedOnList namespaces styElem Nothing, + M.fromList $ map (\p -> (conv2 p, p)) $ buildBasedOnList namespaces styElem Nothing) + +isBasedOnStyle :: (ElemToStyle a, FromStyleId (StyleId a)) => NameSpaces -> Element -> Maybe a -> Bool +isBasedOnStyle ns element parentStyle + | isElem ns "w" "style" element + , Just styleType <- findAttrByName ns "w" "type" element + , styleType == cStyleType parentStyle + , Just basedOnVal <- findChildByName ns "w" "basedOn" element >>= + findAttrByName ns "w" "val" + , Just ps <- parentStyle = basedOnVal == fromStyleId (getStyleId ps) + | isElem ns "w" "style" element + , Just styleType <- findAttrByName ns "w" "type" element + , styleType == cStyleType parentStyle + , Nothing <- findChildByName ns "w" "basedOn" element + , Nothing <- parentStyle = True + | otherwise = False + +class HasStyleId a => ElemToStyle a where + cStyleType :: Maybe a -> String + elemToStyle :: NameSpaces -> Element -> Maybe a -> Maybe a + +class FromStyleId (StyleId a) => HasStyleId a where + type StyleId a + getStyleId :: a -> StyleId a + +class FromStyleName (StyleName a) => HasStyleName a where + type StyleName a + getStyleName :: a -> StyleName a + +instance ElemToStyle CharStyle where + cStyleType _ = "character" + elemToStyle ns element parentStyle + | isElem ns "w" "style" element + , Just "character" <- findAttrByName ns "w" "type" element = + elemToCharStyle ns element parentStyle + | otherwise = Nothing + +instance HasStyleId CharStyle where + type StyleId CharStyle = CharStyleId + getStyleId = cStyleId + +instance HasStyleName CharStyle where + type StyleName CharStyle = CharStyleName + getStyleName = cStyleName + +instance ElemToStyle ParStyle where + cStyleType _ = "paragraph" + elemToStyle ns element parentStyle + | isElem ns "w" "style" element + , Just "paragraph" <- findAttrByName ns "w" "type" element + = elemToParStyleData ns element parentStyle + | otherwise = Nothing + +instance HasStyleId ParStyle where + type StyleId ParStyle = ParaStyleId + getStyleId = pStyleId + +instance HasStyleName ParStyle where + type StyleName ParStyle = ParaStyleName + getStyleName = pStyleName + +getStyleChildren :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> [a] +getStyleChildren ns element parentStyle + | isElem ns "w" "styles" element = + mapMaybe (\e -> elemToStyle ns e parentStyle) $ + filterChildren (\e' -> isBasedOnStyle ns e' parentStyle) element + | otherwise = [] + +buildBasedOnList :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> [a] +buildBasedOnList ns element rootStyle = + case getStyleChildren ns element rootStyle of + [] -> [] + stys -> stys ++ + concatMap (buildBasedOnList ns element . Just) stys + +stringToInteger :: String -> Maybe Integer +stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)]) + +checkOnOff :: NameSpaces -> Element -> QName -> Maybe Bool +checkOnOff ns rPr tag + | Just t <- findChild tag rPr + , Just val <- findAttrByName ns "w" "val" t = + Just $ case val of + "true" -> True + "false" -> False + "on" -> True + "off" -> False + "1" -> True + "0" -> False + _ -> False + | Just _ <- findChild tag rPr = Just True +checkOnOff _ _ _ = Nothing + +elemToCharStyle :: NameSpaces + -> Element -> Maybe CharStyle -> Maybe CharStyle +elemToCharStyle ns element parentStyle + = CharStyle <$> (CharStyleId <$> findAttrByName ns "w" "styleId" element) + <*> getElementStyleName ns element + <*> (Just $ elemToRunStyle ns element parentStyle) + +elemToRunStyle :: NameSpaces -> Element -> Maybe CharStyle -> RunStyle +elemToRunStyle ns element parentStyle + | Just rPr <- findChildByName ns "w" "rPr" element = + RunStyle + { + isBold = checkOnOff ns rPr (elemName ns "w" "b") `mplus` + checkOnOff ns rPr (elemName ns "w" "bCs") + , isItalic = checkOnOff ns rPr (elemName ns "w" "i") `mplus` + checkOnOff ns rPr (elemName ns "w" "iCs") + , isSmallCaps = checkOnOff ns rPr (elemName ns "w" "smallCaps") + , isStrike = checkOnOff ns rPr (elemName ns "w" "strike") + , isRTL = checkOnOff ns rPr (elemName ns "w" "rtl") + , rVertAlign = + findChildByName ns "w" "vertAlign" rPr >>= + findAttrByName ns "w" "val" >>= + \v -> Just $ case v of + "superscript" -> SupScrpt + "subscript" -> SubScrpt + _ -> BaseLn + , rUnderline = + findChildByName ns "w" "u" rPr >>= + findAttrByName ns "w" "val" + , rParentStyle = parentStyle + } +elemToRunStyle _ _ _ = defaultRunStyle + +getHeaderLevel :: NameSpaces -> Element -> Maybe (ParaStyleName, Int) +getHeaderLevel ns element + | Just styleName <- getElementStyleName ns element + , Just n <- stringToInteger =<< + (stripPrefix "heading " . map toLower $ + fromStyleName styleName) + , n > 0 = Just (styleName, fromInteger n) +getHeaderLevel _ _ = Nothing + +getElementStyleName :: Coercible String a => NameSpaces -> Element -> Maybe a +getElementStyleName ns el = coerce <$> + ((findChildByName ns "w" "name" el >>= findAttrByName ns "w" "val") + <|> findAttrByName ns "w" "styleId" el) + +getNumInfo :: NameSpaces -> Element -> Maybe (String, String) +getNumInfo ns element = do + let numPr = findChildByName ns "w" "pPr" element >>= + findChildByName ns "w" "numPr" + lvl = fromMaybe "0" (numPr >>= + findChildByName ns "w" "ilvl" >>= + findAttrByName ns "w" "val") + numId <- numPr >>= + findChildByName ns "w" "numId" >>= + findAttrByName ns "w" "val" + return (numId, lvl) + +elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> Maybe ParStyle +elemToParStyleData ns element parentStyle + | Just styleId <- findAttrByName ns "w" "styleId" element + , Just styleName <- getElementStyleName ns element + = Just $ ParStyle + { + headingLev = getHeaderLevel ns element + , numInfo = getNumInfo ns element + , psParentStyle = parentStyle + , pStyleName = styleName + , pStyleId = ParaStyleId styleId + } +elemToParStyleData _ _ _ = Nothing diff --git a/src/Text/Pandoc/Readers/Docx/StyleMap.hs b/src/Text/Pandoc/Readers/Docx/StyleMap.hs deleted file mode 100644 index bdf7b4df2..000000000 --- a/src/Text/Pandoc/Readers/Docx/StyleMap.hs +++ /dev/null @@ -1,123 +0,0 @@ -{-# LANGUAGE NoImplicitPrelude #-} -{- | - Module : Text.Pandoc.Readers.Docx.StyleMaps - Copyright : © 2014-2019 Jesse Rosenthal <jrosenthal@jhu.edu>, - 2014-2019 John MacFarlane <jgm@berkeley.edu>, - 2015 Nikolay Yakimov <root@livid.pp.ru> - License : GNU GPL, version 2 or above - - Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu> - Stability : alpha - Portability : portable - -Mappings of element styles (word to pandoc-internal). --} -module Text.Pandoc.Readers.Docx.StyleMap ( StyleMaps(..) - , alterMap - , getMap - , defaultStyleMaps - , getStyleMaps - , getStyleId - , hasStyleName - ) where - -import Prelude -import Control.Monad.State.Strict -import Data.Char (toLower) -import qualified Data.Map as M -import Text.Pandoc.Readers.Docx.Util -import Text.XML.Light - -newtype ParaStyleMap = ParaStyleMap ( M.Map String String ) -newtype CharStyleMap = CharStyleMap ( M.Map String String ) - -class StyleMap a where - alterMap :: (M.Map String String -> M.Map String String) -> a -> a - getMap :: a -> M.Map String String - -instance StyleMap ParaStyleMap where - alterMap f (ParaStyleMap m) = ParaStyleMap $ f m - getMap (ParaStyleMap m) = m - -instance StyleMap CharStyleMap where - alterMap f (CharStyleMap m) = CharStyleMap $ f m - getMap (CharStyleMap m) = m - -insert :: (StyleMap a) => Maybe String -> Maybe String -> a -> a -insert (Just k) (Just v) m = alterMap (M.insert k v) m -insert _ _ m = m - -getStyleId :: (StyleMap a) => String -> a -> String -getStyleId s = M.findWithDefault (filter (/=' ') s) (map toLower s) . getMap - -hasStyleName :: (StyleMap a) => String -> a -> Bool -hasStyleName styleName = M.member (map toLower styleName) . getMap - -data StyleMaps = StyleMaps { sNameSpaces :: NameSpaces - , sParaStyleMap :: ParaStyleMap - , sCharStyleMap :: CharStyleMap - } - -data StyleType = ParaStyle | CharStyle - -defaultStyleMaps :: StyleMaps -defaultStyleMaps = StyleMaps { sNameSpaces = [] - , sParaStyleMap = ParaStyleMap M.empty - , sCharStyleMap = CharStyleMap M.empty - } - -type StateM a = State StyleMaps a - -getStyleMaps :: Element -> StyleMaps -getStyleMaps docElem = execState genStyleMap state' - where - state' = defaultStyleMaps {sNameSpaces = elemToNameSpaces docElem} - genStyleItem e = do - styleType <- getStyleType e - styleId <- getAttrStyleId e - nameValLowercase <- fmap (map toLower) `fmap` getNameVal e - case styleType of - Just ParaStyle -> modParaStyleMap $ insert nameValLowercase styleId - Just CharStyle -> modCharStyleMap $ insert nameValLowercase styleId - _ -> return () - genStyleMap = do - style <- elemName' "style" - let styles = findChildren style docElem - forM_ styles genStyleItem - -modParaStyleMap :: (ParaStyleMap -> ParaStyleMap) -> StateM () -modParaStyleMap f = modify $ \s -> - s {sParaStyleMap = f $ sParaStyleMap s} - -modCharStyleMap :: (CharStyleMap -> CharStyleMap) -> StateM () -modCharStyleMap f = modify $ \s -> - s {sCharStyleMap = f $ sCharStyleMap s} - -getStyleType :: Element -> StateM (Maybe StyleType) -getStyleType e = do - styleTypeStr <- getAttrType e - case styleTypeStr of - Just "paragraph" -> return $ Just ParaStyle - Just "character" -> return $ Just CharStyle - _ -> return Nothing - -getAttrType :: Element -> StateM (Maybe String) -getAttrType el = do - name <- elemName' "type" - return $ findAttr name el - -getAttrStyleId :: Element -> StateM (Maybe String) -getAttrStyleId el = do - name <- elemName' "styleId" - return $ findAttr name el - -getNameVal :: Element -> StateM (Maybe String) -getNameVal el = do - name <- elemName' "name" - val <- elemName' "val" - return $ findChild name el >>= findAttr val - -elemName' :: String -> StateM QName -elemName' name = do - namespaces <- gets sNameSpaces - return $ elemName namespaces "w" name |