aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
diff options
context:
space:
mode:
authorNikolay Yakimov <root@livid.pp.ru>2019-09-22 23:00:35 +0400
committerJohn MacFarlane <jgm@berkeley.edu>2019-09-22 12:00:35 -0700
commit9b6ee81c1916bb23d2cb24534adb88d65b4642df (patch)
tree1e5ca76bcb2c4617bfdff22ee299c1c9b9966138 /src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
parentd247e9f72e9c9a86cb0053cffc607b5f84f8b3a4 (diff)
downloadpandoc-9b6ee81c1916bb23d2cb24534adb88d65b4642df.tar.gz
[Docx Writer] Re-use Readers.Docx.Parse for StyleMap (#5766)
* [Docx Parser] Move style-parsing-specific code to a new module * [Docx Writer] Re-use Readers.Docx.Parse.Styles for StyleMap * [Docx Writer] Move Readers.Docx.StyleMap to Writers.Docx.StyleMap It's never used outside of writer code, so it makes more sense to scope it under writers really.
Diffstat (limited to 'src/Text/Pandoc/Readers/Docx/Parse/Styles.hs')
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse/Styles.hs304
1 files changed, 304 insertions, 0 deletions
diff --git a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
new file mode 100644
index 000000000..c2d27e7fb
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
@@ -0,0 +1,304 @@
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{- |
+ Module : Text.Pandoc.Readers.Docx.Parse.Styles
+ Copyright : Copyright (C) 2014-2019 Jesse Rosenthal
+ 2019 Nikolay Yakimov <root@livid.pp.ru>
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
+ Stability : alpha
+ Portability : portable
+
+Type machinery and code for extraction and manipulation of docx styles
+-}
+
+module Text.Pandoc.Readers.Docx.Parse.Styles (
+ CharStyleId(..)
+ , CharStyle
+ , ParaStyleId(..)
+ , ParStyle(..)
+ , RunStyle(..)
+ , HasStyleName
+ , StyleName
+ , ParaStyleName
+ , CharStyleName
+ , FromStyleName
+ , VertAlign(..)
+ , StyleId
+ , HasStyleId
+ , archiveToStyles'
+ , getStyleId
+ , getStyleName
+ , cStyleData
+ , fromStyleName
+ , fromStyleId
+ , stringToInteger
+ , getNumInfo
+ , elemToRunStyle
+ , defaultRunStyle
+ ) where
+import Prelude
+import Codec.Archive.Zip
+import Control.Applicative ((<|>))
+import Control.Monad.Except
+import Data.Char (toLower)
+import Data.List
+import Data.Function (on)
+import Data.String (IsString(..))
+import qualified Data.Map as M
+import Data.Maybe
+import Data.Coerce
+import Text.Pandoc.Readers.Docx.Util
+import qualified Text.Pandoc.UTF8 as UTF8
+import Text.XML.Light
+
+newtype CharStyleId = CharStyleId String
+ deriving (Show, Eq, Ord, IsString, FromStyleId)
+newtype ParaStyleId = ParaStyleId String
+ deriving (Show, Eq, Ord, IsString, FromStyleId)
+
+newtype CharStyleName = CharStyleName CIString
+ deriving (Show, Eq, Ord, IsString, FromStyleName)
+newtype ParaStyleName = ParaStyleName CIString
+ deriving (Show, Eq, Ord, IsString, FromStyleName)
+
+-- Case-insensitive comparisons
+newtype CIString = CIString String deriving (Show, IsString, FromStyleName)
+
+class FromStyleName a where
+ fromStyleName :: a -> String
+
+instance FromStyleName String where
+ fromStyleName = id
+
+class FromStyleId a where
+ fromStyleId :: a -> String
+
+instance FromStyleId String where
+ fromStyleId = id
+
+instance Eq CIString where
+ (==) = (==) `on` map toLower . coerce
+
+instance Ord CIString where
+ compare = compare `on` map toLower . coerce
+
+data VertAlign = BaseLn | SupScrpt | SubScrpt
+ deriving Show
+
+data CharStyle = CharStyle { cStyleId :: CharStyleId
+ , cStyleName :: CharStyleName
+ , cStyleData :: RunStyle
+ } deriving (Show)
+
+data RunStyle = RunStyle { isBold :: Maybe Bool
+ , isItalic :: Maybe Bool
+ , isSmallCaps :: Maybe Bool
+ , isStrike :: Maybe Bool
+ , isRTL :: Maybe Bool
+ , rVertAlign :: Maybe VertAlign
+ , rUnderline :: Maybe String
+ , rParentStyle :: Maybe CharStyle
+ }
+ deriving Show
+
+data ParStyle = ParStyle { headingLev :: Maybe (ParaStyleName, Int)
+ , numInfo :: Maybe (String, String)
+ , psParentStyle :: Maybe ParStyle
+ , pStyleName :: ParaStyleName
+ , pStyleId :: ParaStyleId
+ }
+ deriving Show
+
+defaultRunStyle :: RunStyle
+defaultRunStyle = RunStyle { isBold = Nothing
+ , isItalic = Nothing
+ , isSmallCaps = Nothing
+ , isStrike = Nothing
+ , isRTL = Nothing
+ , rVertAlign = Nothing
+ , rUnderline = Nothing
+ , rParentStyle = Nothing
+ }
+
+archiveToStyles' :: (Ord k1, Ord k2, ElemToStyle a1, ElemToStyle a2) =>
+ (a1 -> k1) -> (a2 -> k2) -> Archive -> (M.Map k1 a1, M.Map k2 a2)
+archiveToStyles' conv1 conv2 zf =
+ let stylesElem = findEntryByPath "word/styles.xml" zf >>=
+ (parseXMLDoc . UTF8.toStringLazy . fromEntry)
+ in
+ case stylesElem of
+ Nothing -> (M.empty, M.empty)
+ Just styElem ->
+ let namespaces = elemToNameSpaces styElem
+ in
+ ( M.fromList $ map (\r -> (conv1 r, r)) $ buildBasedOnList namespaces styElem Nothing,
+ M.fromList $ map (\p -> (conv2 p, p)) $ buildBasedOnList namespaces styElem Nothing)
+
+isBasedOnStyle :: (ElemToStyle a, FromStyleId (StyleId a)) => NameSpaces -> Element -> Maybe a -> Bool
+isBasedOnStyle ns element parentStyle
+ | isElem ns "w" "style" element
+ , Just styleType <- findAttrByName ns "w" "type" element
+ , styleType == cStyleType parentStyle
+ , Just basedOnVal <- findChildByName ns "w" "basedOn" element >>=
+ findAttrByName ns "w" "val"
+ , Just ps <- parentStyle = basedOnVal == fromStyleId (getStyleId ps)
+ | isElem ns "w" "style" element
+ , Just styleType <- findAttrByName ns "w" "type" element
+ , styleType == cStyleType parentStyle
+ , Nothing <- findChildByName ns "w" "basedOn" element
+ , Nothing <- parentStyle = True
+ | otherwise = False
+
+class HasStyleId a => ElemToStyle a where
+ cStyleType :: Maybe a -> String
+ elemToStyle :: NameSpaces -> Element -> Maybe a -> Maybe a
+
+class FromStyleId (StyleId a) => HasStyleId a where
+ type StyleId a
+ getStyleId :: a -> StyleId a
+
+class FromStyleName (StyleName a) => HasStyleName a where
+ type StyleName a
+ getStyleName :: a -> StyleName a
+
+instance ElemToStyle CharStyle where
+ cStyleType _ = "character"
+ elemToStyle ns element parentStyle
+ | isElem ns "w" "style" element
+ , Just "character" <- findAttrByName ns "w" "type" element =
+ elemToCharStyle ns element parentStyle
+ | otherwise = Nothing
+
+instance HasStyleId CharStyle where
+ type StyleId CharStyle = CharStyleId
+ getStyleId = cStyleId
+
+instance HasStyleName CharStyle where
+ type StyleName CharStyle = CharStyleName
+ getStyleName = cStyleName
+
+instance ElemToStyle ParStyle where
+ cStyleType _ = "paragraph"
+ elemToStyle ns element parentStyle
+ | isElem ns "w" "style" element
+ , Just "paragraph" <- findAttrByName ns "w" "type" element
+ = elemToParStyleData ns element parentStyle
+ | otherwise = Nothing
+
+instance HasStyleId ParStyle where
+ type StyleId ParStyle = ParaStyleId
+ getStyleId = pStyleId
+
+instance HasStyleName ParStyle where
+ type StyleName ParStyle = ParaStyleName
+ getStyleName = pStyleName
+
+getStyleChildren :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> [a]
+getStyleChildren ns element parentStyle
+ | isElem ns "w" "styles" element =
+ mapMaybe (\e -> elemToStyle ns e parentStyle) $
+ filterChildren (\e' -> isBasedOnStyle ns e' parentStyle) element
+ | otherwise = []
+
+buildBasedOnList :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> [a]
+buildBasedOnList ns element rootStyle =
+ case getStyleChildren ns element rootStyle of
+ [] -> []
+ stys -> stys ++
+ concatMap (buildBasedOnList ns element . Just) stys
+
+stringToInteger :: String -> Maybe Integer
+stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)])
+
+checkOnOff :: NameSpaces -> Element -> QName -> Maybe Bool
+checkOnOff ns rPr tag
+ | Just t <- findChild tag rPr
+ , Just val <- findAttrByName ns "w" "val" t =
+ Just $ case val of
+ "true" -> True
+ "false" -> False
+ "on" -> True
+ "off" -> False
+ "1" -> True
+ "0" -> False
+ _ -> False
+ | Just _ <- findChild tag rPr = Just True
+checkOnOff _ _ _ = Nothing
+
+elemToCharStyle :: NameSpaces
+ -> Element -> Maybe CharStyle -> Maybe CharStyle
+elemToCharStyle ns element parentStyle
+ = CharStyle <$> (CharStyleId <$> findAttrByName ns "w" "styleId" element)
+ <*> getElementStyleName ns element
+ <*> (Just $ elemToRunStyle ns element parentStyle)
+
+elemToRunStyle :: NameSpaces -> Element -> Maybe CharStyle -> RunStyle
+elemToRunStyle ns element parentStyle
+ | Just rPr <- findChildByName ns "w" "rPr" element =
+ RunStyle
+ {
+ isBold = checkOnOff ns rPr (elemName ns "w" "b") `mplus`
+ checkOnOff ns rPr (elemName ns "w" "bCs")
+ , isItalic = checkOnOff ns rPr (elemName ns "w" "i") `mplus`
+ checkOnOff ns rPr (elemName ns "w" "iCs")
+ , isSmallCaps = checkOnOff ns rPr (elemName ns "w" "smallCaps")
+ , isStrike = checkOnOff ns rPr (elemName ns "w" "strike")
+ , isRTL = checkOnOff ns rPr (elemName ns "w" "rtl")
+ , rVertAlign =
+ findChildByName ns "w" "vertAlign" rPr >>=
+ findAttrByName ns "w" "val" >>=
+ \v -> Just $ case v of
+ "superscript" -> SupScrpt
+ "subscript" -> SubScrpt
+ _ -> BaseLn
+ , rUnderline =
+ findChildByName ns "w" "u" rPr >>=
+ findAttrByName ns "w" "val"
+ , rParentStyle = parentStyle
+ }
+elemToRunStyle _ _ _ = defaultRunStyle
+
+getHeaderLevel :: NameSpaces -> Element -> Maybe (ParaStyleName, Int)
+getHeaderLevel ns element
+ | Just styleName <- getElementStyleName ns element
+ , Just n <- stringToInteger =<<
+ (stripPrefix "heading " . map toLower $
+ fromStyleName styleName)
+ , n > 0 = Just (styleName, fromInteger n)
+getHeaderLevel _ _ = Nothing
+
+getElementStyleName :: Coercible String a => NameSpaces -> Element -> Maybe a
+getElementStyleName ns el = coerce <$>
+ ((findChildByName ns "w" "name" el >>= findAttrByName ns "w" "val")
+ <|> findAttrByName ns "w" "styleId" el)
+
+getNumInfo :: NameSpaces -> Element -> Maybe (String, String)
+getNumInfo ns element = do
+ let numPr = findChildByName ns "w" "pPr" element >>=
+ findChildByName ns "w" "numPr"
+ lvl = fromMaybe "0" (numPr >>=
+ findChildByName ns "w" "ilvl" >>=
+ findAttrByName ns "w" "val")
+ numId <- numPr >>=
+ findChildByName ns "w" "numId" >>=
+ findAttrByName ns "w" "val"
+ return (numId, lvl)
+
+elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> Maybe ParStyle
+elemToParStyleData ns element parentStyle
+ | Just styleId <- findAttrByName ns "w" "styleId" element
+ , Just styleName <- getElementStyleName ns element
+ = Just $ ParStyle
+ {
+ headingLev = getHeaderLevel ns element
+ , numInfo = getNumInfo ns element
+ , psParentStyle = parentStyle
+ , pStyleName = styleName
+ , pStyleId = ParaStyleId styleId
+ }
+elemToParStyleData _ _ _ = Nothing