aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Docx
diff options
context:
space:
mode:
authorNikolay Yakimov <root@livid.pp.ru>2019-09-22 23:00:35 +0400
committerJohn MacFarlane <jgm@berkeley.edu>2019-09-22 12:00:35 -0700
commit9b6ee81c1916bb23d2cb24534adb88d65b4642df (patch)
tree1e5ca76bcb2c4617bfdff22ee299c1c9b9966138 /src/Text/Pandoc/Readers/Docx
parentd247e9f72e9c9a86cb0053cffc607b5f84f8b3a4 (diff)
downloadpandoc-9b6ee81c1916bb23d2cb24534adb88d65b4642df.tar.gz
[Docx Writer] Re-use Readers.Docx.Parse for StyleMap (#5766)
* [Docx Parser] Move style-parsing-specific code to a new module * [Docx Writer] Re-use Readers.Docx.Parse.Styles for StyleMap * [Docx Writer] Move Readers.Docx.StyleMap to Writers.Docx.StyleMap It's never used outside of writer code, so it makes more sense to scope it under writers really.
Diffstat (limited to 'src/Text/Pandoc/Readers/Docx')
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse.hs260
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse/Styles.hs304
-rw-r--r--src/Text/Pandoc/Readers/Docx/StyleMap.hs123
3 files changed, 307 insertions, 380 deletions
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 00c5fb0be..8c5c94bb9 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -1,14 +1,9 @@
{-# LANGUAGE NoImplicitPrelude #-}
-{-# LANGUAGE FlexibleInstances #-}
-{-# LANGUAGE FlexibleContexts #-}
-{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE ViewPatterns #-}
-{-# LANGUAGE OverloadedStrings #-}
-{-# LANGUAGE TypeFamilies #-}
-{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{- |
Module : Text.Pandoc.Readers.Docx.Parse
Copyright : Copyright (C) 2014-2019 Jesse Rosenthal
+ 2019 Nikolay Yakimov <root@livid.pp.ru>
License : GNU GPL, version 2 or above
Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
@@ -57,6 +52,7 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..)
, leftBiasedMergeRunStyle
) where
import Prelude
+import Text.Pandoc.Readers.Docx.Parse.Styles
import Codec.Archive.Zip
import Control.Applicative ((<|>))
import Control.Monad.Except
@@ -64,13 +60,10 @@ import Control.Monad.Reader
import Control.Monad.State.Strict
import Data.Bits ((.|.))
import qualified Data.ByteString.Lazy as B
-import Data.Char (chr, ord, readLitChar, toLower)
+import Data.Char (chr, ord, readLitChar)
import Data.List
-import Data.Function (on)
-import Data.String (IsString(..))
import qualified Data.Map as M
import Data.Maybe
-import Data.Coerce
import System.FilePath
import Text.Pandoc.Readers.Docx.Util
import Text.Pandoc.Readers.Docx.Fields
@@ -262,37 +255,6 @@ newtype Row = Row [Cell]
newtype Cell = Cell [BodyPart]
deriving Show
-newtype CharStyleId = CharStyleId { fromCharStyleId :: String }
- deriving (Show, Eq, Ord, FromStyleId)
-newtype ParaStyleId = ParaStyleId { fromParaStyleId :: String }
- deriving (Show, Eq, Ord, FromStyleId)
-
-newtype CharStyleName = CharStyleName { fromCharStyleName :: CIString }
- deriving (Show, Eq, Ord, IsString, FromStyleName)
-newtype ParaStyleName = ParaStyleName { fromParaStyleName :: CIString }
- deriving (Show, Eq, Ord, IsString, FromStyleName)
-
--- Case-insensitive comparisons
-newtype CIString = CIString String deriving (Show, IsString, FromStyleName)
-
-class FromStyleName a where
- fromStyleName :: a -> String
-
-instance FromStyleName String where
- fromStyleName = id
-
-class FromStyleId a where
- fromStyleId :: a -> String
-
-instance FromStyleId String where
- fromStyleId = id
-
-instance Eq CIString where
- (==) = (==) `on` map toLower . coerce
-
-instance Ord CIString where
- compare = compare `on` map toLower . coerce
-
leftBiasedMergeRunStyle :: RunStyle -> RunStyle -> RunStyle
leftBiasedMergeRunStyle a b = RunStyle
{ isBold = isBold a <|> isBold b
@@ -333,44 +295,6 @@ data Run = Run RunStyle [RunElem]
data RunElem = TextRun String | LnBrk | Tab | SoftHyphen | NoBreakHyphen
deriving Show
-data VertAlign = BaseLn | SupScrpt | SubScrpt
- deriving Show
-
-data CharStyle = CharStyle { cStyleId :: CharStyleId
- , cStyleName :: CharStyleName
- , cStyleData :: RunStyle
- } deriving (Show)
-
-data RunStyle = RunStyle { isBold :: Maybe Bool
- , isItalic :: Maybe Bool
- , isSmallCaps :: Maybe Bool
- , isStrike :: Maybe Bool
- , isRTL :: Maybe Bool
- , rVertAlign :: Maybe VertAlign
- , rUnderline :: Maybe String
- , rParentStyle :: Maybe CharStyle
- }
- deriving Show
-
-data ParStyle = ParStyle { headingLev :: Maybe (ParaStyleName, Int)
- , numInfo :: Maybe (String, String)
- , psParentStyle :: Maybe ParStyle
- , pStyleName :: ParaStyleName
- , pStyleId :: ParaStyleId
- }
- deriving Show
-
-defaultRunStyle :: RunStyle
-defaultRunStyle = RunStyle { isBold = Nothing
- , isItalic = Nothing
- , isSmallCaps = Nothing
- , isStrike = Nothing
- , isRTL = Nothing
- , rVertAlign = Nothing
- , rUnderline = Nothing
- , rParentStyle = Nothing
- }
-
type Target = String
type Anchor = String
type URL = String
@@ -449,46 +373,6 @@ elemToBody _ _ = throwError WrongElem
archiveToStyles :: Archive -> (CharStyleMap, ParStyleMap)
archiveToStyles = archiveToStyles' getStyleId getStyleId
-archiveToStyles' :: (Ord k1, Ord k2, ElemToStyle a1, ElemToStyle a2) =>
- (a1 -> k1) -> (a2 -> k2) -> Archive -> (M.Map k1 a1, M.Map k2 a2)
-archiveToStyles' conv1 conv2 zf =
- let stylesElem = findEntryByPath "word/styles.xml" zf >>=
- (parseXMLDoc . UTF8.toStringLazy . fromEntry)
- in
- case stylesElem of
- Nothing -> (M.empty, M.empty)
- Just styElem ->
- let namespaces = elemToNameSpaces styElem
- in
- ( M.fromList $ map (\r -> (conv1 r, r)) $ buildBasedOnList namespaces styElem Nothing,
- M.fromList $ map (\p -> (conv2 p, p)) $ buildBasedOnList namespaces styElem Nothing)
-
-isBasedOnStyle :: (ElemToStyle a, FromStyleId (StyleId a)) => NameSpaces -> Element -> Maybe a -> Bool
-isBasedOnStyle ns element parentStyle
- | isElem ns "w" "style" element
- , Just styleType <- findAttrByName ns "w" "type" element
- , styleType == cStyleType parentStyle
- , Just basedOnVal <- findChildByName ns "w" "basedOn" element >>=
- findAttrByName ns "w" "val"
- , Just ps <- parentStyle = basedOnVal == fromStyleId (getStyleId ps)
- | isElem ns "w" "style" element
- , Just styleType <- findAttrByName ns "w" "type" element
- , styleType == cStyleType parentStyle
- , Nothing <- findChildByName ns "w" "basedOn" element
- , Nothing <- parentStyle = True
- | otherwise = False
-
-class HasStyleId a => ElemToStyle a where
- cStyleType :: Maybe a -> String
- elemToStyle :: NameSpaces -> Element -> Maybe a -> Maybe a
-
-class FromStyleId (StyleId a) => HasStyleId a where
- type StyleId a
- getStyleId :: a -> StyleId a
-
-class FromStyleName (StyleName a) => HasStyleName a where
- type StyleName a
- getStyleName :: a -> StyleName a
class HasParentStyle a where
getParentStyle :: a -> Maybe a
@@ -511,52 +395,6 @@ constructBogusParStyleData stName = ParStyle
, pStyleId = ParaStyleId . filter (/=' ') . fromStyleName $ stName
}
-instance ElemToStyle CharStyle where
- cStyleType _ = "character"
- elemToStyle ns element parentStyle
- | isElem ns "w" "style" element
- , Just "character" <- findAttrByName ns "w" "type" element =
- elemToCharStyle ns element parentStyle
- | otherwise = Nothing
-
-instance HasStyleId CharStyle where
- type StyleId CharStyle = CharStyleId
- getStyleId = cStyleId
-
-instance HasStyleName CharStyle where
- type StyleName CharStyle = CharStyleName
- getStyleName = cStyleName
-
-instance ElemToStyle ParStyle where
- cStyleType _ = "paragraph"
- elemToStyle ns element parentStyle
- | isElem ns "w" "style" element
- , Just "paragraph" <- findAttrByName ns "w" "type" element
- = elemToParStyleData ns element parentStyle
- | otherwise = Nothing
-
-instance HasStyleId ParStyle where
- type StyleId ParStyle = ParaStyleId
- getStyleId = pStyleId
-
-instance HasStyleName ParStyle where
- type StyleName ParStyle = ParaStyleName
- getStyleName = pStyleName
-
-getStyleChildren :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> [a]
-getStyleChildren ns element parentStyle
- | isElem ns "w" "styles" element =
- mapMaybe (\e -> elemToStyle ns e parentStyle) $
- filterChildren (\e' -> isBasedOnStyle ns e' parentStyle) element
- | otherwise = []
-
-buildBasedOnList :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> [a]
-buildBasedOnList ns element rootStyle =
- case getStyleChildren ns element rootStyle of
- [] -> []
- stys -> stys ++
- concatMap (buildBasedOnList ns element . Just) stys
-
archiveToNotes :: Archive -> Notes
archiveToNotes zf =
let fnElem = findEntryByPath "word/footnotes.xml" zf
@@ -789,9 +627,6 @@ testBitMask bitMaskS n =
[] -> False
((n', _) : _) -> (n' .|. n) /= 0
-stringToInteger :: String -> Maybe Integer
-stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)])
-
pHeading :: ParagraphStyle -> Maybe (ParaStyleName, Int)
pHeading = getParStyleField headingLev . pStyle
@@ -1166,21 +1001,6 @@ elemToParagraphStyle ns element sty
}
elemToParagraphStyle _ _ _ = defaultParagraphStyle
-checkOnOff :: NameSpaces -> Element -> QName -> Maybe Bool
-checkOnOff ns rPr tag
- | Just t <- findChild tag rPr
- , Just val <- findAttrByName ns "w" "val" t =
- Just $ case val of
- "true" -> True
- "false" -> False
- "on" -> True
- "off" -> False
- "1" -> True
- "0" -> False
- _ -> False
- | Just _ <- findChild tag rPr = Just True
-checkOnOff _ _ _ = Nothing
-
elemToRunStyleD :: NameSpaces -> Element -> D RunStyle
elemToRunStyleD ns element
| Just rPr <- findChildByName ns "w" "rPr" element = do
@@ -1192,80 +1012,6 @@ elemToRunStyleD ns element
return $ elemToRunStyle ns element parentSty
elemToRunStyleD _ _ = return defaultRunStyle
-elemToCharStyle :: NameSpaces
- -> Element -> Maybe CharStyle -> Maybe CharStyle
-elemToCharStyle ns element parentStyle
- = CharStyle <$> (CharStyleId <$> findAttrByName ns "w" "styleId" element)
- <*> getElementStyleName ns element
- <*> (Just $ elemToRunStyle ns element parentStyle)
-
-elemToRunStyle :: NameSpaces -> Element -> Maybe CharStyle -> RunStyle
-elemToRunStyle ns element parentStyle
- | Just rPr <- findChildByName ns "w" "rPr" element =
- RunStyle
- {
- isBold = checkOnOff ns rPr (elemName ns "w" "b") `mplus`
- checkOnOff ns rPr (elemName ns "w" "bCs")
- , isItalic = checkOnOff ns rPr (elemName ns "w" "i") `mplus`
- checkOnOff ns rPr (elemName ns "w" "iCs")
- , isSmallCaps = checkOnOff ns rPr (elemName ns "w" "smallCaps")
- , isStrike = checkOnOff ns rPr (elemName ns "w" "strike")
- , isRTL = checkOnOff ns rPr (elemName ns "w" "rtl")
- , rVertAlign =
- findChildByName ns "w" "vertAlign" rPr >>=
- findAttrByName ns "w" "val" >>=
- \v -> Just $ case v of
- "superscript" -> SupScrpt
- "subscript" -> SubScrpt
- _ -> BaseLn
- , rUnderline =
- findChildByName ns "w" "u" rPr >>=
- findAttrByName ns "w" "val"
- , rParentStyle = parentStyle
- }
-elemToRunStyle _ _ _ = defaultRunStyle
-
-getHeaderLevel :: NameSpaces -> Element -> Maybe (ParaStyleName, Int)
-getHeaderLevel ns element
- | Just styleName <- getElementStyleName ns element
- , Just n <- stringToInteger =<<
- (stripPrefix "heading " . map toLower $
- fromStyleName styleName)
- , n > 0 = Just (styleName, fromInteger n)
-getHeaderLevel _ _ = Nothing
-
-getElementStyleName :: Coercible String a => NameSpaces -> Element -> Maybe a
-getElementStyleName ns el = coerce <$>
- ((findChildByName ns "w" "name" el >>= findAttrByName ns "w" "val")
- <|> findAttrByName ns "w" "styleId" el)
-
-getNumInfo :: NameSpaces -> Element -> Maybe (String, String)
-getNumInfo ns element = do
- let numPr = findChildByName ns "w" "pPr" element >>=
- findChildByName ns "w" "numPr"
- lvl = fromMaybe "0" (numPr >>=
- findChildByName ns "w" "ilvl" >>=
- findAttrByName ns "w" "val")
- numId <- numPr >>=
- findChildByName ns "w" "numId" >>=
- findAttrByName ns "w" "val"
- return (numId, lvl)
-
-
-elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> Maybe ParStyle
-elemToParStyleData ns element parentStyle
- | Just styleId <- findAttrByName ns "w" "styleId" element
- , Just styleName <- getElementStyleName ns element
- = Just $ ParStyle
- {
- headingLev = getHeaderLevel ns element
- , numInfo = getNumInfo ns element
- , psParentStyle = parentStyle
- , pStyleName = styleName
- , pStyleId = ParaStyleId styleId
- }
-elemToParStyleData _ _ _ = Nothing
-
elemToRunElem :: NameSpaces -> Element -> D RunElem
elemToRunElem ns element
| isElem ns "w" "t" element
diff --git a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
new file mode 100644
index 000000000..c2d27e7fb
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
@@ -0,0 +1,304 @@
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{- |
+ Module : Text.Pandoc.Readers.Docx.Parse.Styles
+ Copyright : Copyright (C) 2014-2019 Jesse Rosenthal
+ 2019 Nikolay Yakimov <root@livid.pp.ru>
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
+ Stability : alpha
+ Portability : portable
+
+Type machinery and code for extraction and manipulation of docx styles
+-}
+
+module Text.Pandoc.Readers.Docx.Parse.Styles (
+ CharStyleId(..)
+ , CharStyle
+ , ParaStyleId(..)
+ , ParStyle(..)
+ , RunStyle(..)
+ , HasStyleName
+ , StyleName
+ , ParaStyleName
+ , CharStyleName
+ , FromStyleName
+ , VertAlign(..)
+ , StyleId
+ , HasStyleId
+ , archiveToStyles'
+ , getStyleId
+ , getStyleName
+ , cStyleData
+ , fromStyleName
+ , fromStyleId
+ , stringToInteger
+ , getNumInfo
+ , elemToRunStyle
+ , defaultRunStyle
+ ) where
+import Prelude
+import Codec.Archive.Zip
+import Control.Applicative ((<|>))
+import Control.Monad.Except
+import Data.Char (toLower)
+import Data.List
+import Data.Function (on)
+import Data.String (IsString(..))
+import qualified Data.Map as M
+import Data.Maybe
+import Data.Coerce
+import Text.Pandoc.Readers.Docx.Util
+import qualified Text.Pandoc.UTF8 as UTF8
+import Text.XML.Light
+
+newtype CharStyleId = CharStyleId String
+ deriving (Show, Eq, Ord, IsString, FromStyleId)
+newtype ParaStyleId = ParaStyleId String
+ deriving (Show, Eq, Ord, IsString, FromStyleId)
+
+newtype CharStyleName = CharStyleName CIString
+ deriving (Show, Eq, Ord, IsString, FromStyleName)
+newtype ParaStyleName = ParaStyleName CIString
+ deriving (Show, Eq, Ord, IsString, FromStyleName)
+
+-- Case-insensitive comparisons
+newtype CIString = CIString String deriving (Show, IsString, FromStyleName)
+
+class FromStyleName a where
+ fromStyleName :: a -> String
+
+instance FromStyleName String where
+ fromStyleName = id
+
+class FromStyleId a where
+ fromStyleId :: a -> String
+
+instance FromStyleId String where
+ fromStyleId = id
+
+instance Eq CIString where
+ (==) = (==) `on` map toLower . coerce
+
+instance Ord CIString where
+ compare = compare `on` map toLower . coerce
+
+data VertAlign = BaseLn | SupScrpt | SubScrpt
+ deriving Show
+
+data CharStyle = CharStyle { cStyleId :: CharStyleId
+ , cStyleName :: CharStyleName
+ , cStyleData :: RunStyle
+ } deriving (Show)
+
+data RunStyle = RunStyle { isBold :: Maybe Bool
+ , isItalic :: Maybe Bool
+ , isSmallCaps :: Maybe Bool
+ , isStrike :: Maybe Bool
+ , isRTL :: Maybe Bool
+ , rVertAlign :: Maybe VertAlign
+ , rUnderline :: Maybe String
+ , rParentStyle :: Maybe CharStyle
+ }
+ deriving Show
+
+data ParStyle = ParStyle { headingLev :: Maybe (ParaStyleName, Int)
+ , numInfo :: Maybe (String, String)
+ , psParentStyle :: Maybe ParStyle
+ , pStyleName :: ParaStyleName
+ , pStyleId :: ParaStyleId
+ }
+ deriving Show
+
+defaultRunStyle :: RunStyle
+defaultRunStyle = RunStyle { isBold = Nothing
+ , isItalic = Nothing
+ , isSmallCaps = Nothing
+ , isStrike = Nothing
+ , isRTL = Nothing
+ , rVertAlign = Nothing
+ , rUnderline = Nothing
+ , rParentStyle = Nothing
+ }
+
+archiveToStyles' :: (Ord k1, Ord k2, ElemToStyle a1, ElemToStyle a2) =>
+ (a1 -> k1) -> (a2 -> k2) -> Archive -> (M.Map k1 a1, M.Map k2 a2)
+archiveToStyles' conv1 conv2 zf =
+ let stylesElem = findEntryByPath "word/styles.xml" zf >>=
+ (parseXMLDoc . UTF8.toStringLazy . fromEntry)
+ in
+ case stylesElem of
+ Nothing -> (M.empty, M.empty)
+ Just styElem ->
+ let namespaces = elemToNameSpaces styElem
+ in
+ ( M.fromList $ map (\r -> (conv1 r, r)) $ buildBasedOnList namespaces styElem Nothing,
+ M.fromList $ map (\p -> (conv2 p, p)) $ buildBasedOnList namespaces styElem Nothing)
+
+isBasedOnStyle :: (ElemToStyle a, FromStyleId (StyleId a)) => NameSpaces -> Element -> Maybe a -> Bool
+isBasedOnStyle ns element parentStyle
+ | isElem ns "w" "style" element
+ , Just styleType <- findAttrByName ns "w" "type" element
+ , styleType == cStyleType parentStyle
+ , Just basedOnVal <- findChildByName ns "w" "basedOn" element >>=
+ findAttrByName ns "w" "val"
+ , Just ps <- parentStyle = basedOnVal == fromStyleId (getStyleId ps)
+ | isElem ns "w" "style" element
+ , Just styleType <- findAttrByName ns "w" "type" element
+ , styleType == cStyleType parentStyle
+ , Nothing <- findChildByName ns "w" "basedOn" element
+ , Nothing <- parentStyle = True
+ | otherwise = False
+
+class HasStyleId a => ElemToStyle a where
+ cStyleType :: Maybe a -> String
+ elemToStyle :: NameSpaces -> Element -> Maybe a -> Maybe a
+
+class FromStyleId (StyleId a) => HasStyleId a where
+ type StyleId a
+ getStyleId :: a -> StyleId a
+
+class FromStyleName (StyleName a) => HasStyleName a where
+ type StyleName a
+ getStyleName :: a -> StyleName a
+
+instance ElemToStyle CharStyle where
+ cStyleType _ = "character"
+ elemToStyle ns element parentStyle
+ | isElem ns "w" "style" element
+ , Just "character" <- findAttrByName ns "w" "type" element =
+ elemToCharStyle ns element parentStyle
+ | otherwise = Nothing
+
+instance HasStyleId CharStyle where
+ type StyleId CharStyle = CharStyleId
+ getStyleId = cStyleId
+
+instance HasStyleName CharStyle where
+ type StyleName CharStyle = CharStyleName
+ getStyleName = cStyleName
+
+instance ElemToStyle ParStyle where
+ cStyleType _ = "paragraph"
+ elemToStyle ns element parentStyle
+ | isElem ns "w" "style" element
+ , Just "paragraph" <- findAttrByName ns "w" "type" element
+ = elemToParStyleData ns element parentStyle
+ | otherwise = Nothing
+
+instance HasStyleId ParStyle where
+ type StyleId ParStyle = ParaStyleId
+ getStyleId = pStyleId
+
+instance HasStyleName ParStyle where
+ type StyleName ParStyle = ParaStyleName
+ getStyleName = pStyleName
+
+getStyleChildren :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> [a]
+getStyleChildren ns element parentStyle
+ | isElem ns "w" "styles" element =
+ mapMaybe (\e -> elemToStyle ns e parentStyle) $
+ filterChildren (\e' -> isBasedOnStyle ns e' parentStyle) element
+ | otherwise = []
+
+buildBasedOnList :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> [a]
+buildBasedOnList ns element rootStyle =
+ case getStyleChildren ns element rootStyle of
+ [] -> []
+ stys -> stys ++
+ concatMap (buildBasedOnList ns element . Just) stys
+
+stringToInteger :: String -> Maybe Integer
+stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)])
+
+checkOnOff :: NameSpaces -> Element -> QName -> Maybe Bool
+checkOnOff ns rPr tag
+ | Just t <- findChild tag rPr
+ , Just val <- findAttrByName ns "w" "val" t =
+ Just $ case val of
+ "true" -> True
+ "false" -> False
+ "on" -> True
+ "off" -> False
+ "1" -> True
+ "0" -> False
+ _ -> False
+ | Just _ <- findChild tag rPr = Just True
+checkOnOff _ _ _ = Nothing
+
+elemToCharStyle :: NameSpaces
+ -> Element -> Maybe CharStyle -> Maybe CharStyle
+elemToCharStyle ns element parentStyle
+ = CharStyle <$> (CharStyleId <$> findAttrByName ns "w" "styleId" element)
+ <*> getElementStyleName ns element
+ <*> (Just $ elemToRunStyle ns element parentStyle)
+
+elemToRunStyle :: NameSpaces -> Element -> Maybe CharStyle -> RunStyle
+elemToRunStyle ns element parentStyle
+ | Just rPr <- findChildByName ns "w" "rPr" element =
+ RunStyle
+ {
+ isBold = checkOnOff ns rPr (elemName ns "w" "b") `mplus`
+ checkOnOff ns rPr (elemName ns "w" "bCs")
+ , isItalic = checkOnOff ns rPr (elemName ns "w" "i") `mplus`
+ checkOnOff ns rPr (elemName ns "w" "iCs")
+ , isSmallCaps = checkOnOff ns rPr (elemName ns "w" "smallCaps")
+ , isStrike = checkOnOff ns rPr (elemName ns "w" "strike")
+ , isRTL = checkOnOff ns rPr (elemName ns "w" "rtl")
+ , rVertAlign =
+ findChildByName ns "w" "vertAlign" rPr >>=
+ findAttrByName ns "w" "val" >>=
+ \v -> Just $ case v of
+ "superscript" -> SupScrpt
+ "subscript" -> SubScrpt
+ _ -> BaseLn
+ , rUnderline =
+ findChildByName ns "w" "u" rPr >>=
+ findAttrByName ns "w" "val"
+ , rParentStyle = parentStyle
+ }
+elemToRunStyle _ _ _ = defaultRunStyle
+
+getHeaderLevel :: NameSpaces -> Element -> Maybe (ParaStyleName, Int)
+getHeaderLevel ns element
+ | Just styleName <- getElementStyleName ns element
+ , Just n <- stringToInteger =<<
+ (stripPrefix "heading " . map toLower $
+ fromStyleName styleName)
+ , n > 0 = Just (styleName, fromInteger n)
+getHeaderLevel _ _ = Nothing
+
+getElementStyleName :: Coercible String a => NameSpaces -> Element -> Maybe a
+getElementStyleName ns el = coerce <$>
+ ((findChildByName ns "w" "name" el >>= findAttrByName ns "w" "val")
+ <|> findAttrByName ns "w" "styleId" el)
+
+getNumInfo :: NameSpaces -> Element -> Maybe (String, String)
+getNumInfo ns element = do
+ let numPr = findChildByName ns "w" "pPr" element >>=
+ findChildByName ns "w" "numPr"
+ lvl = fromMaybe "0" (numPr >>=
+ findChildByName ns "w" "ilvl" >>=
+ findAttrByName ns "w" "val")
+ numId <- numPr >>=
+ findChildByName ns "w" "numId" >>=
+ findAttrByName ns "w" "val"
+ return (numId, lvl)
+
+elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> Maybe ParStyle
+elemToParStyleData ns element parentStyle
+ | Just styleId <- findAttrByName ns "w" "styleId" element
+ , Just styleName <- getElementStyleName ns element
+ = Just $ ParStyle
+ {
+ headingLev = getHeaderLevel ns element
+ , numInfo = getNumInfo ns element
+ , psParentStyle = parentStyle
+ , pStyleName = styleName
+ , pStyleId = ParaStyleId styleId
+ }
+elemToParStyleData _ _ _ = Nothing
diff --git a/src/Text/Pandoc/Readers/Docx/StyleMap.hs b/src/Text/Pandoc/Readers/Docx/StyleMap.hs
deleted file mode 100644
index bdf7b4df2..000000000
--- a/src/Text/Pandoc/Readers/Docx/StyleMap.hs
+++ /dev/null
@@ -1,123 +0,0 @@
-{-# LANGUAGE NoImplicitPrelude #-}
-{- |
- Module : Text.Pandoc.Readers.Docx.StyleMaps
- Copyright : © 2014-2019 Jesse Rosenthal <jrosenthal@jhu.edu>,
- 2014-2019 John MacFarlane <jgm@berkeley.edu>,
- 2015 Nikolay Yakimov <root@livid.pp.ru>
- License : GNU GPL, version 2 or above
-
- Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
- Stability : alpha
- Portability : portable
-
-Mappings of element styles (word to pandoc-internal).
--}
-module Text.Pandoc.Readers.Docx.StyleMap ( StyleMaps(..)
- , alterMap
- , getMap
- , defaultStyleMaps
- , getStyleMaps
- , getStyleId
- , hasStyleName
- ) where
-
-import Prelude
-import Control.Monad.State.Strict
-import Data.Char (toLower)
-import qualified Data.Map as M
-import Text.Pandoc.Readers.Docx.Util
-import Text.XML.Light
-
-newtype ParaStyleMap = ParaStyleMap ( M.Map String String )
-newtype CharStyleMap = CharStyleMap ( M.Map String String )
-
-class StyleMap a where
- alterMap :: (M.Map String String -> M.Map String String) -> a -> a
- getMap :: a -> M.Map String String
-
-instance StyleMap ParaStyleMap where
- alterMap f (ParaStyleMap m) = ParaStyleMap $ f m
- getMap (ParaStyleMap m) = m
-
-instance StyleMap CharStyleMap where
- alterMap f (CharStyleMap m) = CharStyleMap $ f m
- getMap (CharStyleMap m) = m
-
-insert :: (StyleMap a) => Maybe String -> Maybe String -> a -> a
-insert (Just k) (Just v) m = alterMap (M.insert k v) m
-insert _ _ m = m
-
-getStyleId :: (StyleMap a) => String -> a -> String
-getStyleId s = M.findWithDefault (filter (/=' ') s) (map toLower s) . getMap
-
-hasStyleName :: (StyleMap a) => String -> a -> Bool
-hasStyleName styleName = M.member (map toLower styleName) . getMap
-
-data StyleMaps = StyleMaps { sNameSpaces :: NameSpaces
- , sParaStyleMap :: ParaStyleMap
- , sCharStyleMap :: CharStyleMap
- }
-
-data StyleType = ParaStyle | CharStyle
-
-defaultStyleMaps :: StyleMaps
-defaultStyleMaps = StyleMaps { sNameSpaces = []
- , sParaStyleMap = ParaStyleMap M.empty
- , sCharStyleMap = CharStyleMap M.empty
- }
-
-type StateM a = State StyleMaps a
-
-getStyleMaps :: Element -> StyleMaps
-getStyleMaps docElem = execState genStyleMap state'
- where
- state' = defaultStyleMaps {sNameSpaces = elemToNameSpaces docElem}
- genStyleItem e = do
- styleType <- getStyleType e
- styleId <- getAttrStyleId e
- nameValLowercase <- fmap (map toLower) `fmap` getNameVal e
- case styleType of
- Just ParaStyle -> modParaStyleMap $ insert nameValLowercase styleId
- Just CharStyle -> modCharStyleMap $ insert nameValLowercase styleId
- _ -> return ()
- genStyleMap = do
- style <- elemName' "style"
- let styles = findChildren style docElem
- forM_ styles genStyleItem
-
-modParaStyleMap :: (ParaStyleMap -> ParaStyleMap) -> StateM ()
-modParaStyleMap f = modify $ \s ->
- s {sParaStyleMap = f $ sParaStyleMap s}
-
-modCharStyleMap :: (CharStyleMap -> CharStyleMap) -> StateM ()
-modCharStyleMap f = modify $ \s ->
- s {sCharStyleMap = f $ sCharStyleMap s}
-
-getStyleType :: Element -> StateM (Maybe StyleType)
-getStyleType e = do
- styleTypeStr <- getAttrType e
- case styleTypeStr of
- Just "paragraph" -> return $ Just ParaStyle
- Just "character" -> return $ Just CharStyle
- _ -> return Nothing
-
-getAttrType :: Element -> StateM (Maybe String)
-getAttrType el = do
- name <- elemName' "type"
- return $ findAttr name el
-
-getAttrStyleId :: Element -> StateM (Maybe String)
-getAttrStyleId el = do
- name <- elemName' "styleId"
- return $ findAttr name el
-
-getNameVal :: Element -> StateM (Maybe String)
-getNameVal el = do
- name <- elemName' "name"
- val <- elemName' "val"
- return $ findChild name el >>= findAttr val
-
-elemName' :: String -> StateM QName
-elemName' name = do
- namespaces <- gets sNameSpaces
- return $ elemName namespaces "w" name