aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Docx
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers/Docx')
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse.hs80
-rw-r--r--src/Text/Pandoc/Readers/Docx/StyleMap.hs106
-rw-r--r--src/Text/Pandoc/Readers/Docx/Util.hs26
3 files changed, 171 insertions, 41 deletions
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 671d2acf3..cce80fb48 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -65,6 +65,7 @@ import Text.Pandoc.Compat.Except
import Text.TeXMath.Readers.OMML (readOMML)
import Text.Pandoc.Readers.Docx.Fonts (getUnicode, Font(..))
import Text.TeXMath (Exp)
+import Text.Pandoc.Readers.Docx.Util
import Data.Char (readLitChar, ord, chr, isDigit)
data ReaderEnv = ReaderEnv { envNotes :: Notes
@@ -108,8 +109,6 @@ mapD f xs =
in
concatMapM handler xs
-type NameSpaces = [(String, String)]
-
data Docx = Docx Document
deriving Show
@@ -158,6 +157,7 @@ data ParagraphStyle = ParagraphStyle { pStyle :: [String]
, indentation :: Maybe ParIndentation
, dropCap :: Bool
, pHeading :: Maybe (String, Int)
+ , pNumInfo :: Maybe (String, String)
, pBlockQuote :: Maybe Bool
}
deriving Show
@@ -167,6 +167,7 @@ defaultParagraphStyle = ParagraphStyle { pStyle = []
, indentation = Nothing
, dropCap = False
, pHeading = Nothing
+ , pNumInfo = Nothing
, pBlockQuote = Nothing
}
@@ -224,6 +225,7 @@ data RunStyle = RunStyle { isBold :: Maybe Bool
data ParStyleData = ParStyleData { headingLev :: Maybe (String, Int)
, isBlockQuote :: Maybe Bool
+ , numInfo :: Maybe (String, String)
, psStyle :: Maybe ParStyle}
deriving Show
@@ -246,10 +248,6 @@ type ChangeId = String
type Author = String
type ChangeDate = String
-attrToNSPair :: Attr -> Maybe (String, String)
-attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
-attrToNSPair _ = Nothing
-
archiveToDocx :: Archive -> Either DocxError Docx
archiveToDocx archive = do
let notes = archiveToNotes archive
@@ -266,7 +264,7 @@ archiveToDocument :: Archive -> D Document
archiveToDocument zf = do
entry <- maybeToD $ findEntryByPath "word/document.xml" zf
docElem <- maybeToD $ (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
- let namespaces = mapMaybe attrToNSPair (elAttribs docElem)
+ let namespaces = elemToNameSpaces docElem
bodyElem <- maybeToD $ findChild (elemName namespaces "w" "body") docElem
body <- elemToBody namespaces bodyElem
return $ Document namespaces body
@@ -285,7 +283,7 @@ archiveToStyles zf =
case stylesElem of
Nothing -> (M.empty, M.empty)
Just styElem ->
- let namespaces = mapMaybe attrToNSPair (elAttribs styElem)
+ let namespaces = elemToNameSpaces styElem
in
( M.fromList $ buildBasedOnList namespaces styElem
(Nothing :: Maybe CharStyle),
@@ -353,10 +351,10 @@ archiveToNotes zf =
enElem = findEntryByPath "word/endnotes.xml" zf
>>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
fn_namespaces = case fnElem of
- Just e -> mapMaybe attrToNSPair (elAttribs e)
+ Just e -> elemToNameSpaces e
Nothing -> []
en_namespaces = case enElem of
- Just e -> mapMaybe attrToNSPair (elAttribs e)
+ Just e -> elemToNameSpaces e
Nothing -> []
ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces
fn = fnElem >>= (elemToNotes ns "footnote")
@@ -456,7 +454,7 @@ archiveToNumbering' zf = do
Nothing -> Just $ Numbering [] [] []
Just entry -> do
numberingElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
- let namespaces = mapMaybe attrToNSPair (elAttribs numberingElem)
+ let namespaces = elemToNameSpaces numberingElem
numElems = findChildren
(QName "num" (lookup "w" namespaces) (Just "w"))
numberingElem
@@ -485,15 +483,6 @@ elemToNotes _ _ _ = Nothing
---------------------------------------------
---------------------------------------------
-elemName :: NameSpaces -> String -> String -> QName
-elemName ns prefix name = (QName name (lookup prefix ns) (Just prefix))
-
-isElem :: NameSpaces -> String -> String -> Element -> Bool
-isElem ns prefix name element =
- qName (elName element) == name &&
- qURI (elName element) == (lookup prefix ns)
-
-
elemToTblGrid :: NameSpaces -> Element -> D TblGrid
elemToTblGrid ns element | isElem ns "w" "tblGrid" element =
let cols = findChildren (elemName ns "w" "gridCol") element
@@ -546,20 +535,6 @@ elemToParIndentation ns element | isElem ns "w" "ind" element =
stringToInteger}
elemToParIndentation _ _ = Nothing
-
-elemToNumInfo :: NameSpaces -> Element -> Maybe (String, String)
-elemToNumInfo ns element | isElem ns "w" "p" element = do
- let pPr = findChild (elemName ns "w" "pPr") element
- numPr = pPr >>= findChild (elemName ns "w" "numPr")
- lvl <- numPr >>=
- findChild (elemName ns "w" "ilvl") >>=
- findAttr (elemName ns "w" "val")
- numId <- numPr >>=
- findChild (elemName ns "w" "numId") >>=
- findAttr (elemName ns "w" "val")
- return (numId, lvl)
-elemToNumInfo _ _ = Nothing
-
testBitMask :: String -> Int -> Bool
testBitMask bitMaskS n =
case (reads ("0x" ++ bitMaskS) :: [(Int, String)]) of
@@ -578,20 +553,28 @@ elemToBodyPart ns element
return $ OMathPara expsLst
elemToBodyPart ns element
| isElem ns "w" "p" element
- , Just (numId, lvl) <- elemToNumInfo ns element = do
+ , Just (numId, lvl) <- getNumInfo ns element = do
sty <- asks envParStyles
let parstyle = elemToParagraphStyle ns element sty
parparts <- mapD (elemToParPart ns) (elChildren element)
num <- asks envNumbering
case lookupLevel numId lvl num of
- Just levelInfo -> return $ ListItem parstyle numId lvl levelInfo parparts
- Nothing -> throwError WrongElem
+ Just levelInfo -> return $ ListItem parstyle numId lvl levelInfo parparts
+ Nothing -> throwError WrongElem
elemToBodyPart ns element
| isElem ns "w" "p" element = do
- sty <- asks envParStyles
- let parstyle = elemToParagraphStyle ns element sty
- parparts <- mapD (elemToParPart ns) (elChildren element)
- return $ Paragraph parstyle parparts
+ sty <- asks envParStyles
+ let parstyle = elemToParagraphStyle ns element sty
+ parparts <- mapD (elemToParPart ns) (elChildren element)
+ case pNumInfo parstyle of
+ Just (numId, lvl) -> do
+ num <- asks envNumbering
+ case lookupLevel numId lvl num of
+ Just levelInfo ->
+ return $ ListItem parstyle numId lvl levelInfo parparts
+ Nothing ->
+ throwError WrongElem
+ Nothing -> return $ Paragraph parstyle parparts
elemToBodyPart ns element
| isElem ns "w" "tbl" element = do
let caption' = findChild (elemName ns "w" "tblPr") element
@@ -771,6 +754,7 @@ elemToParagraphStyle ns element sty
Just _ -> True
Nothing -> False
, pHeading = getParStyleField headingLev sty style
+ , pNumInfo = getParStyleField numInfo sty style
, pBlockQuote = getParStyleField isBlockQuote sty style
}
elemToParagraphStyle _ _ _ = defaultParagraphStyle
@@ -857,12 +841,26 @@ getBlockQuote ns element
, styleName `elem` blockQuoteStyleNames = Just True
getBlockQuote _ _ = Nothing
+getNumInfo :: NameSpaces -> Element -> Maybe (String, String)
+getNumInfo ns element = do
+ let numPr = findChild (elemName ns "w" "pPr") element >>=
+ findChild (elemName ns "w" "numPr")
+ lvl = fromMaybe "0" (numPr >>=
+ findChild (elemName ns "w" "ilvl") >>=
+ findAttr (elemName ns "w" "val"))
+ numId <- numPr >>=
+ findChild (elemName ns "w" "numId") >>=
+ findAttr (elemName ns "w" "val")
+ return (numId, lvl)
+
+
elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> ParStyleData
elemToParStyleData ns element parentStyle =
ParStyleData
{
headingLev = getHeaderLevel ns element
, isBlockQuote = getBlockQuote ns element
+ , numInfo = getNumInfo ns element
, psStyle = parentStyle
}
diff --git a/src/Text/Pandoc/Readers/Docx/StyleMap.hs b/src/Text/Pandoc/Readers/Docx/StyleMap.hs
new file mode 100644
index 000000000..2901ea2a3
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Docx/StyleMap.hs
@@ -0,0 +1,106 @@
+module Text.Pandoc.Readers.Docx.StyleMap ( StyleMaps(..)
+ , defaultStyleMaps
+ , getStyleMaps
+ , getStyleId
+ , hasStyleName
+ ) where
+
+import Text.XML.Light
+import Text.Pandoc.Readers.Docx.Util
+import Control.Monad.State
+import Data.Char (toLower)
+import qualified Data.Map as M
+
+newtype ParaStyleMap = ParaStyleMap ( M.Map String String )
+newtype CharStyleMap = CharStyleMap ( M.Map String String )
+
+class StyleMap a where
+ alterMap :: (M.Map String String -> M.Map String String) -> a -> a
+ getMap :: a -> M.Map String String
+
+instance StyleMap ParaStyleMap where
+ alterMap f (ParaStyleMap m) = ParaStyleMap $ f m
+ getMap (ParaStyleMap m) = m
+
+instance StyleMap CharStyleMap where
+ alterMap f (CharStyleMap m) = CharStyleMap $ f m
+ getMap (CharStyleMap m) = m
+
+insert :: (StyleMap a) => Maybe String -> Maybe String -> a -> a
+insert (Just k) (Just v) m = alterMap (M.insert k v) m
+insert _ _ m = m
+
+getStyleId :: (StyleMap a) => String -> a -> String
+getStyleId s = M.findWithDefault (filter (/=' ') s) (map toLower s) . getMap
+
+hasStyleName :: (StyleMap a) => String -> a -> Bool
+hasStyleName styleName = M.member (map toLower styleName) . getMap
+
+data StyleMaps = StyleMaps { sNameSpaces :: NameSpaces
+ , sParaStyleMap :: ParaStyleMap
+ , sCharStyleMap :: CharStyleMap
+ }
+
+data StyleType = ParaStyle | CharStyle
+
+defaultStyleMaps :: StyleMaps
+defaultStyleMaps = StyleMaps { sNameSpaces = []
+ , sParaStyleMap = ParaStyleMap M.empty
+ , sCharStyleMap = CharStyleMap M.empty
+ }
+
+type StateM a = State StyleMaps a
+
+getStyleMaps :: Element -> StyleMaps
+getStyleMaps docElem = execState genStyleMap state'
+ where
+ state' = defaultStyleMaps {sNameSpaces = elemToNameSpaces docElem}
+ genStyleItem e = do
+ styleType <- getStyleType e
+ styleId <- getAttrStyleId e
+ nameValLowercase <- fmap (map toLower) `fmap` getNameVal e
+ case styleType of
+ Just ParaStyle -> modParaStyleMap $ insert nameValLowercase styleId
+ Just CharStyle -> modCharStyleMap $ insert nameValLowercase styleId
+ _ -> return ()
+ genStyleMap = do
+ style <- elemName' "style"
+ let styles = findChildren style docElem
+ forM_ styles genStyleItem
+
+modParaStyleMap :: (ParaStyleMap -> ParaStyleMap) -> StateM ()
+modParaStyleMap f = modify $ \s ->
+ s {sParaStyleMap = f $ sParaStyleMap s}
+
+modCharStyleMap :: (CharStyleMap -> CharStyleMap) -> StateM ()
+modCharStyleMap f = modify $ \s ->
+ s {sCharStyleMap = f $ sCharStyleMap s}
+
+getStyleType :: Element -> StateM (Maybe StyleType)
+getStyleType e = do
+ styleTypeStr <- getAttrType e
+ case styleTypeStr of
+ Just "paragraph" -> return $ Just ParaStyle
+ Just "character" -> return $ Just CharStyle
+ _ -> return Nothing
+
+getAttrType :: Element -> StateM (Maybe String)
+getAttrType el = do
+ name <- elemName' "type"
+ return $ findAttr name el
+
+getAttrStyleId :: Element -> StateM (Maybe String)
+getAttrStyleId el = do
+ name <- elemName' "styleId"
+ return $ findAttr name el
+
+getNameVal :: Element -> StateM (Maybe String)
+getNameVal el = do
+ name <- elemName' "name"
+ val <- elemName' "val"
+ return $ findChild name el >>= findAttr val
+
+elemName' :: String -> StateM QName
+elemName' name = do
+ namespaces <- gets sNameSpaces
+ return $ elemName namespaces "w" name
diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs
new file mode 100644
index 000000000..891f107b0
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Docx/Util.hs
@@ -0,0 +1,26 @@
+module Text.Pandoc.Readers.Docx.Util (
+ NameSpaces
+ , elemName
+ , isElem
+ , elemToNameSpaces
+ ) where
+
+import Text.XML.Light
+import Data.Maybe (mapMaybe)
+
+type NameSpaces = [(String, String)]
+
+elemToNameSpaces :: Element -> NameSpaces
+elemToNameSpaces = mapMaybe attrToNSPair . elAttribs
+
+attrToNSPair :: Attr -> Maybe (String, String)
+attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
+attrToNSPair _ = Nothing
+
+elemName :: NameSpaces -> String -> String -> QName
+elemName ns prefix name = QName name (lookup prefix ns) (Just prefix)
+
+isElem :: NameSpaces -> String -> String -> Element -> Bool
+isElem ns prefix name element =
+ qName (elName element) == name &&
+ qURI (elName element) == lookup prefix ns