diff options
author | Nikolay Yakimov <root@livid.pp.ru> | 2015-03-01 22:57:35 +0300 |
---|---|---|
committer | Nikolay Yakimov <root@livid.pp.ru> | 2015-03-01 22:57:35 +0300 |
commit | 409111f647d3efa403ff1efff12eebc3173017b5 (patch) | |
tree | 29f050eb4cd0841b91d1540dcca90ff9041ec1b2 | |
parent | 13daf3ed6a66698722fce7020bb64ee8700b5613 (diff) | |
download | pandoc-409111f647d3efa403ff1efff12eebc3173017b5.tar.gz |
Started moving StyleMap out of writer code
-rw-r--r-- | pandoc.cabal | 4 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx/Parse.hs | 26 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx/StyleMap.hs | 105 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx/Util.hs | 26 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Docx.hs | 71 |
5 files changed, 165 insertions, 67 deletions
diff --git a/pandoc.cabal b/pandoc.cabal index 16106f896..32bbfbd26 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -338,7 +338,9 @@ Library Other-Modules: Text.Pandoc.Readers.Docx.Lists, Text.Pandoc.Readers.Docx.Reducible, Text.Pandoc.Readers.Docx.Parse, - Text.Pandoc.Readers.Docx.Fonts + Text.Pandoc.Readers.Docx.Fonts, + Text.Pandoc.Readers.Docx.Util, + Text.Pandoc.Readers.Docx.StyleMap Text.Pandoc.Writers.Shared, Text.Pandoc.Asciify, Text.Pandoc.MIME, diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index b644923c4..cce80fb48 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -65,6 +65,7 @@ import Text.Pandoc.Compat.Except import Text.TeXMath.Readers.OMML (readOMML) import Text.Pandoc.Readers.Docx.Fonts (getUnicode, Font(..)) import Text.TeXMath (Exp) +import Text.Pandoc.Readers.Docx.Util import Data.Char (readLitChar, ord, chr, isDigit) data ReaderEnv = ReaderEnv { envNotes :: Notes @@ -108,8 +109,6 @@ mapD f xs = in concatMapM handler xs -type NameSpaces = [(String, String)] - data Docx = Docx Document deriving Show @@ -249,10 +248,6 @@ type ChangeId = String type Author = String type ChangeDate = String -attrToNSPair :: Attr -> Maybe (String, String) -attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val) -attrToNSPair _ = Nothing - archiveToDocx :: Archive -> Either DocxError Docx archiveToDocx archive = do let notes = archiveToNotes archive @@ -269,7 +264,7 @@ archiveToDocument :: Archive -> D Document archiveToDocument zf = do entry <- maybeToD $ findEntryByPath "word/document.xml" zf docElem <- maybeToD $ (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry - let namespaces = mapMaybe attrToNSPair (elAttribs docElem) + let namespaces = elemToNameSpaces docElem bodyElem <- maybeToD $ findChild (elemName namespaces "w" "body") docElem body <- elemToBody namespaces bodyElem return $ Document namespaces body @@ -288,7 +283,7 @@ archiveToStyles zf = case stylesElem of Nothing -> (M.empty, M.empty) Just styElem -> - let namespaces = mapMaybe attrToNSPair (elAttribs styElem) + let namespaces = elemToNameSpaces styElem in ( M.fromList $ buildBasedOnList namespaces styElem (Nothing :: Maybe CharStyle), @@ -356,10 +351,10 @@ archiveToNotes zf = enElem = findEntryByPath "word/endnotes.xml" zf >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) fn_namespaces = case fnElem of - Just e -> mapMaybe attrToNSPair (elAttribs e) + Just e -> elemToNameSpaces e Nothing -> [] en_namespaces = case enElem of - Just e -> mapMaybe attrToNSPair (elAttribs e) + Just e -> elemToNameSpaces e Nothing -> [] ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces fn = fnElem >>= (elemToNotes ns "footnote") @@ -459,7 +454,7 @@ archiveToNumbering' zf = do Nothing -> Just $ Numbering [] [] [] Just entry -> do numberingElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry - let namespaces = mapMaybe attrToNSPair (elAttribs numberingElem) + let namespaces = elemToNameSpaces numberingElem numElems = findChildren (QName "num" (lookup "w" namespaces) (Just "w")) numberingElem @@ -488,15 +483,6 @@ elemToNotes _ _ _ = Nothing --------------------------------------------- --------------------------------------------- -elemName :: NameSpaces -> String -> String -> QName -elemName ns prefix name = (QName name (lookup prefix ns) (Just prefix)) - -isElem :: NameSpaces -> String -> String -> Element -> Bool -isElem ns prefix name element = - qName (elName element) == name && - qURI (elName element) == (lookup prefix ns) - - elemToTblGrid :: NameSpaces -> Element -> D TblGrid elemToTblGrid ns element | isElem ns "w" "tblGrid" element = let cols = findChildren (elemName ns "w" "gridCol") element diff --git a/src/Text/Pandoc/Readers/Docx/StyleMap.hs b/src/Text/Pandoc/Readers/Docx/StyleMap.hs new file mode 100644 index 000000000..2e3d6db95 --- /dev/null +++ b/src/Text/Pandoc/Readers/Docx/StyleMap.hs @@ -0,0 +1,105 @@ +module Text.Pandoc.Readers.Docx.StyleMap ( StyleMap + , ParaStyleMap + , CharStyleMap + , StyleMaps(..) + , defaultStyleMaps + , getStyleMaps + , getStyleId + , hasStyleName + ) where + +import Text.XML.Light +import Text.Pandoc.Readers.Docx.Util +import Control.Monad.State +import Data.Char (toLower) +import Data.Maybe (fromMaybe) +import qualified Data.Map as M + +newtype ParaStyleMap = ParaStyleMap ( M.Map String String ) +newtype CharStyleMap = CharStyleMap ( M.Map String String ) + +class StyleMap a where + alterMap :: (M.Map String String -> M.Map String String) -> a -> a + getMap :: a -> M.Map String String + +instance StyleMap ParaStyleMap where + alterMap f (ParaStyleMap m) = ParaStyleMap $ f m + getMap (ParaStyleMap m) = m + +instance StyleMap CharStyleMap where + alterMap f (CharStyleMap m) = CharStyleMap $ f m + getMap (CharStyleMap m) = m + +insert :: (StyleMap a) => String -> String -> a -> a +insert k v = alterMap $ M.insert k v + +getStyleId :: (StyleMap a) => String -> a -> String +getStyleId s = M.findWithDefault (filter (/=' ') s) (map toLower s) . getMap + +hasStyleName :: (StyleMap a) => String -> a -> Bool +hasStyleName styleName = M.member (map toLower styleName) . getMap + +data StyleMaps = StyleMaps { sNameSpaces :: NameSpaces + , sParaStyleMap :: ParaStyleMap + , sCharStyleMap :: CharStyleMap + } + +data StyleType = ParaStyle | CharStyle + +defaultStyleMaps :: StyleMaps +defaultStyleMaps = StyleMaps { sNameSpaces = [] + , sParaStyleMap = ParaStyleMap M.empty + , sCharStyleMap = CharStyleMap M.empty + } + +type StateM a = StateT StyleMaps Maybe a + +getStyleMaps :: Element -> StyleMaps +getStyleMaps docElem = fromMaybe state' $ execStateT genStyleMap state' + where + state' = defaultStyleMaps {sNameSpaces = elemToNameSpaces docElem} + insertPara key val = modify $ \s -> + s { sParaStyleMap = insert key val $ sParaStyleMap s } + insertChar key val = modify $ \s -> + s { sCharStyleMap = insert key val $ sCharStyleMap s } + genStyleItem e = do + styleType <- getStyleType e + nameVal <- getNameVal e + styleId <- getAttrStyleId e + let nameValLC = map toLower nameVal + case styleType of + ParaStyle -> insertPara nameValLC styleId + CharStyle -> insertChar nameValLC styleId + genStyleMap = do + style <- elemName' "style" + let styles = findChildren style docElem + forM_ styles genStyleItem + +getStyleType :: Element -> StateM StyleType +getStyleType e = do + styleTypeStr <- getAttrType e + case styleTypeStr of + "paragraph" -> return ParaStyle + "character" -> return CharStyle + _ -> lift Nothing + +getAttrType :: Element -> StateM String +getAttrType el = do + name <- elemName' "type" + lift $ findAttr name el + +getAttrStyleId :: Element -> StateM String +getAttrStyleId el = do + name <- elemName' "styleId" + lift $ findAttr name el + +getNameVal :: Element -> StateM String +getNameVal el = do + name <- elemName' "name" + val <- elemName' "val" + lift $ findChild name el >>= findAttr val + +elemName' :: String -> StateM QName +elemName' name = do + namespaces <- gets sNameSpaces + return $ elemName namespaces "w" name diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs new file mode 100644 index 000000000..891f107b0 --- /dev/null +++ b/src/Text/Pandoc/Readers/Docx/Util.hs @@ -0,0 +1,26 @@ +module Text.Pandoc.Readers.Docx.Util ( + NameSpaces + , elemName + , isElem + , elemToNameSpaces + ) where + +import Text.XML.Light +import Data.Maybe (mapMaybe) + +type NameSpaces = [(String, String)] + +elemToNameSpaces :: Element -> NameSpaces +elemToNameSpaces = mapMaybe attrToNSPair . elAttribs + +attrToNSPair :: Attr -> Maybe (String, String) +attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val) +attrToNSPair _ = Nothing + +elemName :: NameSpaces -> String -> String -> QName +elemName ns prefix name = QName name (lookup prefix ns) (Just prefix) + +isElem :: NameSpaces -> String -> String -> Element -> Bool +isElem ns prefix name element = + qName (elName element) == name && + qURI (elName element) == lookup prefix ns diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index eb7fa344b..53065309b 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -54,6 +54,8 @@ import Text.Pandoc.Walk import Text.Highlighting.Kate.Types () import Text.XML.Light as XML import Text.TeXMath +import Text.Pandoc.Readers.Docx.StyleMap +import Text.Pandoc.Readers.Docx.Util (elemName) import Control.Monad.State import Text.Highlighting.Kate import Data.Unique (hashUnique, newUnique) @@ -64,7 +66,6 @@ import Text.Pandoc.MIME (MimeType, getMimeType, getMimeTypeDef, extensionFromMimeType) import Control.Applicative ((<$>), (<|>), (<*>)) import Data.Maybe (fromMaybe, mapMaybe, maybeToList) -import Data.Char (toLower) data ListMarker = NoMarker | BulletMarker @@ -90,9 +91,6 @@ listMarkerToId (NumberMarker sty delim n) = OneParen -> '2' TwoParens -> '3' -newtype ParaStyleMap = ParaStyleMap (M.Map String String) deriving Show -newtype CharStyleMap = CharStyleMap (M.Map String String) deriving Show - data WriterState = WriterState{ stTextProperties :: [Element] , stParaProperties :: [Element] @@ -109,8 +107,7 @@ data WriterState = WriterState{ , stChangesAuthor :: String , stChangesDate :: String , stPrintWidth :: Integer - , stParaStyles :: ParaStyleMap - , stCharStyles :: CharStyleMap + , stStyleMaps :: StyleMaps , stFirstPara :: Bool } @@ -131,8 +128,7 @@ defaultWriterState = WriterState{ , stChangesAuthor = "unknown" , stChangesDate = "1969-12-31T19:00:00Z" , stPrintWidth = 1 - , stParaStyles = ParaStyleMap M.empty - , stCharStyles = CharStyleMap M.empty + , stStyleMaps = defaultStyleMaps , stFirstPara = False } @@ -220,28 +216,14 @@ writeDocx opts doc@(Pandoc meta _) = do styledoc <- parseXml refArchive distArchive stylepath -- parse styledoc for heading styles - let styleNamespaces = map ((,) <$> qName . attrKey <*> attrVal) . - filter ((==Just "xmlns") . qPrefix . attrKey) . - elAttribs $ styledoc - mywURI = lookup "w" styleNamespaces - myName name = QName name mywURI (Just "w") - getAttrStyleId = findAttr (myName "styleId") - getAttrType = findAttr (myName "type") - isParaStyle = (Just "paragraph" ==) . getAttrType - isCharStyle = (Just "character" ==) . getAttrType - getNameVal = findChild (myName "name") >=> findAttr (myName "val") >=> return . map toLower - genStyleItem f e | f e = liftM2 (,) <$> getNameVal <*> getAttrStyleId $ e - | otherwise = Nothing - genStyleMap f = M.fromList $ mapMaybe (genStyleItem f) $ findChildren (myName "style") styledoc - paraStyles = ParaStyleMap $ genStyleMap isParaStyle - charStyles = CharStyleMap $ genStyleMap isCharStyle + let styleMaps = getStyleMaps styledoc ((contents, footnotes), st) <- runStateT (writeOpenXML opts{writerWrapText = False} doc') defaultWriterState{ stChangesAuthor = fromMaybe "unknown" username , stChangesDate = formatTime defaultTimeLocale "%FT%XZ" utctime , stPrintWidth = (maybe 420 (\x -> quot x 20) pgContentWidth) - , stParaStyles = paraStyles - , stCharStyles = charStyles} + , stStyleMaps = styleMaps + } let epochtime = floor $ utcTimeToPOSIXSeconds utctime let imgs = M.elems $ stImages st @@ -394,7 +376,7 @@ writeDocx opts doc@(Pandoc meta _) = do linkrels -- styles - let newstyles = styleToOpenXml charStyles paraStyles $ writerHighlightStyle opts + let newstyles = styleToOpenXml styleMaps $ writerHighlightStyle opts let styledoc' = styledoc{ elContent = modifyContent (elContent styledoc) } where modifyContent @@ -402,9 +384,10 @@ writeDocx opts doc@(Pandoc meta _) = do | otherwise = filter notTokStyle notTokStyle (Elem el) = notStyle el || notTokId el notTokStyle _ = True - notStyle = (/= myName "style") . elName - notTokId = maybe True (`notElem` tokStys) . getAttrStyleId + notStyle = (/= elemName' "style") . elName + notTokId = maybe True (`notElem` tokStys) . findAttr (elemName' "styleId") tokStys = "SourceCode" : map show (enumFromTo KeywordTok NormalTok) + elemName' = elemName (sNameSpaces styleMaps) "w" let styleEntry = toEntry stylepath epochtime $ renderXml styledoc' -- construct word/numbering.xml @@ -481,12 +464,11 @@ writeDocx opts doc@(Pandoc meta _) = do miscRelEntries ++ otherMediaEntries return $ fromArchive archive -styleToOpenXml :: CharStyleMap -> ParaStyleMap -> Style -> [Element] -styleToOpenXml (CharStyleMap csm) (ParaStyleMap psm) style = +styleToOpenXml :: StyleMaps -> Style -> [Element] +styleToOpenXml sm style = maybeToList parStyle ++ mapMaybe toStyle alltoktypes where alltoktypes = enumFromTo KeywordTok NormalTok - styleExists m styleName = M.member (map toLower styleName) m - toStyle toktype | styleExists csm $ show toktype = Nothing + toStyle toktype | hasStyleName (show toktype) (sCharStyleMap sm) = Nothing | otherwise = Just $ mknode "w:style" [("w:type","character"), ("w:customStyle","1"),("w:styleId",show toktype)] @@ -509,7 +491,7 @@ styleToOpenXml (CharStyleMap csm) (ParaStyleMap psm) style = tokBg toktype = maybe "auto" (drop 1 . fromColor) $ (tokenBackground =<< lookup toktype tokStyles) `mplus` backgroundColor style - parStyle | styleExists psm "Source Code" = Nothing + parStyle | hasStyleName "Source Code" (sParaStyleMap sm) = Nothing | otherwise = Just $ mknode "w:style" [("w:type","paragraph"), ("w:customStyle","1"),("w:styleId","SourceCode")] @@ -638,30 +620,27 @@ writeOpenXML opts (Pandoc meta blocks) = do blocksToOpenXML :: WriterOptions -> [Block] -> WS [Element] blocksToOpenXML opts bls = concat `fmap` mapM (blockToOpenXML opts) bls -getStyleId :: String -> M.Map String String -> String -getStyleId s = M.findWithDefault (filter (/=' ') s) (map toLower s) - -pStyle :: String -> ParaStyleMap -> Element -pStyle sty (ParaStyleMap m) = mknode "w:pStyle" [("w:val",sty')] () +pStyle :: String -> StyleMaps -> Element +pStyle sty m = mknode "w:pStyle" [("w:val",sty')] () where - sty' = getStyleId sty m + sty' = getStyleId sty $ sParaStyleMap m pCustomStyle :: String -> Element pCustomStyle sty = mknode "w:pStyle" [("w:val",sty)] () pStyleM :: String -> WS XML.Element -pStyleM = flip fmap (gets stParaStyles) . pStyle +pStyleM = (`fmap` gets stStyleMaps) . pStyle -rStyle :: String -> CharStyleMap -> Element -rStyle sty (CharStyleMap m) = mknode "w:rStyle" [("w:val",sty')] () +rStyle :: String -> StyleMaps -> Element +rStyle sty m = mknode "w:rStyle" [("w:val",sty')] () where - sty' = getStyleId sty m + sty' = getStyleId sty $ sCharStyleMap m rCustomStyle :: String -> Element rCustomStyle sty = mknode "w:rStyle" [("w:val",sty)] () rStyleM :: String -> WS XML.Element -rStyleM = flip fmap (gets stCharStyles) . rStyle +rStyleM = (`fmap` gets stStyleMaps) . rStyle getUniqueId :: MonadIO m => m String -- the + 20 is to ensure that there are no clashes with the rIds @@ -710,10 +689,10 @@ blockToOpenXML opts (Para lst) = do paraProps <- getParaProps $ case lst of [Math DisplayMath _] -> True _ -> False - pSM <- gets stParaStyles + sm <- gets stStyleMaps let paraProps' = case paraProps of [] | isFirstPara -> [mknode "w:pPr" [] [pCustomStyle "FirstParagraph"]] - [] -> [mknode "w:pPr" [] [pStyle "Body Text" pSM]] + [] -> [mknode "w:pPr" [] [pStyle "Body Text" sm]] ps -> ps modify $ \s -> s { stFirstPara = False } contents <- inlinesToOpenXML opts lst |