From 5cdd11725c2db417f7f93d09fdb7ead90d1700a6 Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Sat, 21 Feb 2015 22:20:18 +0300 Subject: Initial stab at more involved fix for #1607 This patch attempts to build a style name -> style id mapping based on styles.xml from reference doc, and changes pStyle and rStyle to accept style name as a parameter instead of styleId. There is a fallback mechanic that removes spaces from style name and returns it as style id, but it likely won't help much. Style names are matched lower-case, since headings and `footnote text` have lowercase names. --- src/Text/Pandoc/Writers/Docx.hs | 150 +++++++++++++++++++++++----------------- 1 file changed, 86 insertions(+), 64 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 441392918..437422451 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -29,7 +29,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to docx. -} module Text.Pandoc.Writers.Docx ( writeDocx ) where -import Data.List ( intercalate, isPrefixOf, isSuffixOf, stripPrefix ) +import Data.List ( intercalate, isPrefixOf, isSuffixOf ) import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Lazy.Char8 as BL8 @@ -64,7 +64,7 @@ import Text.Pandoc.MIME (MimeType, getMimeType, getMimeTypeDef, extensionFromMimeType) import Control.Applicative ((<$>), (<|>), (<*>)) import Data.Maybe (fromMaybe, mapMaybe) -import Data.Char (isDigit) +import Data.Char (toLower) data ListMarker = NoMarker | BulletMarker @@ -90,6 +90,9 @@ listMarkerToId (NumberMarker sty delim n) = OneParen -> '2' TwoParens -> '3' +newtype ParaStyleMap = ParaStyleMap (M.Map String String) deriving Show +newtype CharStyleMap = CharStyleMap (M.Map String String) deriving Show + data WriterState = WriterState{ stTextProperties :: [Element] , stParaProperties :: [Element] @@ -106,7 +109,8 @@ data WriterState = WriterState{ , stChangesAuthor :: String , stChangesDate :: String , stPrintWidth :: Integer - , stHeadingStyles :: [(Int,String)] + , stParaStyles :: ParaStyleMap + , stCharStyles :: CharStyleMap , stFirstPara :: Bool } @@ -127,7 +131,8 @@ defaultWriterState = WriterState{ , stChangesAuthor = "unknown" , stChangesDate = "1969-12-31T19:00:00Z" , stPrintWidth = 1 - , stHeadingStyles = [] + , stParaStyles = ParaStyleMap M.empty + , stCharStyles = CharStyleMap M.empty , stFirstPara = False } @@ -218,29 +223,25 @@ writeDocx opts doc@(Pandoc meta _) = do let styleNamespaces = map ((,) <$> qName . attrKey <*> attrVal) . filter ((==Just "xmlns") . qPrefix . attrKey) . elAttribs $ styledoc - let headingStyles = - let - mywURI = lookup "w" styleNamespaces - myName name = QName name mywURI (Just "w") - getAttrStyleId = findAttr (myName "styleId") - getNameVal = findChild (myName "name") >=> findAttr (myName "val") - getNum s | not $ null s, all isDigit s = Just (read s :: Int) - | otherwise = Nothing - getEngHeader = getAttrStyleId >=> stripPrefix "Heading" >=> getNum - getIntHeader = getNameVal >=> stripPrefix "heading " >=> getNum - toTuple getF = liftM2 (,) <$> getF <*> getAttrStyleId - toMap getF = mapMaybe (toTuple getF) $ - findChildren (myName "style") styledoc - select a b | not $ null a = a - | otherwise = b - in - select (toMap getEngHeader) (toMap getIntHeader) + mywURI = lookup "w" styleNamespaces + myName name = QName name mywURI (Just "w") + getAttrStyleId = findAttr (myName "styleId") + getAttrType = findAttr (myName "type") + isParaStyle = (Just "paragraph" ==) . getAttrType + isCharStyle = (Just "character" ==) . getAttrType + getNameVal = findChild (myName "name") >=> findAttr (myName "val") >=> return . map toLower + genStyleItem f e | f e = liftM2 (,) <$> getNameVal <*> getAttrStyleId $ e + | otherwise = Nothing + genStyleMap f = M.fromList $ mapMaybe (genStyleItem f) $ findChildren (myName "style") styledoc + paraStyles = ParaStyleMap $ genStyleMap isParaStyle + charStyles = CharStyleMap $ genStyleMap isCharStyle ((contents, footnotes), st) <- runStateT (writeOpenXML opts{writerWrapText = False} doc') defaultWriterState{ stChangesAuthor = fromMaybe "unknown" username , stChangesDate = formatTime defaultTimeLocale "%FT%XZ" utctime , stPrintWidth = (maybe 420 (\x -> quot x 20) pgContentWidth) - , stHeadingStyles = headingStyles} + , stParaStyles = paraStyles + , stCharStyles = charStyles} let epochtime = floor $ utcTimeToPOSIXSeconds utctime let imgs = M.elems $ stImages st @@ -602,14 +603,14 @@ writeOpenXML opts (Pandoc meta blocks) = do Just (MetaBlocks [Para xs]) -> xs Just (MetaInlines xs) -> xs _ -> [] - title <- withParaProp (pStyle "Title") $ blocksToOpenXML opts [Para tit | not (null tit)] - subtitle <- withParaProp (pStyle "Subtitle") $ blocksToOpenXML opts [Para subtitle' | not (null subtitle')] - authors <- withParaProp (pStyle "Author") $ blocksToOpenXML opts $ + title <- withParaPropM (pStyleM "Title") $ blocksToOpenXML opts [Para tit | not (null tit)] + subtitle <- withParaPropM (pStyleM "Subtitle") $ blocksToOpenXML opts [Para subtitle' | not (null subtitle')] + authors <- withParaPropM (pStyleM "Author") $ blocksToOpenXML opts $ map Para auths - date <- withParaProp (pStyle "Date") $ blocksToOpenXML opts [Para dat | not (null dat)] + date <- withParaPropM (pStyleM "Date") $ blocksToOpenXML opts [Para dat | not (null dat)] abstract <- if null abstract' then return [] - else withParaProp (pStyle "Abstract") $ blocksToOpenXML opts abstract' + else withParaPropM (pStyleM "Abstract") $ blocksToOpenXML opts abstract' let convertSpace (Str x : Space : Str y : xs) = Str (x ++ " " ++ y) : xs convertSpace (Str x : Str y : xs) = Str (x ++ y) : xs convertSpace xs = xs @@ -623,11 +624,24 @@ writeOpenXML opts (Pandoc meta blocks) = do blocksToOpenXML :: WriterOptions -> [Block] -> WS [Element] blocksToOpenXML opts bls = concat `fmap` mapM (blockToOpenXML opts) bls -pStyle :: String -> Element -pStyle sty = mknode "w:pStyle" [("w:val",sty)] () +getStyleId :: String -> M.Map String String -> String +getStyleId s = M.findWithDefault (filter (/=' ') s) (map toLower s) + +pStyle :: String -> ParaStyleMap -> Element +pStyle sty (ParaStyleMap m) = mknode "w:pStyle" [("w:val",sty')] () + where + sty' = getStyleId sty m + +pStyleM :: String -> WS XML.Element +pStyleM = flip fmap (gets stParaStyles) . pStyle + +rStyle :: String -> CharStyleMap -> Element +rStyle sty (CharStyleMap m) = mknode "w:rStyle" [("w:val",sty')] () + where + sty' = getStyleId sty m -rStyle :: String -> Element -rStyle sty = mknode "w:rStyle" [("w:val",sty)] () +rStyleM :: String -> WS XML.Element +rStyleM = flip fmap (gets stCharStyles) . rStyle getUniqueId :: MonadIO m => m String -- the + 20 is to ensure that there are no clashes with the rIds @@ -641,13 +655,12 @@ blockToOpenXML opts (Div (_,["references"],_) bs) = do let (hs, bs') = span isHeaderBlock bs header <- blocksToOpenXML opts hs -- We put the Bibliography style on paragraphs after the header - rest <- withParaProp (pStyle "Bibliography") $ blocksToOpenXML opts bs' + rest <- withParaPropM (pStyleM "Bibliography") $ blocksToOpenXML opts bs' return (header ++ rest) blockToOpenXML opts (Div _ bs) = blocksToOpenXML opts bs blockToOpenXML opts (Header lev (ident,_,_) lst) = do setFirstPara - headingStyles <- gets stHeadingStyles - paraProps <- maybe id (withParaProp . pStyle) (lookup lev headingStyles) $ + paraProps <- withParaPropM (pStyleM ("Heading "++show lev)) $ getParaProps False contents <- inlinesToOpenXML opts lst usedIdents <- gets stSectionIds @@ -660,26 +673,27 @@ blockToOpenXML opts (Header lev (ident,_,_) lst) = do ,("w:name",bookmarkName)] () let bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] () return [mknode "w:p" [] (paraProps ++ [bookmarkStart, bookmarkEnd] ++ contents)] -blockToOpenXML opts (Plain lst) = withParaProp (pStyle "Compact") +blockToOpenXML opts (Plain lst) = withParaPropM (pStyleM "Compact") $ blockToOpenXML opts (Para lst) -- title beginning with fig: indicates that the image is a figure blockToOpenXML opts (Para [Image alt (src,'f':'i':'g':':':tit)]) = do setFirstPara paraProps <- getParaProps False contents <- inlinesToOpenXML opts [Image alt (src,tit)] - captionNode <- withParaProp (pStyle "ImageCaption") + captionNode <- withParaPropM (pStyleM "Image Caption") $ blockToOpenXML opts (Para alt) return $ mknode "w:p" [] (paraProps ++ contents) : captionNode -- fixDisplayMath sometimes produces a Para [] as artifact blockToOpenXML _ (Para []) = return [] blockToOpenXML opts (Para lst) = do - isFirstPara <- gets stFirstPara + isFirstPara <- gets stFirstPara paraProps <- getParaProps $ case lst of [Math DisplayMath _] -> True _ -> False + pSM <- gets stParaStyles let paraProps' = case paraProps of - [] | isFirstPara -> [mknode "w:pPr" [] [(pStyle "FirstParagraph")]] - [] -> [mknode "w:pPr" [] [(pStyle "BodyText")]] + [] | isFirstPara -> [mknode "w:pPr" [] [(pStyle "First Paragraph" pSM)]] + [] -> [mknode "w:pPr" [] [(pStyle "Body Text" pSM)]] ps -> ps modify $ \s -> s { stFirstPara = False } contents <- inlinesToOpenXML opts lst @@ -688,11 +702,11 @@ blockToOpenXML _ (RawBlock format str) | format == Format "openxml" = return [ x | Elem x <- parseXML str ] | otherwise = return [] blockToOpenXML opts (BlockQuote blocks) = do - p <- withParaProp (pStyle "BlockQuote") $ blocksToOpenXML opts blocks + p <- withParaPropM (pStyleM "Block Quote") $ blocksToOpenXML opts blocks setFirstPara return p blockToOpenXML opts (CodeBlock attrs str) = do - p <- withParaProp (pStyle "SourceCode") $ (blockToOpenXML opts $ Para [Code attrs str]) + p <- withParaPropM (pStyleM "Source Code") (blockToOpenXML opts $ Para [Code attrs str]) setFirstPara return p blockToOpenXML _ HorizontalRule = do @@ -707,7 +721,7 @@ blockToOpenXML opts (Table caption aligns widths headers rows) = do let captionStr = stringify caption caption' <- if null caption then return [] - else withParaProp (pStyle "TableCaption") + else withParaPropM (pStyleM "Table Caption") $ blockToOpenXML opts (Para caption) let alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] () let cellToOpenXML (al, cell) = withParaProp (alignmentFor al) @@ -767,9 +781,9 @@ blockToOpenXML opts (DefinitionList items) = do definitionListItemToOpenXML :: WriterOptions -> ([Inline],[[Block]]) -> WS [Element] definitionListItemToOpenXML opts (term,defs) = do - term' <- withParaProp (pStyle "DefinitionTerm") + term' <- withParaPropM (pStyleM "Definition Term") $ blockToOpenXML opts (Para term) - defs' <- withParaProp (pStyle "Definition") + defs' <- withParaPropM (pStyleM "Definition") $ concat `fmap` mapM (blocksToOpenXML opts) defs return $ term' ++ defs' @@ -833,6 +847,9 @@ withTextProp d p = do popTextProp return res +withTextPropM :: WS Element -> WS a -> WS a +withTextPropM = (. flip withTextProp) . (>>=) + getParaProps :: Bool -> WS [Element] getParaProps displayMathPara = do props <- gets stParaProperties @@ -861,6 +878,9 @@ withParaProp d p = do popParaProp return res +withParaPropM :: WS Element -> WS a -> WS a +withParaPropM = (. flip withParaProp) . (>>=) + formattedString :: String -> WS [Element] formattedString str = do props <- getTextProps @@ -943,25 +963,27 @@ inlineToOpenXML opts (Math mathType str) = do Right r -> return [r] Left _ -> inlinesToOpenXML opts (texMathToInlines mathType str) inlineToOpenXML opts (Cite _ lst) = inlinesToOpenXML opts lst -inlineToOpenXML opts (Code attrs str) = - withTextProp (rStyle "VerbatimChar") - $ if writerHighlight opts - then case highlight formatOpenXML attrs str of - Nothing -> unhighlighted - Just h -> return h - else unhighlighted - where unhighlighted = intercalate [br] `fmap` - (mapM formattedString $ lines str) - formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok) - toHlTok (toktype,tok) = mknode "w:r" [] - [ mknode "w:rPr" [] - [ rStyle $ show toktype ] - , mknode "w:t" [("xml:space","preserve")] tok ] +inlineToOpenXML opts (Code attrs str) = do + rSM <- gets stCharStyles + let unhighlighted = intercalate [br] `fmap` + (mapM formattedString $ lines str) + formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok) + toHlTok (toktype,tok) = mknode "w:r" [] + [ mknode "w:rPr" [] + [ rStyle (show toktype) rSM ] + , mknode "w:t" [("xml:space","preserve")] tok ] + withTextProp (rStyle "Verbatim Char" rSM) + $ if writerHighlight opts + then case highlight formatOpenXML attrs str of + Nothing -> unhighlighted + Just h -> return h + else unhighlighted inlineToOpenXML opts (Note bs) = do notes <- gets stFootnotes notenum <- getUniqueId + rSM <- gets stCharStyles let notemarker = mknode "w:r" [] - [ mknode "w:rPr" [] (rStyle "FootnoteRef") + [ mknode "w:rPr" [] (rStyle "Footnote Ref" rSM) , mknode "w:footnoteRef" [] () ] let notemarkerXml = RawInline (Format "openxml") $ ppElement notemarker let insertNoteRef (Plain ils : xs) = Plain (notemarkerXml : ils) : xs @@ -971,22 +993,22 @@ inlineToOpenXML opts (Note bs) = do oldParaProperties <- gets stParaProperties oldTextProperties <- gets stTextProperties modify $ \st -> st{ stListLevel = -1, stParaProperties = [], stTextProperties = [] } - contents <- withParaProp (pStyle "FootnoteText") $ blocksToOpenXML opts + contents <- withParaPropM (pStyleM "Footnote Text") $ blocksToOpenXML opts $ insertNoteRef bs modify $ \st -> st{ stListLevel = oldListLevel, stParaProperties = oldParaProperties, stTextProperties = oldTextProperties } let newnote = mknode "w:footnote" [("w:id", notenum)] $ contents modify $ \s -> s{ stFootnotes = newnote : notes } return [ mknode "w:r" [] - [ mknode "w:rPr" [] (rStyle "FootnoteRef") + [ mknode "w:rPr" [] (rStyle "Footnote Ref" rSM) , mknode "w:footnoteReference" [("w:id", notenum)] () ] ] -- internal link: inlineToOpenXML opts (Link txt ('#':xs,_)) = do - contents <- withTextProp (rStyle "Link") $ inlinesToOpenXML opts txt + contents <- withTextPropM (rStyleM "Link") $ inlinesToOpenXML opts txt return [ mknode "w:hyperlink" [("w:anchor",xs)] contents ] -- external link: inlineToOpenXML opts (Link txt (src,_)) = do - contents <- withTextProp (rStyle "Link") $ inlinesToOpenXML opts txt + contents <- withTextPropM (rStyleM "Link") $ inlinesToOpenXML opts txt extlinks <- gets stExternalLinks id' <- case M.lookup src extlinks of Just i -> return i @@ -1088,7 +1110,7 @@ defaultFootnotes = [ mknode "w:footnote" [ mknode "w:p" [] $ [ mknode "w:r" [] $ [ mknode "w:continuationSeparator" [] ()]]]] - + parseXml :: Archive -> Archive -> String -> IO Element parseXml refArchive distArchive relpath = case ((findEntryByPath relpath refArchive `mplus` -- cgit v1.2.3 From 80715ecd7a39288aef501b3550b45cb2f121df10 Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Sun, 22 Feb 2015 00:19:58 +0300 Subject: Prototype fix for #1872 --- src/Text/Pandoc/Writers/Docx.hs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 437422451..a240997ab 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -394,7 +394,7 @@ writeDocx opts doc@(Pandoc meta _) = do linkrels -- styles - let newstyles = styleToOpenXml $ writerHighlightStyle opts + let newstyles = styleToOpenXml charStyles $ writerHighlightStyle opts let styledoc' = styledoc{ elContent = elContent styledoc ++ [Elem x | x <- newstyles, writerHighlight opts] } let styleEntry = toEntry stylepath epochtime $ renderXml styledoc' @@ -473,10 +473,13 @@ writeDocx opts doc@(Pandoc meta _) = do miscRelEntries ++ otherMediaEntries return $ fromArchive archive -styleToOpenXml :: Style -> [Element] -styleToOpenXml style = parStyle : map toStyle alltoktypes +styleToOpenXml :: CharStyleMap -> Style -> [Element] +styleToOpenXml (CharStyleMap m) style = parStyle : mapMaybe toStyle alltoktypes where alltoktypes = enumFromTo KeywordTok NormalTok - toStyle toktype = mknode "w:style" [("w:type","character"), + toStyle toktype = + if M.member (map toLower $ show toktype) m then Nothing + else Just $ + mknode "w:style" [("w:type","character"), ("w:customStyle","1"),("w:styleId",show toktype)] [ mknode "w:name" [("w:val",show toktype)] () , mknode "w:basedOn" [("w:val","VerbatimChar")] () -- cgit v1.2.3 From 8b3acde9deaeb30ba75299001ea1b15345983f3c Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Sun, 22 Feb 2015 23:25:12 +0300 Subject: If --no-highlight is set, remove *Tok styles. --- src/Text/Pandoc/Writers/Docx.hs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index a240997ab..64da9a497 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -395,8 +395,16 @@ writeDocx opts doc@(Pandoc meta _) = do -- styles let newstyles = styleToOpenXml charStyles $ writerHighlightStyle opts - let styledoc' = styledoc{ elContent = elContent styledoc ++ - [Elem x | x <- newstyles, writerHighlight opts] } + let styledoc' = styledoc{ elContent = modifyContent (elContent styledoc) } + where + modifyContent + | writerHighlight opts = (++ map Elem newstyles) + | otherwise = filter notTokStyle + notTokStyle (Elem el) = notStyle el || notTokId el + notTokStyle _ = True + notStyle = (/= myName "style") . elName + notTokId = maybe True (`notElem` tokStys) . getAttrStyleId + tokStys = map show $ enumFromTo KeywordTok NormalTok let styleEntry = toEntry stylepath epochtime $ renderXml styledoc' -- construct word/numbering.xml -- cgit v1.2.3 From 7ae7f0c051b83e41c8bb4c0f15a2b57f76cd6298 Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Mon, 23 Feb 2015 01:53:47 +0300 Subject: Also skip SourceCode style if exists --- src/Text/Pandoc/Writers/Docx.hs | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 64da9a497..ba8a28de0 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -63,7 +63,7 @@ import qualified Control.Exception as E import Text.Pandoc.MIME (MimeType, getMimeType, getMimeTypeDef, extensionFromMimeType) import Control.Applicative ((<$>), (<|>), (<*>)) -import Data.Maybe (fromMaybe, mapMaybe) +import Data.Maybe (fromMaybe, mapMaybe, maybeToList) import Data.Char (toLower) data ListMarker = NoMarker @@ -394,7 +394,7 @@ writeDocx opts doc@(Pandoc meta _) = do linkrels -- styles - let newstyles = styleToOpenXml charStyles $ writerHighlightStyle opts + let newstyles = styleToOpenXml charStyles paraStyles $ writerHighlightStyle opts let styledoc' = styledoc{ elContent = modifyContent (elContent styledoc) } where modifyContent @@ -404,7 +404,7 @@ writeDocx opts doc@(Pandoc meta _) = do notTokStyle _ = True notStyle = (/= myName "style") . elName notTokId = maybe True (`notElem` tokStys) . getAttrStyleId - tokStys = map show $ enumFromTo KeywordTok NormalTok + tokStys = "SourceCode" : map show (enumFromTo KeywordTok NormalTok) let styleEntry = toEntry stylepath epochtime $ renderXml styledoc' -- construct word/numbering.xml @@ -481,12 +481,13 @@ writeDocx opts doc@(Pandoc meta _) = do miscRelEntries ++ otherMediaEntries return $ fromArchive archive -styleToOpenXml :: CharStyleMap -> Style -> [Element] -styleToOpenXml (CharStyleMap m) style = parStyle : mapMaybe toStyle alltoktypes +styleToOpenXml :: CharStyleMap -> ParaStyleMap -> Style -> [Element] +styleToOpenXml (CharStyleMap csm) (ParaStyleMap psm) style = + maybeToList parStyle ++ mapMaybe toStyle alltoktypes where alltoktypes = enumFromTo KeywordTok NormalTok - toStyle toktype = - if M.member (map toLower $ show toktype) m then Nothing - else Just $ + styleExists m styleName = M.member (map toLower styleName) m + toStyle toktype | styleExists csm $ show toktype = Nothing + | otherwise = Just $ mknode "w:style" [("w:type","character"), ("w:customStyle","1"),("w:styleId",show toktype)] [ mknode "w:name" [("w:val",show toktype)] () @@ -508,7 +509,9 @@ styleToOpenXml (CharStyleMap m) style = parStyle : mapMaybe toStyle alltoktypes tokBg toktype = maybe "auto" (drop 1 . fromColor) $ (tokenBackground =<< lookup toktype tokStyles) `mplus` backgroundColor style - parStyle = mknode "w:style" [("w:type","paragraph"), + parStyle | styleExists psm "Source Code" = Nothing + | otherwise = Just $ + mknode "w:style" [("w:type","paragraph"), ("w:customStyle","1"),("w:styleId","SourceCode")] [ mknode "w:name" [("w:val","Source Code")] () , mknode "w:basedOn" [("w:val","Normal")] () -- cgit v1.2.3 From 47c70b91313dd5e907efd34d5a26d908b625c476 Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Mon, 23 Feb 2015 02:05:32 +0300 Subject: Do not lookup custom styles --- src/Text/Pandoc/Writers/Docx.hs | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index ba8a28de0..f217dd9bc 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -646,6 +646,9 @@ pStyle sty (ParaStyleMap m) = mknode "w:pStyle" [("w:val",sty')] () where sty' = getStyleId sty m +pCustomStyle :: String -> Element +pCustomStyle sty = mknode "w:pStyle" [("w:val",sty)] () + pStyleM :: String -> WS XML.Element pStyleM = flip fmap (gets stParaStyles) . pStyle @@ -654,6 +657,9 @@ rStyle sty (CharStyleMap m) = mknode "w:rStyle" [("w:val",sty')] () where sty' = getStyleId sty m +rCustomStyle :: String -> Element +rCustomStyle sty = mknode "w:rStyle" [("w:val",sty)] () + rStyleM :: String -> WS XML.Element rStyleM = flip fmap (gets stCharStyles) . rStyle @@ -720,7 +726,7 @@ blockToOpenXML opts (BlockQuote blocks) = do setFirstPara return p blockToOpenXML opts (CodeBlock attrs str) = do - p <- withParaPropM (pStyleM "Source Code") (blockToOpenXML opts $ Para [Code attrs str]) + p <- withParaProp (pCustomStyle "SourceCode") (blockToOpenXML opts $ Para [Code attrs str]) setFirstPara return p blockToOpenXML _ HorizontalRule = do @@ -978,15 +984,14 @@ inlineToOpenXML opts (Math mathType str) = do Left _ -> inlinesToOpenXML opts (texMathToInlines mathType str) inlineToOpenXML opts (Cite _ lst) = inlinesToOpenXML opts lst inlineToOpenXML opts (Code attrs str) = do - rSM <- gets stCharStyles let unhighlighted = intercalate [br] `fmap` (mapM formattedString $ lines str) formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok) toHlTok (toktype,tok) = mknode "w:r" [] [ mknode "w:rPr" [] - [ rStyle (show toktype) rSM ] + [ rCustomStyle (show toktype) ] , mknode "w:t" [("xml:space","preserve")] tok ] - withTextProp (rStyle "Verbatim Char" rSM) + withTextProp (rCustomStyle "VerbatimChar") $ if writerHighlight opts then case highlight formatOpenXML attrs str of Nothing -> unhighlighted @@ -995,9 +1000,8 @@ inlineToOpenXML opts (Code attrs str) = do inlineToOpenXML opts (Note bs) = do notes <- gets stFootnotes notenum <- getUniqueId - rSM <- gets stCharStyles let notemarker = mknode "w:r" [] - [ mknode "w:rPr" [] (rStyle "Footnote Ref" rSM) + [ mknode "w:rPr" [] (rCustomStyle "FootnoteRef") , mknode "w:footnoteRef" [] () ] let notemarkerXml = RawInline (Format "openxml") $ ppElement notemarker let insertNoteRef (Plain ils : xs) = Plain (notemarkerXml : ils) : xs @@ -1014,15 +1018,15 @@ inlineToOpenXML opts (Note bs) = do let newnote = mknode "w:footnote" [("w:id", notenum)] $ contents modify $ \s -> s{ stFootnotes = newnote : notes } return [ mknode "w:r" [] - [ mknode "w:rPr" [] (rStyle "Footnote Ref" rSM) + [ mknode "w:rPr" [] (rCustomStyle "FootnoteRef") , mknode "w:footnoteReference" [("w:id", notenum)] () ] ] -- internal link: inlineToOpenXML opts (Link txt ('#':xs,_)) = do - contents <- withTextPropM (rStyleM "Link") $ inlinesToOpenXML opts txt + contents <- withTextProp (rCustomStyle "Link") $ inlinesToOpenXML opts txt return [ mknode "w:hyperlink" [("w:anchor",xs)] contents ] -- external link: inlineToOpenXML opts (Link txt (src,_)) = do - contents <- withTextPropM (rStyleM "Link") $ inlinesToOpenXML opts txt + contents <- withTextProp (rCustomStyle "Link") $ inlinesToOpenXML opts txt extlinks <- gets stExternalLinks id' <- case M.lookup src extlinks of Just i -> return i -- cgit v1.2.3 From 908a47e4b9c9ad5cc1fd2f5e551ef0fd98d93178 Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Tue, 24 Feb 2015 02:31:14 +0300 Subject: Treat some ambiguous styles as custom for now * Author * Abstract * Compact * ImageCaption * TableCaption * DefinitionTerm * Definition * FirstParagraph --- docxstyles.txt | 44 +++++++++++++++++++++++++++++++++++++++++ src/Text/Pandoc/Writers/Docx.hs | 21 ++++++++++---------- 2 files changed, 54 insertions(+), 11 deletions(-) create mode 100644 docxstyles.txt (limited to 'src/Text') diff --git a/docxstyles.txt b/docxstyles.txt new file mode 100644 index 000000000..6bc405d15 --- /dev/null +++ b/docxstyles.txt @@ -0,0 +1,44 @@ +| Name | Id | custom | word | type | alt | +|:-----------------------|:---------------------|:------:|:----:|:----:|:---------------------------------| +| Title | Title | | + | p | | +| Subtitle | Subtitle | | + | p | | +| Author | Author | ? | | p | ? | +| Date | Date | | + | p | | +| Abstract | Abstract | ? | | p | ? | +| Bibliography | Bibliography | | + | p | | +| Heading 1 | Heading1 | | + | p | | +| Heading 2 | Heading2 | | + | p | | +| Heading 3 | Heading3 | | + | p | | +| Heading 4 | Heading4 | | + | p | | +| Heading 5 | Heading5 | | + | p | | +| Compact | Compact | ? | | p | ? | +| Image Caption | ImageCaption | | | p | caption | +| First Paragraph | FirstParagraph | | ??? | p | | +| Body Text | BodyText | | + | p | | +| Block Quote | BlockQuote | | | p | Intense Quote, Block Text, Quote | +| Source Code | SourceCode | + | | p | | +| Table Caption | TableCaption | | | p | caption | +| Definition Term | DefinitionTerm | ? | | p | ? | +| Definition | Definition | ? | | p | ? | +| Verbatim Char | VerbatimChar | + | | c | | +| Footnote Ref | FootnoteRef | + | | c | footnote reference | +| Footnote Text | FootnoteText | | + | p | | +| Link | Link | + | | c | Hyperlink | +| Normal | Normal | | d | p | | +| Default Paragraph Font | DefaultParagraphFont | | d | c | | +| Normal Table | TableNormal | | d | t | | +| Body Text Char | BodyTextChar | + | | c | | + + +| Name | ambiguous | type | can be replaced by | +|:----------------|:---------:|:----:|:---------------------------------| +| Author | yes | p | ? | +| Abstract | yes | p | ? | +| Compact | yes | p | ? | +| Image Caption | yes | p | caption | +| Block Quote | yes | p | Intense Quote, Block Text, Quote | +| Table Caption | yes | p | caption | +| Definition Term | yes | p | ? | +| Definition | yes | p | ? | +| Link | no | c | Hyperlink | +| Footnote Ref | no | c | footnote reference | diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index f217dd9bc..9984c243f 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -619,12 +619,12 @@ writeOpenXML opts (Pandoc meta blocks) = do _ -> [] title <- withParaPropM (pStyleM "Title") $ blocksToOpenXML opts [Para tit | not (null tit)] subtitle <- withParaPropM (pStyleM "Subtitle") $ blocksToOpenXML opts [Para subtitle' | not (null subtitle')] - authors <- withParaPropM (pStyleM "Author") $ blocksToOpenXML opts $ + authors <- withParaProp (pCustomStyle "Author") $ blocksToOpenXML opts $ map Para auths date <- withParaPropM (pStyleM "Date") $ blocksToOpenXML opts [Para dat | not (null dat)] abstract <- if null abstract' then return [] - else withParaPropM (pStyleM "Abstract") $ blocksToOpenXML opts abstract' + else withParaProp (pCustomStyle "Abstract") $ blocksToOpenXML opts abstract' let convertSpace (Str x : Space : Str y : xs) = Str (x ++ " " ++ y) : xs convertSpace (Str x : Str y : xs) = Str (x ++ y) : xs convertSpace xs = xs @@ -693,14 +693,14 @@ blockToOpenXML opts (Header lev (ident,_,_) lst) = do ,("w:name",bookmarkName)] () let bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] () return [mknode "w:p" [] (paraProps ++ [bookmarkStart, bookmarkEnd] ++ contents)] -blockToOpenXML opts (Plain lst) = withParaPropM (pStyleM "Compact") +blockToOpenXML opts (Plain lst) = withParaProp (pCustomStyle "Compact") $ blockToOpenXML opts (Para lst) -- title beginning with fig: indicates that the image is a figure blockToOpenXML opts (Para [Image alt (src,'f':'i':'g':':':tit)]) = do setFirstPara paraProps <- getParaProps False contents <- inlinesToOpenXML opts [Image alt (src,tit)] - captionNode <- withParaPropM (pStyleM "Image Caption") + captionNode <- withParaProp (pCustomStyle "ImageCaption") $ blockToOpenXML opts (Para alt) return $ mknode "w:p" [] (paraProps ++ contents) : captionNode -- fixDisplayMath sometimes produces a Para [] as artifact @@ -712,8 +712,8 @@ blockToOpenXML opts (Para lst) = do _ -> False pSM <- gets stParaStyles let paraProps' = case paraProps of - [] | isFirstPara -> [mknode "w:pPr" [] [(pStyle "First Paragraph" pSM)]] - [] -> [mknode "w:pPr" [] [(pStyle "Body Text" pSM)]] + [] | isFirstPara -> [mknode "w:pPr" [] [pCustomStyle "FirstParagraph"]] + [] -> [mknode "w:pPr" [] [pStyle "Body Text" pSM]] ps -> ps modify $ \s -> s { stFirstPara = False } contents <- inlinesToOpenXML opts lst @@ -741,7 +741,7 @@ blockToOpenXML opts (Table caption aligns widths headers rows) = do let captionStr = stringify caption caption' <- if null caption then return [] - else withParaPropM (pStyleM "Table Caption") + else withParaProp (pCustomStyle "TableCaption") $ blockToOpenXML opts (Para caption) let alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] () let cellToOpenXML (al, cell) = withParaProp (alignmentFor al) @@ -752,8 +752,7 @@ blockToOpenXML opts (Table caption aligns widths headers rows) = do [ mknode "w:tcBorders" [] $ mknode "w:bottom" [("w:val","single")] () , mknode "w:vAlign" [("w:val","bottom")] () ] - let emptyCell = [mknode "w:p" [] [mknode "w:pPr" [] - [mknode "w:pStyle" [("w:val","Compact")] ()]]] + let emptyCell = [mknode "w:p" [] [pCustomStyle "Compact"]] let mkcell border contents = mknode "w:tc" [] $ [ borderProps | border ] ++ if null contents @@ -801,9 +800,9 @@ blockToOpenXML opts (DefinitionList items) = do definitionListItemToOpenXML :: WriterOptions -> ([Inline],[[Block]]) -> WS [Element] definitionListItemToOpenXML opts (term,defs) = do - term' <- withParaPropM (pStyleM "Definition Term") + term' <- withParaProp (pCustomStyle "DefinitionTerm") $ blockToOpenXML opts (Para term) - defs' <- withParaPropM (pStyleM "Definition") + defs' <- withParaProp (pCustomStyle "Definition") $ concat `fmap` mapM (blocksToOpenXML opts) defs return $ term' ++ defs' -- cgit v1.2.3 From ba153585db4b124ed86245c04e6275c6ed0c4049 Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Tue, 24 Feb 2015 02:34:46 +0300 Subject: Comment out unused functions to make CI happy --- src/Text/Pandoc/Writers/Docx.hs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 9984c243f..ebd060d38 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -652,16 +652,16 @@ pCustomStyle sty = mknode "w:pStyle" [("w:val",sty)] () pStyleM :: String -> WS XML.Element pStyleM = flip fmap (gets stParaStyles) . pStyle -rStyle :: String -> CharStyleMap -> Element -rStyle sty (CharStyleMap m) = mknode "w:rStyle" [("w:val",sty')] () - where - sty' = getStyleId sty m +-- rStyle :: String -> CharStyleMap -> Element +-- rStyle sty (CharStyleMap m) = mknode "w:rStyle" [("w:val",sty')] () +-- where +-- sty' = getStyleId sty m rCustomStyle :: String -> Element rCustomStyle sty = mknode "w:rStyle" [("w:val",sty)] () -rStyleM :: String -> WS XML.Element -rStyleM = flip fmap (gets stCharStyles) . rStyle +-- rStyleM :: String -> WS XML.Element +-- rStyleM = flip fmap (gets stCharStyles) . rStyle getUniqueId :: MonadIO m => m String -- the + 20 is to ensure that there are no clashes with the rIds @@ -866,8 +866,8 @@ withTextProp d p = do popTextProp return res -withTextPropM :: WS Element -> WS a -> WS a -withTextPropM = (. flip withTextProp) . (>>=) +-- withTextPropM :: WS Element -> WS a -> WS a +-- withTextPropM = (. flip withTextProp) . (>>=) getParaProps :: Bool -> WS [Element] getParaProps displayMathPara = do -- cgit v1.2.3 From 13daf3ed6a66698722fce7020bb64ee8700b5613 Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Sun, 1 Mar 2015 18:49:44 +0300 Subject: Update Docx writer for 1cb601d reference.docx --- src/Text/Pandoc/Writers/Docx.hs | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index ebd060d38..eb7fa344b 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -652,16 +652,16 @@ pCustomStyle sty = mknode "w:pStyle" [("w:val",sty)] () pStyleM :: String -> WS XML.Element pStyleM = flip fmap (gets stParaStyles) . pStyle --- rStyle :: String -> CharStyleMap -> Element --- rStyle sty (CharStyleMap m) = mknode "w:rStyle" [("w:val",sty')] () --- where --- sty' = getStyleId sty m +rStyle :: String -> CharStyleMap -> Element +rStyle sty (CharStyleMap m) = mknode "w:rStyle" [("w:val",sty')] () + where + sty' = getStyleId sty m rCustomStyle :: String -> Element rCustomStyle sty = mknode "w:rStyle" [("w:val",sty)] () --- rStyleM :: String -> WS XML.Element --- rStyleM = flip fmap (gets stCharStyles) . rStyle +rStyleM :: String -> WS XML.Element +rStyleM = flip fmap (gets stCharStyles) . rStyle getUniqueId :: MonadIO m => m String -- the + 20 is to ensure that there are no clashes with the rIds @@ -722,7 +722,7 @@ blockToOpenXML _ (RawBlock format str) | format == Format "openxml" = return [ x | Elem x <- parseXML str ] | otherwise = return [] blockToOpenXML opts (BlockQuote blocks) = do - p <- withParaPropM (pStyleM "Block Quote") $ blocksToOpenXML opts blocks + p <- withParaPropM (pStyleM "Block Text") $ blocksToOpenXML opts blocks setFirstPara return p blockToOpenXML opts (CodeBlock attrs str) = do @@ -866,8 +866,8 @@ withTextProp d p = do popTextProp return res --- withTextPropM :: WS Element -> WS a -> WS a --- withTextPropM = (. flip withTextProp) . (>>=) +withTextPropM :: WS Element -> WS a -> WS a +withTextPropM = (. flip withTextProp) . (>>=) getParaProps :: Bool -> WS [Element] getParaProps displayMathPara = do @@ -999,8 +999,9 @@ inlineToOpenXML opts (Code attrs str) = do inlineToOpenXML opts (Note bs) = do notes <- gets stFootnotes notenum <- getUniqueId + footnoteStyle <- rStyleM "Footnote Reference" let notemarker = mknode "w:r" [] - [ mknode "w:rPr" [] (rCustomStyle "FootnoteRef") + [ mknode "w:rPr" [] footnoteStyle , mknode "w:footnoteRef" [] () ] let notemarkerXml = RawInline (Format "openxml") $ ppElement notemarker let insertNoteRef (Plain ils : xs) = Plain (notemarkerXml : ils) : xs @@ -1017,15 +1018,15 @@ inlineToOpenXML opts (Note bs) = do let newnote = mknode "w:footnote" [("w:id", notenum)] $ contents modify $ \s -> s{ stFootnotes = newnote : notes } return [ mknode "w:r" [] - [ mknode "w:rPr" [] (rCustomStyle "FootnoteRef") + [ mknode "w:rPr" [] footnoteStyle , mknode "w:footnoteReference" [("w:id", notenum)] () ] ] -- internal link: inlineToOpenXML opts (Link txt ('#':xs,_)) = do - contents <- withTextProp (rCustomStyle "Link") $ inlinesToOpenXML opts txt + contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt return [ mknode "w:hyperlink" [("w:anchor",xs)] contents ] -- external link: inlineToOpenXML opts (Link txt (src,_)) = do - contents <- withTextProp (rCustomStyle "Link") $ inlinesToOpenXML opts txt + contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt extlinks <- gets stExternalLinks id' <- case M.lookup src extlinks of Just i -> return i -- cgit v1.2.3 From 409111f647d3efa403ff1efff12eebc3173017b5 Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Sun, 1 Mar 2015 22:57:35 +0300 Subject: Started moving StyleMap out of writer code --- pandoc.cabal | 4 +- src/Text/Pandoc/Readers/Docx/Parse.hs | 26 ++------ src/Text/Pandoc/Readers/Docx/StyleMap.hs | 105 +++++++++++++++++++++++++++++++ src/Text/Pandoc/Readers/Docx/Util.hs | 26 ++++++++ src/Text/Pandoc/Writers/Docx.hs | 71 ++++++++------------- 5 files changed, 165 insertions(+), 67 deletions(-) create mode 100644 src/Text/Pandoc/Readers/Docx/StyleMap.hs create mode 100644 src/Text/Pandoc/Readers/Docx/Util.hs (limited to 'src/Text') diff --git a/pandoc.cabal b/pandoc.cabal index 16106f896..32bbfbd26 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -338,7 +338,9 @@ Library Other-Modules: Text.Pandoc.Readers.Docx.Lists, Text.Pandoc.Readers.Docx.Reducible, Text.Pandoc.Readers.Docx.Parse, - Text.Pandoc.Readers.Docx.Fonts + Text.Pandoc.Readers.Docx.Fonts, + Text.Pandoc.Readers.Docx.Util, + Text.Pandoc.Readers.Docx.StyleMap Text.Pandoc.Writers.Shared, Text.Pandoc.Asciify, Text.Pandoc.MIME, diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index b644923c4..cce80fb48 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -65,6 +65,7 @@ import Text.Pandoc.Compat.Except import Text.TeXMath.Readers.OMML (readOMML) import Text.Pandoc.Readers.Docx.Fonts (getUnicode, Font(..)) import Text.TeXMath (Exp) +import Text.Pandoc.Readers.Docx.Util import Data.Char (readLitChar, ord, chr, isDigit) data ReaderEnv = ReaderEnv { envNotes :: Notes @@ -108,8 +109,6 @@ mapD f xs = in concatMapM handler xs -type NameSpaces = [(String, String)] - data Docx = Docx Document deriving Show @@ -249,10 +248,6 @@ type ChangeId = String type Author = String type ChangeDate = String -attrToNSPair :: Attr -> Maybe (String, String) -attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val) -attrToNSPair _ = Nothing - archiveToDocx :: Archive -> Either DocxError Docx archiveToDocx archive = do let notes = archiveToNotes archive @@ -269,7 +264,7 @@ archiveToDocument :: Archive -> D Document archiveToDocument zf = do entry <- maybeToD $ findEntryByPath "word/document.xml" zf docElem <- maybeToD $ (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry - let namespaces = mapMaybe attrToNSPair (elAttribs docElem) + let namespaces = elemToNameSpaces docElem bodyElem <- maybeToD $ findChild (elemName namespaces "w" "body") docElem body <- elemToBody namespaces bodyElem return $ Document namespaces body @@ -288,7 +283,7 @@ archiveToStyles zf = case stylesElem of Nothing -> (M.empty, M.empty) Just styElem -> - let namespaces = mapMaybe attrToNSPair (elAttribs styElem) + let namespaces = elemToNameSpaces styElem in ( M.fromList $ buildBasedOnList namespaces styElem (Nothing :: Maybe CharStyle), @@ -356,10 +351,10 @@ archiveToNotes zf = enElem = findEntryByPath "word/endnotes.xml" zf >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) fn_namespaces = case fnElem of - Just e -> mapMaybe attrToNSPair (elAttribs e) + Just e -> elemToNameSpaces e Nothing -> [] en_namespaces = case enElem of - Just e -> mapMaybe attrToNSPair (elAttribs e) + Just e -> elemToNameSpaces e Nothing -> [] ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces fn = fnElem >>= (elemToNotes ns "footnote") @@ -459,7 +454,7 @@ archiveToNumbering' zf = do Nothing -> Just $ Numbering [] [] [] Just entry -> do numberingElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry - let namespaces = mapMaybe attrToNSPair (elAttribs numberingElem) + let namespaces = elemToNameSpaces numberingElem numElems = findChildren (QName "num" (lookup "w" namespaces) (Just "w")) numberingElem @@ -488,15 +483,6 @@ elemToNotes _ _ _ = Nothing --------------------------------------------- --------------------------------------------- -elemName :: NameSpaces -> String -> String -> QName -elemName ns prefix name = (QName name (lookup prefix ns) (Just prefix)) - -isElem :: NameSpaces -> String -> String -> Element -> Bool -isElem ns prefix name element = - qName (elName element) == name && - qURI (elName element) == (lookup prefix ns) - - elemToTblGrid :: NameSpaces -> Element -> D TblGrid elemToTblGrid ns element | isElem ns "w" "tblGrid" element = let cols = findChildren (elemName ns "w" "gridCol") element diff --git a/src/Text/Pandoc/Readers/Docx/StyleMap.hs b/src/Text/Pandoc/Readers/Docx/StyleMap.hs new file mode 100644 index 000000000..2e3d6db95 --- /dev/null +++ b/src/Text/Pandoc/Readers/Docx/StyleMap.hs @@ -0,0 +1,105 @@ +module Text.Pandoc.Readers.Docx.StyleMap ( StyleMap + , ParaStyleMap + , CharStyleMap + , StyleMaps(..) + , defaultStyleMaps + , getStyleMaps + , getStyleId + , hasStyleName + ) where + +import Text.XML.Light +import Text.Pandoc.Readers.Docx.Util +import Control.Monad.State +import Data.Char (toLower) +import Data.Maybe (fromMaybe) +import qualified Data.Map as M + +newtype ParaStyleMap = ParaStyleMap ( M.Map String String ) +newtype CharStyleMap = CharStyleMap ( M.Map String String ) + +class StyleMap a where + alterMap :: (M.Map String String -> M.Map String String) -> a -> a + getMap :: a -> M.Map String String + +instance StyleMap ParaStyleMap where + alterMap f (ParaStyleMap m) = ParaStyleMap $ f m + getMap (ParaStyleMap m) = m + +instance StyleMap CharStyleMap where + alterMap f (CharStyleMap m) = CharStyleMap $ f m + getMap (CharStyleMap m) = m + +insert :: (StyleMap a) => String -> String -> a -> a +insert k v = alterMap $ M.insert k v + +getStyleId :: (StyleMap a) => String -> a -> String +getStyleId s = M.findWithDefault (filter (/=' ') s) (map toLower s) . getMap + +hasStyleName :: (StyleMap a) => String -> a -> Bool +hasStyleName styleName = M.member (map toLower styleName) . getMap + +data StyleMaps = StyleMaps { sNameSpaces :: NameSpaces + , sParaStyleMap :: ParaStyleMap + , sCharStyleMap :: CharStyleMap + } + +data StyleType = ParaStyle | CharStyle + +defaultStyleMaps :: StyleMaps +defaultStyleMaps = StyleMaps { sNameSpaces = [] + , sParaStyleMap = ParaStyleMap M.empty + , sCharStyleMap = CharStyleMap M.empty + } + +type StateM a = StateT StyleMaps Maybe a + +getStyleMaps :: Element -> StyleMaps +getStyleMaps docElem = fromMaybe state' $ execStateT genStyleMap state' + where + state' = defaultStyleMaps {sNameSpaces = elemToNameSpaces docElem} + insertPara key val = modify $ \s -> + s { sParaStyleMap = insert key val $ sParaStyleMap s } + insertChar key val = modify $ \s -> + s { sCharStyleMap = insert key val $ sCharStyleMap s } + genStyleItem e = do + styleType <- getStyleType e + nameVal <- getNameVal e + styleId <- getAttrStyleId e + let nameValLC = map toLower nameVal + case styleType of + ParaStyle -> insertPara nameValLC styleId + CharStyle -> insertChar nameValLC styleId + genStyleMap = do + style <- elemName' "style" + let styles = findChildren style docElem + forM_ styles genStyleItem + +getStyleType :: Element -> StateM StyleType +getStyleType e = do + styleTypeStr <- getAttrType e + case styleTypeStr of + "paragraph" -> return ParaStyle + "character" -> return CharStyle + _ -> lift Nothing + +getAttrType :: Element -> StateM String +getAttrType el = do + name <- elemName' "type" + lift $ findAttr name el + +getAttrStyleId :: Element -> StateM String +getAttrStyleId el = do + name <- elemName' "styleId" + lift $ findAttr name el + +getNameVal :: Element -> StateM String +getNameVal el = do + name <- elemName' "name" + val <- elemName' "val" + lift $ findChild name el >>= findAttr val + +elemName' :: String -> StateM QName +elemName' name = do + namespaces <- gets sNameSpaces + return $ elemName namespaces "w" name diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs new file mode 100644 index 000000000..891f107b0 --- /dev/null +++ b/src/Text/Pandoc/Readers/Docx/Util.hs @@ -0,0 +1,26 @@ +module Text.Pandoc.Readers.Docx.Util ( + NameSpaces + , elemName + , isElem + , elemToNameSpaces + ) where + +import Text.XML.Light +import Data.Maybe (mapMaybe) + +type NameSpaces = [(String, String)] + +elemToNameSpaces :: Element -> NameSpaces +elemToNameSpaces = mapMaybe attrToNSPair . elAttribs + +attrToNSPair :: Attr -> Maybe (String, String) +attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val) +attrToNSPair _ = Nothing + +elemName :: NameSpaces -> String -> String -> QName +elemName ns prefix name = QName name (lookup prefix ns) (Just prefix) + +isElem :: NameSpaces -> String -> String -> Element -> Bool +isElem ns prefix name element = + qName (elName element) == name && + qURI (elName element) == lookup prefix ns diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index eb7fa344b..53065309b 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -54,6 +54,8 @@ import Text.Pandoc.Walk import Text.Highlighting.Kate.Types () import Text.XML.Light as XML import Text.TeXMath +import Text.Pandoc.Readers.Docx.StyleMap +import Text.Pandoc.Readers.Docx.Util (elemName) import Control.Monad.State import Text.Highlighting.Kate import Data.Unique (hashUnique, newUnique) @@ -64,7 +66,6 @@ import Text.Pandoc.MIME (MimeType, getMimeType, getMimeTypeDef, extensionFromMimeType) import Control.Applicative ((<$>), (<|>), (<*>)) import Data.Maybe (fromMaybe, mapMaybe, maybeToList) -import Data.Char (toLower) data ListMarker = NoMarker | BulletMarker @@ -90,9 +91,6 @@ listMarkerToId (NumberMarker sty delim n) = OneParen -> '2' TwoParens -> '3' -newtype ParaStyleMap = ParaStyleMap (M.Map String String) deriving Show -newtype CharStyleMap = CharStyleMap (M.Map String String) deriving Show - data WriterState = WriterState{ stTextProperties :: [Element] , stParaProperties :: [Element] @@ -109,8 +107,7 @@ data WriterState = WriterState{ , stChangesAuthor :: String , stChangesDate :: String , stPrintWidth :: Integer - , stParaStyles :: ParaStyleMap - , stCharStyles :: CharStyleMap + , stStyleMaps :: StyleMaps , stFirstPara :: Bool } @@ -131,8 +128,7 @@ defaultWriterState = WriterState{ , stChangesAuthor = "unknown" , stChangesDate = "1969-12-31T19:00:00Z" , stPrintWidth = 1 - , stParaStyles = ParaStyleMap M.empty - , stCharStyles = CharStyleMap M.empty + , stStyleMaps = defaultStyleMaps , stFirstPara = False } @@ -220,28 +216,14 @@ writeDocx opts doc@(Pandoc meta _) = do styledoc <- parseXml refArchive distArchive stylepath -- parse styledoc for heading styles - let styleNamespaces = map ((,) <$> qName . attrKey <*> attrVal) . - filter ((==Just "xmlns") . qPrefix . attrKey) . - elAttribs $ styledoc - mywURI = lookup "w" styleNamespaces - myName name = QName name mywURI (Just "w") - getAttrStyleId = findAttr (myName "styleId") - getAttrType = findAttr (myName "type") - isParaStyle = (Just "paragraph" ==) . getAttrType - isCharStyle = (Just "character" ==) . getAttrType - getNameVal = findChild (myName "name") >=> findAttr (myName "val") >=> return . map toLower - genStyleItem f e | f e = liftM2 (,) <$> getNameVal <*> getAttrStyleId $ e - | otherwise = Nothing - genStyleMap f = M.fromList $ mapMaybe (genStyleItem f) $ findChildren (myName "style") styledoc - paraStyles = ParaStyleMap $ genStyleMap isParaStyle - charStyles = CharStyleMap $ genStyleMap isCharStyle + let styleMaps = getStyleMaps styledoc ((contents, footnotes), st) <- runStateT (writeOpenXML opts{writerWrapText = False} doc') defaultWriterState{ stChangesAuthor = fromMaybe "unknown" username , stChangesDate = formatTime defaultTimeLocale "%FT%XZ" utctime , stPrintWidth = (maybe 420 (\x -> quot x 20) pgContentWidth) - , stParaStyles = paraStyles - , stCharStyles = charStyles} + , stStyleMaps = styleMaps + } let epochtime = floor $ utcTimeToPOSIXSeconds utctime let imgs = M.elems $ stImages st @@ -394,7 +376,7 @@ writeDocx opts doc@(Pandoc meta _) = do linkrels -- styles - let newstyles = styleToOpenXml charStyles paraStyles $ writerHighlightStyle opts + let newstyles = styleToOpenXml styleMaps $ writerHighlightStyle opts let styledoc' = styledoc{ elContent = modifyContent (elContent styledoc) } where modifyContent @@ -402,9 +384,10 @@ writeDocx opts doc@(Pandoc meta _) = do | otherwise = filter notTokStyle notTokStyle (Elem el) = notStyle el || notTokId el notTokStyle _ = True - notStyle = (/= myName "style") . elName - notTokId = maybe True (`notElem` tokStys) . getAttrStyleId + notStyle = (/= elemName' "style") . elName + notTokId = maybe True (`notElem` tokStys) . findAttr (elemName' "styleId") tokStys = "SourceCode" : map show (enumFromTo KeywordTok NormalTok) + elemName' = elemName (sNameSpaces styleMaps) "w" let styleEntry = toEntry stylepath epochtime $ renderXml styledoc' -- construct word/numbering.xml @@ -481,12 +464,11 @@ writeDocx opts doc@(Pandoc meta _) = do miscRelEntries ++ otherMediaEntries return $ fromArchive archive -styleToOpenXml :: CharStyleMap -> ParaStyleMap -> Style -> [Element] -styleToOpenXml (CharStyleMap csm) (ParaStyleMap psm) style = +styleToOpenXml :: StyleMaps -> Style -> [Element] +styleToOpenXml sm style = maybeToList parStyle ++ mapMaybe toStyle alltoktypes where alltoktypes = enumFromTo KeywordTok NormalTok - styleExists m styleName = M.member (map toLower styleName) m - toStyle toktype | styleExists csm $ show toktype = Nothing + toStyle toktype | hasStyleName (show toktype) (sCharStyleMap sm) = Nothing | otherwise = Just $ mknode "w:style" [("w:type","character"), ("w:customStyle","1"),("w:styleId",show toktype)] @@ -509,7 +491,7 @@ styleToOpenXml (CharStyleMap csm) (ParaStyleMap psm) style = tokBg toktype = maybe "auto" (drop 1 . fromColor) $ (tokenBackground =<< lookup toktype tokStyles) `mplus` backgroundColor style - parStyle | styleExists psm "Source Code" = Nothing + parStyle | hasStyleName "Source Code" (sParaStyleMap sm) = Nothing | otherwise = Just $ mknode "w:style" [("w:type","paragraph"), ("w:customStyle","1"),("w:styleId","SourceCode")] @@ -638,30 +620,27 @@ writeOpenXML opts (Pandoc meta blocks) = do blocksToOpenXML :: WriterOptions -> [Block] -> WS [Element] blocksToOpenXML opts bls = concat `fmap` mapM (blockToOpenXML opts) bls -getStyleId :: String -> M.Map String String -> String -getStyleId s = M.findWithDefault (filter (/=' ') s) (map toLower s) - -pStyle :: String -> ParaStyleMap -> Element -pStyle sty (ParaStyleMap m) = mknode "w:pStyle" [("w:val",sty')] () +pStyle :: String -> StyleMaps -> Element +pStyle sty m = mknode "w:pStyle" [("w:val",sty')] () where - sty' = getStyleId sty m + sty' = getStyleId sty $ sParaStyleMap m pCustomStyle :: String -> Element pCustomStyle sty = mknode "w:pStyle" [("w:val",sty)] () pStyleM :: String -> WS XML.Element -pStyleM = flip fmap (gets stParaStyles) . pStyle +pStyleM = (`fmap` gets stStyleMaps) . pStyle -rStyle :: String -> CharStyleMap -> Element -rStyle sty (CharStyleMap m) = mknode "w:rStyle" [("w:val",sty')] () +rStyle :: String -> StyleMaps -> Element +rStyle sty m = mknode "w:rStyle" [("w:val",sty')] () where - sty' = getStyleId sty m + sty' = getStyleId sty $ sCharStyleMap m rCustomStyle :: String -> Element rCustomStyle sty = mknode "w:rStyle" [("w:val",sty)] () rStyleM :: String -> WS XML.Element -rStyleM = flip fmap (gets stCharStyles) . rStyle +rStyleM = (`fmap` gets stStyleMaps) . rStyle getUniqueId :: MonadIO m => m String -- the + 20 is to ensure that there are no clashes with the rIds @@ -710,10 +689,10 @@ blockToOpenXML opts (Para lst) = do paraProps <- getParaProps $ case lst of [Math DisplayMath _] -> True _ -> False - pSM <- gets stParaStyles + sm <- gets stStyleMaps let paraProps' = case paraProps of [] | isFirstPara -> [mknode "w:pPr" [] [pCustomStyle "FirstParagraph"]] - [] -> [mknode "w:pPr" [] [pStyle "Body Text" pSM]] + [] -> [mknode "w:pPr" [] [pStyle "Body Text" sm]] ps -> ps modify $ \s -> s { stFirstPara = False } contents <- inlinesToOpenXML opts lst -- cgit v1.2.3 From 65c80822e7900e92b4bba67912da77062654cc26 Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Tue, 3 Mar 2015 13:08:52 +0300 Subject: Code cleanup --- src/Text/Pandoc/Readers/Docx/StyleMap.hs | 24 ++++++++++++------------ src/Text/Pandoc/Writers/Docx.hs | 24 ++++++++++-------------- 2 files changed, 22 insertions(+), 26 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Docx/StyleMap.hs b/src/Text/Pandoc/Readers/Docx/StyleMap.hs index 2e3d6db95..5a4e9cfc2 100644 --- a/src/Text/Pandoc/Readers/Docx/StyleMap.hs +++ b/src/Text/Pandoc/Readers/Docx/StyleMap.hs @@ -1,7 +1,4 @@ -module Text.Pandoc.Readers.Docx.StyleMap ( StyleMap - , ParaStyleMap - , CharStyleMap - , StyleMaps(..) +module Text.Pandoc.Readers.Docx.StyleMap ( StyleMaps(..) , defaultStyleMaps , getStyleMaps , getStyleId @@ -58,23 +55,26 @@ getStyleMaps :: Element -> StyleMaps getStyleMaps docElem = fromMaybe state' $ execStateT genStyleMap state' where state' = defaultStyleMaps {sNameSpaces = elemToNameSpaces docElem} - insertPara key val = modify $ \s -> - s { sParaStyleMap = insert key val $ sParaStyleMap s } - insertChar key val = modify $ \s -> - s { sCharStyleMap = insert key val $ sCharStyleMap s } genStyleItem e = do styleType <- getStyleType e - nameVal <- getNameVal e styleId <- getAttrStyleId e - let nameValLC = map toLower nameVal + nameValLowercase <- map toLower `fmap` getNameVal e case styleType of - ParaStyle -> insertPara nameValLC styleId - CharStyle -> insertChar nameValLC styleId + ParaStyle -> modParaStyleMap $ insert nameValLowercase styleId + CharStyle -> modCharStyleMap $ insert nameValLowercase styleId genStyleMap = do style <- elemName' "style" let styles = findChildren style docElem forM_ styles genStyleItem +modParaStyleMap :: (ParaStyleMap -> ParaStyleMap) -> StateM () +modParaStyleMap f = modify $ \s -> + s {sParaStyleMap = f $ sParaStyleMap s} + +modCharStyleMap :: (CharStyleMap -> CharStyleMap) -> StateM () +modCharStyleMap f = modify $ \s -> + s {sCharStyleMap = f $ sCharStyleMap s} + getStyleType :: Element -> StateM StyleType getStyleType e = do styleTypeStr <- getAttrType e diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 53065309b..c4de12d2f 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -620,27 +620,23 @@ writeOpenXML opts (Pandoc meta blocks) = do blocksToOpenXML :: WriterOptions -> [Block] -> WS [Element] blocksToOpenXML opts bls = concat `fmap` mapM (blockToOpenXML opts) bls -pStyle :: String -> StyleMaps -> Element -pStyle sty m = mknode "w:pStyle" [("w:val",sty')] () - where - sty' = getStyleId sty $ sParaStyleMap m - pCustomStyle :: String -> Element pCustomStyle sty = mknode "w:pStyle" [("w:val",sty)] () pStyleM :: String -> WS XML.Element -pStyleM = (`fmap` gets stStyleMaps) . pStyle - -rStyle :: String -> StyleMaps -> Element -rStyle sty m = mknode "w:rStyle" [("w:val",sty')] () - where - sty' = getStyleId sty $ sCharStyleMap m +pStyleM styleName = do + styleMaps <- gets stStyleMaps + let sty' = getStyleId styleName $ sParaStyleMap styleMaps + return $ mknode "w:pStyle" [("w:val",sty')] () rCustomStyle :: String -> Element rCustomStyle sty = mknode "w:rStyle" [("w:val",sty)] () rStyleM :: String -> WS XML.Element -rStyleM = (`fmap` gets stStyleMaps) . rStyle +rStyleM styleName = do + styleMaps <- gets stStyleMaps + let sty' = getStyleId styleName $ sCharStyleMap styleMaps + return $ mknode "w:rStyle" [("w:val",sty')] () getUniqueId :: MonadIO m => m String -- the + 20 is to ensure that there are no clashes with the rIds @@ -689,10 +685,10 @@ blockToOpenXML opts (Para lst) = do paraProps <- getParaProps $ case lst of [Math DisplayMath _] -> True _ -> False - sm <- gets stStyleMaps + bodyTextStyle <- pStyleM "Body Text" let paraProps' = case paraProps of [] | isFirstPara -> [mknode "w:pPr" [] [pCustomStyle "FirstParagraph"]] - [] -> [mknode "w:pPr" [] [pStyle "Body Text" sm]] + [] -> [mknode "w:pPr" [] [bodyTextStyle]] ps -> ps modify $ \s -> s { stFirstPara = False } contents <- inlinesToOpenXML opts lst -- cgit v1.2.3 From c0c9b313e6109e0f390cdda1bb868e394faae21b Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Sun, 8 Mar 2015 04:42:23 +0300 Subject: Docx Writer: set firstRow information in tables --- src/Text/Pandoc/Writers/Docx.hs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index c4de12d2f..81369e278 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -733,25 +733,30 @@ blockToOpenXML opts (Table caption aligns widths headers rows) = do if null contents then emptyCell else contents - let mkrow border cells = mknode "w:tr" [] $ map (mkcell border) cells + let mkrow border cells = mknode "w:tr" [] $ + [mknode "w:trPr" [] [ + mknode "w:cnfStyle" [("w:firstRow","1")] ()] | border] + ++ map (mkcell border) cells let textwidth = 7920 -- 5.5 in in twips, 1/20 pt let fullrow = 5000 -- 100% specified in pct let rowwidth = fullrow * sum widths let mkgridcol w = mknode "w:gridCol" [("w:w", show (floor (textwidth * w) :: Integer))] () + let hasHeader = not (all null headers) return $ caption' ++ [mknode "w:tbl" [] ( mknode "w:tblPr" [] ( mknode "w:tblStyle" [("w:val","TableNormal")] () : mknode "w:tblW" [("w:type", "pct"), ("w:w", show rowwidth)] () : + mknode "w:tblLook" [("w:firstRow","1") | hasHeader ] () : [ mknode "w:tblCaption" [("w:val", captionStr)] () | not (null caption) ] ) : mknode "w:tblGrid" [] (if all (==0) widths then [] else map mkgridcol widths) - : [ mkrow True headers' | not (all null headers) ] ++ + : [ mkrow True headers' | hasHeader ] ++ map (mkrow False) rows' )] blockToOpenXML opts (BulletList lst) = do -- cgit v1.2.3