aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--data/reference.docxbin8474 -> 29139 bytes
-rw-r--r--pandoc.cabal4
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse.hs26
-rw-r--r--src/Text/Pandoc/Readers/Docx/StyleMap.hs105
-rw-r--r--src/Text/Pandoc/Readers/Docx/Util.hs26
-rw-r--r--src/Text/Pandoc/Writers/Docx.hs178
-rw-r--r--tests/Tests/Writers/Docx.hs128
-rw-r--r--tests/docx/image_no_embed_writer.native2
-rw-r--r--tests/docx/inline_formatting_writer.native5
-rw-r--r--tests/docx/inline_images_writer.native2
-rw-r--r--tests/docx/links_writer.native6
-rw-r--r--tests/docx/lists_writer.native17
-rw-r--r--tests/media/rId25.jpg0
-rw-r--r--tests/media/rId26.jpg0
-rw-r--r--tests/media/rId27.jpg0
-rw-r--r--tests/test-pandoc.hs2
16 files changed, 401 insertions, 100 deletions
diff --git a/data/reference.docx b/data/reference.docx
index c2e5075b1..d0025ebe9 100644
--- a/data/reference.docx
+++ b/data/reference.docx
Binary files differ
diff --git a/pandoc.cabal b/pandoc.cabal
index a80696c99..9c613736d 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -338,7 +338,9 @@ Library
Other-Modules: Text.Pandoc.Readers.Docx.Lists,
Text.Pandoc.Readers.Docx.Reducible,
Text.Pandoc.Readers.Docx.Parse,
- Text.Pandoc.Readers.Docx.Fonts
+ Text.Pandoc.Readers.Docx.Fonts,
+ Text.Pandoc.Readers.Docx.Util,
+ Text.Pandoc.Readers.Docx.StyleMap
Text.Pandoc.Writers.Shared,
Text.Pandoc.Asciify,
Text.Pandoc.MIME,
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index b644923c4..cce80fb48 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -65,6 +65,7 @@ import Text.Pandoc.Compat.Except
import Text.TeXMath.Readers.OMML (readOMML)
import Text.Pandoc.Readers.Docx.Fonts (getUnicode, Font(..))
import Text.TeXMath (Exp)
+import Text.Pandoc.Readers.Docx.Util
import Data.Char (readLitChar, ord, chr, isDigit)
data ReaderEnv = ReaderEnv { envNotes :: Notes
@@ -108,8 +109,6 @@ mapD f xs =
in
concatMapM handler xs
-type NameSpaces = [(String, String)]
-
data Docx = Docx Document
deriving Show
@@ -249,10 +248,6 @@ type ChangeId = String
type Author = String
type ChangeDate = String
-attrToNSPair :: Attr -> Maybe (String, String)
-attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
-attrToNSPair _ = Nothing
-
archiveToDocx :: Archive -> Either DocxError Docx
archiveToDocx archive = do
let notes = archiveToNotes archive
@@ -269,7 +264,7 @@ archiveToDocument :: Archive -> D Document
archiveToDocument zf = do
entry <- maybeToD $ findEntryByPath "word/document.xml" zf
docElem <- maybeToD $ (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
- let namespaces = mapMaybe attrToNSPair (elAttribs docElem)
+ let namespaces = elemToNameSpaces docElem
bodyElem <- maybeToD $ findChild (elemName namespaces "w" "body") docElem
body <- elemToBody namespaces bodyElem
return $ Document namespaces body
@@ -288,7 +283,7 @@ archiveToStyles zf =
case stylesElem of
Nothing -> (M.empty, M.empty)
Just styElem ->
- let namespaces = mapMaybe attrToNSPair (elAttribs styElem)
+ let namespaces = elemToNameSpaces styElem
in
( M.fromList $ buildBasedOnList namespaces styElem
(Nothing :: Maybe CharStyle),
@@ -356,10 +351,10 @@ archiveToNotes zf =
enElem = findEntryByPath "word/endnotes.xml" zf
>>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
fn_namespaces = case fnElem of
- Just e -> mapMaybe attrToNSPair (elAttribs e)
+ Just e -> elemToNameSpaces e
Nothing -> []
en_namespaces = case enElem of
- Just e -> mapMaybe attrToNSPair (elAttribs e)
+ Just e -> elemToNameSpaces e
Nothing -> []
ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces
fn = fnElem >>= (elemToNotes ns "footnote")
@@ -459,7 +454,7 @@ archiveToNumbering' zf = do
Nothing -> Just $ Numbering [] [] []
Just entry -> do
numberingElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
- let namespaces = mapMaybe attrToNSPair (elAttribs numberingElem)
+ let namespaces = elemToNameSpaces numberingElem
numElems = findChildren
(QName "num" (lookup "w" namespaces) (Just "w"))
numberingElem
@@ -488,15 +483,6 @@ elemToNotes _ _ _ = Nothing
---------------------------------------------
---------------------------------------------
-elemName :: NameSpaces -> String -> String -> QName
-elemName ns prefix name = (QName name (lookup prefix ns) (Just prefix))
-
-isElem :: NameSpaces -> String -> String -> Element -> Bool
-isElem ns prefix name element =
- qName (elName element) == name &&
- qURI (elName element) == (lookup prefix ns)
-
-
elemToTblGrid :: NameSpaces -> Element -> D TblGrid
elemToTblGrid ns element | isElem ns "w" "tblGrid" element =
let cols = findChildren (elemName ns "w" "gridCol") element
diff --git a/src/Text/Pandoc/Readers/Docx/StyleMap.hs b/src/Text/Pandoc/Readers/Docx/StyleMap.hs
new file mode 100644
index 000000000..5a4e9cfc2
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Docx/StyleMap.hs
@@ -0,0 +1,105 @@
+module Text.Pandoc.Readers.Docx.StyleMap ( StyleMaps(..)
+ , defaultStyleMaps
+ , getStyleMaps
+ , getStyleId
+ , hasStyleName
+ ) where
+
+import Text.XML.Light
+import Text.Pandoc.Readers.Docx.Util
+import Control.Monad.State
+import Data.Char (toLower)
+import Data.Maybe (fromMaybe)
+import qualified Data.Map as M
+
+newtype ParaStyleMap = ParaStyleMap ( M.Map String String )
+newtype CharStyleMap = CharStyleMap ( M.Map String String )
+
+class StyleMap a where
+ alterMap :: (M.Map String String -> M.Map String String) -> a -> a
+ getMap :: a -> M.Map String String
+
+instance StyleMap ParaStyleMap where
+ alterMap f (ParaStyleMap m) = ParaStyleMap $ f m
+ getMap (ParaStyleMap m) = m
+
+instance StyleMap CharStyleMap where
+ alterMap f (CharStyleMap m) = CharStyleMap $ f m
+ getMap (CharStyleMap m) = m
+
+insert :: (StyleMap a) => String -> String -> a -> a
+insert k v = alterMap $ M.insert k v
+
+getStyleId :: (StyleMap a) => String -> a -> String
+getStyleId s = M.findWithDefault (filter (/=' ') s) (map toLower s) . getMap
+
+hasStyleName :: (StyleMap a) => String -> a -> Bool
+hasStyleName styleName = M.member (map toLower styleName) . getMap
+
+data StyleMaps = StyleMaps { sNameSpaces :: NameSpaces
+ , sParaStyleMap :: ParaStyleMap
+ , sCharStyleMap :: CharStyleMap
+ }
+
+data StyleType = ParaStyle | CharStyle
+
+defaultStyleMaps :: StyleMaps
+defaultStyleMaps = StyleMaps { sNameSpaces = []
+ , sParaStyleMap = ParaStyleMap M.empty
+ , sCharStyleMap = CharStyleMap M.empty
+ }
+
+type StateM a = StateT StyleMaps Maybe a
+
+getStyleMaps :: Element -> StyleMaps
+getStyleMaps docElem = fromMaybe state' $ execStateT genStyleMap state'
+ where
+ state' = defaultStyleMaps {sNameSpaces = elemToNameSpaces docElem}
+ genStyleItem e = do
+ styleType <- getStyleType e
+ styleId <- getAttrStyleId e
+ nameValLowercase <- map toLower `fmap` getNameVal e
+ case styleType of
+ ParaStyle -> modParaStyleMap $ insert nameValLowercase styleId
+ CharStyle -> modCharStyleMap $ insert nameValLowercase styleId
+ genStyleMap = do
+ style <- elemName' "style"
+ let styles = findChildren style docElem
+ forM_ styles genStyleItem
+
+modParaStyleMap :: (ParaStyleMap -> ParaStyleMap) -> StateM ()
+modParaStyleMap f = modify $ \s ->
+ s {sParaStyleMap = f $ sParaStyleMap s}
+
+modCharStyleMap :: (CharStyleMap -> CharStyleMap) -> StateM ()
+modCharStyleMap f = modify $ \s ->
+ s {sCharStyleMap = f $ sCharStyleMap s}
+
+getStyleType :: Element -> StateM StyleType
+getStyleType e = do
+ styleTypeStr <- getAttrType e
+ case styleTypeStr of
+ "paragraph" -> return ParaStyle
+ "character" -> return CharStyle
+ _ -> lift Nothing
+
+getAttrType :: Element -> StateM String
+getAttrType el = do
+ name <- elemName' "type"
+ lift $ findAttr name el
+
+getAttrStyleId :: Element -> StateM String
+getAttrStyleId el = do
+ name <- elemName' "styleId"
+ lift $ findAttr name el
+
+getNameVal :: Element -> StateM String
+getNameVal el = do
+ name <- elemName' "name"
+ val <- elemName' "val"
+ lift $ findChild name el >>= findAttr val
+
+elemName' :: String -> StateM QName
+elemName' name = do
+ namespaces <- gets sNameSpaces
+ return $ elemName namespaces "w" name
diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs
new file mode 100644
index 000000000..891f107b0
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Docx/Util.hs
@@ -0,0 +1,26 @@
+module Text.Pandoc.Readers.Docx.Util (
+ NameSpaces
+ , elemName
+ , isElem
+ , elemToNameSpaces
+ ) where
+
+import Text.XML.Light
+import Data.Maybe (mapMaybe)
+
+type NameSpaces = [(String, String)]
+
+elemToNameSpaces :: Element -> NameSpaces
+elemToNameSpaces = mapMaybe attrToNSPair . elAttribs
+
+attrToNSPair :: Attr -> Maybe (String, String)
+attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
+attrToNSPair _ = Nothing
+
+elemName :: NameSpaces -> String -> String -> QName
+elemName ns prefix name = QName name (lookup prefix ns) (Just prefix)
+
+isElem :: NameSpaces -> String -> String -> Element -> Bool
+isElem ns prefix name element =
+ qName (elName element) == name &&
+ qURI (elName element) == lookup prefix ns
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 441392918..81369e278 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -29,7 +29,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of 'Pandoc' documents to docx.
-}
module Text.Pandoc.Writers.Docx ( writeDocx ) where
-import Data.List ( intercalate, isPrefixOf, isSuffixOf, stripPrefix )
+import Data.List ( intercalate, isPrefixOf, isSuffixOf )
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString.Lazy.Char8 as BL8
@@ -54,6 +54,8 @@ import Text.Pandoc.Walk
import Text.Highlighting.Kate.Types ()
import Text.XML.Light as XML
import Text.TeXMath
+import Text.Pandoc.Readers.Docx.StyleMap
+import Text.Pandoc.Readers.Docx.Util (elemName)
import Control.Monad.State
import Text.Highlighting.Kate
import Data.Unique (hashUnique, newUnique)
@@ -63,8 +65,7 @@ import qualified Control.Exception as E
import Text.Pandoc.MIME (MimeType, getMimeType, getMimeTypeDef,
extensionFromMimeType)
import Control.Applicative ((<$>), (<|>), (<*>))
-import Data.Maybe (fromMaybe, mapMaybe)
-import Data.Char (isDigit)
+import Data.Maybe (fromMaybe, mapMaybe, maybeToList)
data ListMarker = NoMarker
| BulletMarker
@@ -106,7 +107,7 @@ data WriterState = WriterState{
, stChangesAuthor :: String
, stChangesDate :: String
, stPrintWidth :: Integer
- , stHeadingStyles :: [(Int,String)]
+ , stStyleMaps :: StyleMaps
, stFirstPara :: Bool
}
@@ -127,7 +128,7 @@ defaultWriterState = WriterState{
, stChangesAuthor = "unknown"
, stChangesDate = "1969-12-31T19:00:00Z"
, stPrintWidth = 1
- , stHeadingStyles = []
+ , stStyleMaps = defaultStyleMaps
, stFirstPara = False
}
@@ -215,32 +216,14 @@ writeDocx opts doc@(Pandoc meta _) = do
styledoc <- parseXml refArchive distArchive stylepath
-- parse styledoc for heading styles
- let styleNamespaces = map ((,) <$> qName . attrKey <*> attrVal) .
- filter ((==Just "xmlns") . qPrefix . attrKey) .
- elAttribs $ styledoc
- let headingStyles =
- let
- mywURI = lookup "w" styleNamespaces
- myName name = QName name mywURI (Just "w")
- getAttrStyleId = findAttr (myName "styleId")
- getNameVal = findChild (myName "name") >=> findAttr (myName "val")
- getNum s | not $ null s, all isDigit s = Just (read s :: Int)
- | otherwise = Nothing
- getEngHeader = getAttrStyleId >=> stripPrefix "Heading" >=> getNum
- getIntHeader = getNameVal >=> stripPrefix "heading " >=> getNum
- toTuple getF = liftM2 (,) <$> getF <*> getAttrStyleId
- toMap getF = mapMaybe (toTuple getF) $
- findChildren (myName "style") styledoc
- select a b | not $ null a = a
- | otherwise = b
- in
- select (toMap getEngHeader) (toMap getIntHeader)
+ let styleMaps = getStyleMaps styledoc
((contents, footnotes), st) <- runStateT (writeOpenXML opts{writerWrapText = False} doc')
defaultWriterState{ stChangesAuthor = fromMaybe "unknown" username
, stChangesDate = formatTime defaultTimeLocale "%FT%XZ" utctime
, stPrintWidth = (maybe 420 (\x -> quot x 20) pgContentWidth)
- , stHeadingStyles = headingStyles}
+ , stStyleMaps = styleMaps
+ }
let epochtime = floor $ utcTimeToPOSIXSeconds utctime
let imgs = M.elems $ stImages st
@@ -393,9 +376,18 @@ writeDocx opts doc@(Pandoc meta _) = do
linkrels
-- styles
- let newstyles = styleToOpenXml $ writerHighlightStyle opts
- let styledoc' = styledoc{ elContent = elContent styledoc ++
- [Elem x | x <- newstyles, writerHighlight opts] }
+ let newstyles = styleToOpenXml styleMaps $ writerHighlightStyle opts
+ let styledoc' = styledoc{ elContent = modifyContent (elContent styledoc) }
+ where
+ modifyContent
+ | writerHighlight opts = (++ map Elem newstyles)
+ | otherwise = filter notTokStyle
+ notTokStyle (Elem el) = notStyle el || notTokId el
+ notTokStyle _ = True
+ notStyle = (/= elemName' "style") . elName
+ notTokId = maybe True (`notElem` tokStys) . findAttr (elemName' "styleId")
+ tokStys = "SourceCode" : map show (enumFromTo KeywordTok NormalTok)
+ elemName' = elemName (sNameSpaces styleMaps) "w"
let styleEntry = toEntry stylepath epochtime $ renderXml styledoc'
-- construct word/numbering.xml
@@ -472,10 +464,13 @@ writeDocx opts doc@(Pandoc meta _) = do
miscRelEntries ++ otherMediaEntries
return $ fromArchive archive
-styleToOpenXml :: Style -> [Element]
-styleToOpenXml style = parStyle : map toStyle alltoktypes
+styleToOpenXml :: StyleMaps -> Style -> [Element]
+styleToOpenXml sm style =
+ maybeToList parStyle ++ mapMaybe toStyle alltoktypes
where alltoktypes = enumFromTo KeywordTok NormalTok
- toStyle toktype = mknode "w:style" [("w:type","character"),
+ toStyle toktype | hasStyleName (show toktype) (sCharStyleMap sm) = Nothing
+ | otherwise = Just $
+ mknode "w:style" [("w:type","character"),
("w:customStyle","1"),("w:styleId",show toktype)]
[ mknode "w:name" [("w:val",show toktype)] ()
, mknode "w:basedOn" [("w:val","VerbatimChar")] ()
@@ -496,7 +491,9 @@ styleToOpenXml style = parStyle : map toStyle alltoktypes
tokBg toktype = maybe "auto" (drop 1 . fromColor)
$ (tokenBackground =<< lookup toktype tokStyles)
`mplus` backgroundColor style
- parStyle = mknode "w:style" [("w:type","paragraph"),
+ parStyle | hasStyleName "Source Code" (sParaStyleMap sm) = Nothing
+ | otherwise = Just $
+ mknode "w:style" [("w:type","paragraph"),
("w:customStyle","1"),("w:styleId","SourceCode")]
[ mknode "w:name" [("w:val","Source Code")] ()
, mknode "w:basedOn" [("w:val","Normal")] ()
@@ -602,14 +599,14 @@ writeOpenXML opts (Pandoc meta blocks) = do
Just (MetaBlocks [Para xs]) -> xs
Just (MetaInlines xs) -> xs
_ -> []
- title <- withParaProp (pStyle "Title") $ blocksToOpenXML opts [Para tit | not (null tit)]
- subtitle <- withParaProp (pStyle "Subtitle") $ blocksToOpenXML opts [Para subtitle' | not (null subtitle')]
- authors <- withParaProp (pStyle "Author") $ blocksToOpenXML opts $
+ title <- withParaPropM (pStyleM "Title") $ blocksToOpenXML opts [Para tit | not (null tit)]
+ subtitle <- withParaPropM (pStyleM "Subtitle") $ blocksToOpenXML opts [Para subtitle' | not (null subtitle')]
+ authors <- withParaProp (pCustomStyle "Author") $ blocksToOpenXML opts $
map Para auths
- date <- withParaProp (pStyle "Date") $ blocksToOpenXML opts [Para dat | not (null dat)]
+ date <- withParaPropM (pStyleM "Date") $ blocksToOpenXML opts [Para dat | not (null dat)]
abstract <- if null abstract'
then return []
- else withParaProp (pStyle "Abstract") $ blocksToOpenXML opts abstract'
+ else withParaProp (pCustomStyle "Abstract") $ blocksToOpenXML opts abstract'
let convertSpace (Str x : Space : Str y : xs) = Str (x ++ " " ++ y) : xs
convertSpace (Str x : Str y : xs) = Str (x ++ y) : xs
convertSpace xs = xs
@@ -623,11 +620,23 @@ writeOpenXML opts (Pandoc meta blocks) = do
blocksToOpenXML :: WriterOptions -> [Block] -> WS [Element]
blocksToOpenXML opts bls = concat `fmap` mapM (blockToOpenXML opts) bls
-pStyle :: String -> Element
-pStyle sty = mknode "w:pStyle" [("w:val",sty)] ()
+pCustomStyle :: String -> Element
+pCustomStyle sty = mknode "w:pStyle" [("w:val",sty)] ()
-rStyle :: String -> Element
-rStyle sty = mknode "w:rStyle" [("w:val",sty)] ()
+pStyleM :: String -> WS XML.Element
+pStyleM styleName = do
+ styleMaps <- gets stStyleMaps
+ let sty' = getStyleId styleName $ sParaStyleMap styleMaps
+ return $ mknode "w:pStyle" [("w:val",sty')] ()
+
+rCustomStyle :: String -> Element
+rCustomStyle sty = mknode "w:rStyle" [("w:val",sty)] ()
+
+rStyleM :: String -> WS XML.Element
+rStyleM styleName = do
+ styleMaps <- gets stStyleMaps
+ let sty' = getStyleId styleName $ sCharStyleMap styleMaps
+ return $ mknode "w:rStyle" [("w:val",sty')] ()
getUniqueId :: MonadIO m => m String
-- the + 20 is to ensure that there are no clashes with the rIds
@@ -641,13 +650,12 @@ blockToOpenXML opts (Div (_,["references"],_) bs) = do
let (hs, bs') = span isHeaderBlock bs
header <- blocksToOpenXML opts hs
-- We put the Bibliography style on paragraphs after the header
- rest <- withParaProp (pStyle "Bibliography") $ blocksToOpenXML opts bs'
+ rest <- withParaPropM (pStyleM "Bibliography") $ blocksToOpenXML opts bs'
return (header ++ rest)
blockToOpenXML opts (Div _ bs) = blocksToOpenXML opts bs
blockToOpenXML opts (Header lev (ident,_,_) lst) = do
setFirstPara
- headingStyles <- gets stHeadingStyles
- paraProps <- maybe id (withParaProp . pStyle) (lookup lev headingStyles) $
+ paraProps <- withParaPropM (pStyleM ("Heading "++show lev)) $
getParaProps False
contents <- inlinesToOpenXML opts lst
usedIdents <- gets stSectionIds
@@ -660,26 +668,27 @@ blockToOpenXML opts (Header lev (ident,_,_) lst) = do
,("w:name",bookmarkName)] ()
let bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] ()
return [mknode "w:p" [] (paraProps ++ [bookmarkStart, bookmarkEnd] ++ contents)]
-blockToOpenXML opts (Plain lst) = withParaProp (pStyle "Compact")
+blockToOpenXML opts (Plain lst) = withParaProp (pCustomStyle "Compact")
$ blockToOpenXML opts (Para lst)
-- title beginning with fig: indicates that the image is a figure
blockToOpenXML opts (Para [Image alt (src,'f':'i':'g':':':tit)]) = do
setFirstPara
paraProps <- getParaProps False
contents <- inlinesToOpenXML opts [Image alt (src,tit)]
- captionNode <- withParaProp (pStyle "ImageCaption")
+ captionNode <- withParaProp (pCustomStyle "ImageCaption")
$ blockToOpenXML opts (Para alt)
return $ mknode "w:p" [] (paraProps ++ contents) : captionNode
-- fixDisplayMath sometimes produces a Para [] as artifact
blockToOpenXML _ (Para []) = return []
blockToOpenXML opts (Para lst) = do
- isFirstPara <- gets stFirstPara
+ isFirstPara <- gets stFirstPara
paraProps <- getParaProps $ case lst of
[Math DisplayMath _] -> True
_ -> False
+ bodyTextStyle <- pStyleM "Body Text"
let paraProps' = case paraProps of
- [] | isFirstPara -> [mknode "w:pPr" [] [(pStyle "FirstParagraph")]]
- [] -> [mknode "w:pPr" [] [(pStyle "BodyText")]]
+ [] | isFirstPara -> [mknode "w:pPr" [] [pCustomStyle "FirstParagraph"]]
+ [] -> [mknode "w:pPr" [] [bodyTextStyle]]
ps -> ps
modify $ \s -> s { stFirstPara = False }
contents <- inlinesToOpenXML opts lst
@@ -688,11 +697,11 @@ blockToOpenXML _ (RawBlock format str)
| format == Format "openxml" = return [ x | Elem x <- parseXML str ]
| otherwise = return []
blockToOpenXML opts (BlockQuote blocks) = do
- p <- withParaProp (pStyle "BlockQuote") $ blocksToOpenXML opts blocks
+ p <- withParaPropM (pStyleM "Block Text") $ blocksToOpenXML opts blocks
setFirstPara
return p
blockToOpenXML opts (CodeBlock attrs str) = do
- p <- withParaProp (pStyle "SourceCode") $ (blockToOpenXML opts $ Para [Code attrs str])
+ p <- withParaProp (pCustomStyle "SourceCode") (blockToOpenXML opts $ Para [Code attrs str])
setFirstPara
return p
blockToOpenXML _ HorizontalRule = do
@@ -707,7 +716,7 @@ blockToOpenXML opts (Table caption aligns widths headers rows) = do
let captionStr = stringify caption
caption' <- if null caption
then return []
- else withParaProp (pStyle "TableCaption")
+ else withParaProp (pCustomStyle "TableCaption")
$ blockToOpenXML opts (Para caption)
let alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] ()
let cellToOpenXML (al, cell) = withParaProp (alignmentFor al)
@@ -718,32 +727,36 @@ blockToOpenXML opts (Table caption aligns widths headers rows) = do
[ mknode "w:tcBorders" []
$ mknode "w:bottom" [("w:val","single")] ()
, mknode "w:vAlign" [("w:val","bottom")] () ]
- let emptyCell = [mknode "w:p" [] [mknode "w:pPr" []
- [mknode "w:pStyle" [("w:val","Compact")] ()]]]
+ let emptyCell = [mknode "w:p" [] [pCustomStyle "Compact"]]
let mkcell border contents = mknode "w:tc" []
$ [ borderProps | border ] ++
if null contents
then emptyCell
else contents
- let mkrow border cells = mknode "w:tr" [] $ map (mkcell border) cells
+ let mkrow border cells = mknode "w:tr" [] $
+ [mknode "w:trPr" [] [
+ mknode "w:cnfStyle" [("w:firstRow","1")] ()] | border]
+ ++ map (mkcell border) cells
let textwidth = 7920 -- 5.5 in in twips, 1/20 pt
let fullrow = 5000 -- 100% specified in pct
let rowwidth = fullrow * sum widths
let mkgridcol w = mknode "w:gridCol"
[("w:w", show (floor (textwidth * w) :: Integer))] ()
+ let hasHeader = not (all null headers)
return $
caption' ++
[mknode "w:tbl" []
( mknode "w:tblPr" []
( mknode "w:tblStyle" [("w:val","TableNormal")] () :
mknode "w:tblW" [("w:type", "pct"), ("w:w", show rowwidth)] () :
+ mknode "w:tblLook" [("w:firstRow","1") | hasHeader ] () :
[ mknode "w:tblCaption" [("w:val", captionStr)] ()
| not (null caption) ] )
: mknode "w:tblGrid" []
(if all (==0) widths
then []
else map mkgridcol widths)
- : [ mkrow True headers' | not (all null headers) ] ++
+ : [ mkrow True headers' | hasHeader ] ++
map (mkrow False) rows'
)]
blockToOpenXML opts (BulletList lst) = do
@@ -767,9 +780,9 @@ blockToOpenXML opts (DefinitionList items) = do
definitionListItemToOpenXML :: WriterOptions -> ([Inline],[[Block]]) -> WS [Element]
definitionListItemToOpenXML opts (term,defs) = do
- term' <- withParaProp (pStyle "DefinitionTerm")
+ term' <- withParaProp (pCustomStyle "DefinitionTerm")
$ blockToOpenXML opts (Para term)
- defs' <- withParaProp (pStyle "Definition")
+ defs' <- withParaProp (pCustomStyle "Definition")
$ concat `fmap` mapM (blocksToOpenXML opts) defs
return $ term' ++ defs'
@@ -833,6 +846,9 @@ withTextProp d p = do
popTextProp
return res
+withTextPropM :: WS Element -> WS a -> WS a
+withTextPropM = (. flip withTextProp) . (>>=)
+
getParaProps :: Bool -> WS [Element]
getParaProps displayMathPara = do
props <- gets stParaProperties
@@ -861,6 +877,9 @@ withParaProp d p = do
popParaProp
return res
+withParaPropM :: WS Element -> WS a -> WS a
+withParaPropM = (. flip withParaProp) . (>>=)
+
formattedString :: String -> WS [Element]
formattedString str = do
props <- getTextProps
@@ -943,25 +962,26 @@ inlineToOpenXML opts (Math mathType str) = do
Right r -> return [r]
Left _ -> inlinesToOpenXML opts (texMathToInlines mathType str)
inlineToOpenXML opts (Cite _ lst) = inlinesToOpenXML opts lst
-inlineToOpenXML opts (Code attrs str) =
- withTextProp (rStyle "VerbatimChar")
- $ if writerHighlight opts
- then case highlight formatOpenXML attrs str of
- Nothing -> unhighlighted
- Just h -> return h
- else unhighlighted
- where unhighlighted = intercalate [br] `fmap`
- (mapM formattedString $ lines str)
- formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok)
- toHlTok (toktype,tok) = mknode "w:r" []
- [ mknode "w:rPr" []
- [ rStyle $ show toktype ]
- , mknode "w:t" [("xml:space","preserve")] tok ]
+inlineToOpenXML opts (Code attrs str) = do
+ let unhighlighted = intercalate [br] `fmap`
+ (mapM formattedString $ lines str)
+ formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok)
+ toHlTok (toktype,tok) = mknode "w:r" []
+ [ mknode "w:rPr" []
+ [ rCustomStyle (show toktype) ]
+ , mknode "w:t" [("xml:space","preserve")] tok ]
+ withTextProp (rCustomStyle "VerbatimChar")
+ $ if writerHighlight opts
+ then case highlight formatOpenXML attrs str of
+ Nothing -> unhighlighted
+ Just h -> return h
+ else unhighlighted
inlineToOpenXML opts (Note bs) = do
notes <- gets stFootnotes
notenum <- getUniqueId
+ footnoteStyle <- rStyleM "Footnote Reference"
let notemarker = mknode "w:r" []
- [ mknode "w:rPr" [] (rStyle "FootnoteRef")
+ [ mknode "w:rPr" [] footnoteStyle
, mknode "w:footnoteRef" [] () ]
let notemarkerXml = RawInline (Format "openxml") $ ppElement notemarker
let insertNoteRef (Plain ils : xs) = Plain (notemarkerXml : ils) : xs
@@ -971,22 +991,22 @@ inlineToOpenXML opts (Note bs) = do
oldParaProperties <- gets stParaProperties
oldTextProperties <- gets stTextProperties
modify $ \st -> st{ stListLevel = -1, stParaProperties = [], stTextProperties = [] }
- contents <- withParaProp (pStyle "FootnoteText") $ blocksToOpenXML opts
+ contents <- withParaPropM (pStyleM "Footnote Text") $ blocksToOpenXML opts
$ insertNoteRef bs
modify $ \st -> st{ stListLevel = oldListLevel, stParaProperties = oldParaProperties,
stTextProperties = oldTextProperties }
let newnote = mknode "w:footnote" [("w:id", notenum)] $ contents
modify $ \s -> s{ stFootnotes = newnote : notes }
return [ mknode "w:r" []
- [ mknode "w:rPr" [] (rStyle "FootnoteRef")
+ [ mknode "w:rPr" [] footnoteStyle
, mknode "w:footnoteReference" [("w:id", notenum)] () ] ]
-- internal link:
inlineToOpenXML opts (Link txt ('#':xs,_)) = do
- contents <- withTextProp (rStyle "Link") $ inlinesToOpenXML opts txt
+ contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt
return [ mknode "w:hyperlink" [("w:anchor",xs)] contents ]
-- external link:
inlineToOpenXML opts (Link txt (src,_)) = do
- contents <- withTextProp (rStyle "Link") $ inlinesToOpenXML opts txt
+ contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt
extlinks <- gets stExternalLinks
id' <- case M.lookup src extlinks of
Just i -> return i
@@ -1088,7 +1108,7 @@ defaultFootnotes = [ mknode "w:footnote"
[ mknode "w:p" [] $
[ mknode "w:r" [] $
[ mknode "w:continuationSeparator" [] ()]]]]
-
+
parseXml :: Archive -> Archive -> String -> IO Element
parseXml refArchive distArchive relpath =
case ((findEntryByPath relpath refArchive `mplus`
diff --git a/tests/Tests/Writers/Docx.hs b/tests/Tests/Writers/Docx.hs
new file mode 100644
index 000000000..80ce0014d
--- /dev/null
+++ b/tests/Tests/Writers/Docx.hs
@@ -0,0 +1,128 @@
+module Tests.Writers.Docx (tests) where
+
+import Text.Pandoc.Options
+import Text.Pandoc.Readers.Native
+import Text.Pandoc.Definition
+import Tests.Helpers
+import Test.Framework
+import Text.Pandoc.Readers.Docx
+import Text.Pandoc.Writers.Docx
+
+type Options = (WriterOptions, ReaderOptions)
+
+compareOutput :: Options
+ -> FilePath
+ -> IO (Pandoc, Pandoc)
+compareOutput opts nativeFile = do
+ nf <- Prelude.readFile nativeFile
+ df <- writeDocx (fst opts) (readNative nf)
+ let (p, _) = readDocx (snd opts) df
+ return (p, readNative nf)
+
+testCompareWithOptsIO :: Options -> String -> FilePath -> IO Test
+testCompareWithOptsIO opts name nativeFile = do
+ (dp, np) <- compareOutput opts nativeFile
+ return $ test id name (dp, np)
+
+testCompareWithOpts :: Options -> String -> FilePath -> Test
+testCompareWithOpts opts name nativeFile =
+ buildTest $ testCompareWithOptsIO opts name nativeFile
+
+testCompare :: String -> FilePath -> Test
+testCompare = testCompareWithOpts def
+
+tests :: [Test]
+tests = [ testGroup "inlines"
+ [ testCompare
+ "font formatting"
+ "docx/inline_formatting_writer.native"
+ , testCompare
+ "font formatting with character styles"
+ "docx/char_styles.native"
+ , testCompare
+ "hyperlinks"
+ "docx/links_writer.native"
+ , testCompare
+ "inline image"
+ "docx/image_no_embed_writer.native"
+ , testCompare
+ "inline image in links"
+ "docx/inline_images_writer.native"
+ , testCompare
+ "handling unicode input"
+ "docx/unicode.native"
+ , testCompare
+ "literal tabs"
+ "docx/tabs.native"
+ , testCompare
+ "normalizing inlines"
+ "docx/normalize.native"
+ , testCompare
+ "normalizing inlines deep inside blocks"
+ "docx/deep_normalize.native"
+ , testCompare
+ "move trailing spaces outside of formatting"
+ "docx/trailing_spaces_in_formatting.native"
+ , testCompare
+ "inline code (with VerbatimChar style)"
+ "docx/inline_code.native"
+ , testCompare
+ "inline code in subscript and superscript"
+ "docx/verbatim_subsuper.native"
+ ]
+ , testGroup "blocks"
+ [ testCompare
+ "headers"
+ "docx/headers.native"
+ , testCompare
+ "headers already having auto identifiers"
+ "docx/already_auto_ident.native"
+ , testCompare
+ "numbered headers automatically made into list"
+ "docx/numbered_header.native"
+ , testCompare
+ "i18n blocks (headers and blockquotes)"
+ "docx/i18n_blocks.native"
+ -- Continuation does not survive round-trip
+ , testCompare
+ "lists"
+ "docx/lists_writer.native"
+ , testCompare
+ "definition lists"
+ "docx/definition_list.native"
+ , testCompare
+ "custom defined lists in styles"
+ "docx/german_styled_lists.native"
+ , testCompare
+ "footnotes and endnotes"
+ "docx/notes.native"
+ , testCompare
+ "blockquotes (parsing indent as blockquote)"
+ "docx/block_quotes_parse_indent.native"
+ , testCompare
+ "hanging indents"
+ "docx/hanging_indent.native"
+ -- tables headers do not survive round-trip, should look into that
+ , testCompare
+ "tables"
+ "docx/tables.native"
+ , testCompare
+ "tables with lists in cells"
+ "docx/table_with_list_cell.native"
+ , testCompare
+ "code block"
+ "docx/codeblock.native"
+ , testCompare
+ "dropcap paragraphs"
+ "docx/drop_cap.native"
+ ]
+ , testGroup "metadata"
+ [ testCompareWithOpts (def,def{readerStandalone=True})
+ "metadata fields"
+ "docx/metadata.native"
+ , testCompareWithOpts (def,def{readerStandalone=True})
+ "stop recording metadata with normal text"
+ "docx/metadata_after_normal.native"
+ ]
+
+ ]
diff --git a/tests/docx/image_no_embed_writer.native b/tests/docx/image_no_embed_writer.native
new file mode 100644
index 000000000..21802ebd1
--- /dev/null
+++ b/tests/docx/image_no_embed_writer.native
@@ -0,0 +1,2 @@
+[Para [Str "An",Space,Str "image:"]
+,Para [Image [] ("media/rId25.jpg","")]]
diff --git a/tests/docx/inline_formatting_writer.native b/tests/docx/inline_formatting_writer.native
new file mode 100644
index 000000000..be346204e
--- /dev/null
+++ b/tests/docx/inline_formatting_writer.native
@@ -0,0 +1,5 @@
+[Para [Str "Regular",Space,Str "text",Space,Emph [Str "italics"],Space,Strong [Str "bold",Space,Emph [Str "bold",Space,Str "italics"]],Str "."]
+,Para [Str "This",Space,Str "is",Space,SmallCaps [Str "Small",Space,Str "Caps"],Str ",",Space,Str "and",Space,Str "this",Space,Str "is",Space,Strikeout [Str "strikethrough"],Str "."]
+,Para [Str "Some",Space,Str "people",Space,Str "use",Space,Emph [Str "single",Space,Str "underlines",Space,Str "for",Space,Str "emphasis"],Str "."]
+,Para [Str "Above",Space,Str "the",Space,Str "line",Space,Str "is",Space,Superscript [Str "superscript"],Space,Str "and",Space,Str "below",Space,Str "the",Space,Str "line",Space,Str "is",Space,Subscript [Str "subscript"],Str "."]
+,Para [Str "A",Space,Str "line",LineBreak,Str "break."]]
diff --git a/tests/docx/inline_images_writer.native b/tests/docx/inline_images_writer.native
new file mode 100644
index 000000000..da2a2709b
--- /dev/null
+++ b/tests/docx/inline_images_writer.native
@@ -0,0 +1,2 @@
+[Para [Str "This",Space,Str "picture",Space,Image [] ("media/rId26.jpg",""),Space,Str "is",Space,Str "an",Space,Str "identicon."]
+,Para [Str "Here",Space,Str "is",Space,Link [Str "one",Space,Image [] ("media/rId27.jpg",""),Space,Str "that"] ("http://www.google.com",""),Space,Str "links."]]
diff --git a/tests/docx/links_writer.native b/tests/docx/links_writer.native
new file mode 100644
index 000000000..cc00e4326
--- /dev/null
+++ b/tests/docx/links_writer.native
@@ -0,0 +1,6 @@
+[Header 2 ("an-internal-link-and-an-external-link",[],[]) [Str "An",Space,Str "internal",Space,Str "link",Space,Str "and",Space,Str "an",Space,Str "external",Space,Str "link"]
+,Para [Str "An",Space,Link [Str "external",Space,Str "link"] ("http://google.com",""),Space,Str "to",Space,Str "a",Space,Str "popular",Space,Str "website."]
+,Para [Str "An",Space,Link [Str "external",Space,Str "link"] ("http://johnmacfarlane.net/pandoc/README.html#synopsis",""),Space,Str "to",Space,Str "a",Space,Str "website",Space,Str "with",Space,Str "an",Space,Str "anchor."]
+,Para [Str "An",Space,Link [Str "internal",Space,Str "link"] ("#a-section-for-testing-link-targets",""),Space,Str "to",Space,Str "a",Space,Str "section",Space,Str "header."]
+,Para [Str "An",Space,Link [Str "internal",Space,Str "link"] ("#my_bookmark",""),Space,Str "to",Space,Str "a",Space,Str "bookmark."]
+,Header 2 ("a-section-for-testing-link-targets",[],[]) [Str "A",Space,Str "section",Space,Str "for",Space,Str "testing",Space,Str "link",Space,Str "targets"]]
diff --git a/tests/docx/lists_writer.native b/tests/docx/lists_writer.native
new file mode 100644
index 000000000..4c44ea603
--- /dev/null
+++ b/tests/docx/lists_writer.native
@@ -0,0 +1,17 @@
+[Header 2 ("some-nested-lists",[],[]) [Str "Some",Space,Str "nested",Space,Str "lists"]
+,OrderedList (1,Decimal,Period)
+ [[Para [Str "one"]]
+ ,[Para [Str "two"]
+ ,OrderedList (1,LowerAlpha,DefaultDelim)
+ [[Para [Str "a"]]
+ ,[Para [Str "b"]]]]]
+,BulletList
+ [[Para [Str "one"]]
+ ,[Para [Str "two"]
+ ,BulletList
+ [[Para [Str "three"]
+ ,BulletList
+ [[Para [Str "four"]]]]]]
+ ,[Para [Str "Same",Space,Str "list"]]]
+,BulletList
+ [[Para [Str "Different",Space,Str "list",Space,Str "adjacent",Space,Str "to",Space,Str "the",Space,Str "one",Space,Str "above."]]]]
diff --git a/tests/media/rId25.jpg b/tests/media/rId25.jpg
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tests/media/rId25.jpg
diff --git a/tests/media/rId26.jpg b/tests/media/rId26.jpg
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tests/media/rId26.jpg
diff --git a/tests/media/rId27.jpg b/tests/media/rId27.jpg
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tests/media/rId27.jpg
diff --git a/tests/test-pandoc.hs b/tests/test-pandoc.hs
index b7b1c30b1..dd92a271a 100644
--- a/tests/test-pandoc.hs
+++ b/tests/test-pandoc.hs
@@ -20,6 +20,7 @@ import qualified Tests.Writers.Native
import qualified Tests.Writers.Markdown
import qualified Tests.Writers.Plain
import qualified Tests.Writers.AsciiDoc
+import qualified Tests.Writers.Docx
import qualified Tests.Shared
import qualified Tests.Walk
import Text.Pandoc.Shared (inDirectory)
@@ -38,6 +39,7 @@ tests = [ testGroup "Old" Tests.Old.tests
, testGroup "Markdown" Tests.Writers.Markdown.tests
, testGroup "Plain" Tests.Writers.Plain.tests
, testGroup "AsciiDoc" Tests.Writers.AsciiDoc.tests
+ , testGroup "Docx" Tests.Writers.Docx.tests
]
, testGroup "Readers"
[ testGroup "LaTeX" Tests.Readers.LaTeX.tests