diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/Text/Pandoc.hs | 4 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/TeXMath.hs | 11 | ||||
-rw-r--r-- | src/Text/Pandoc/Templates.hs | 1 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Docx.hs | 626 | ||||
-rw-r--r-- | src/pandoc.hs | 29 |
5 files changed, 658 insertions, 13 deletions
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index e3c029992..c505ec965 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -94,6 +94,7 @@ module Text.Pandoc , writeTextile , writeRTF , writeODT + , writeDocx , writeEPUB , writeOrg , writeAsciiDoc @@ -128,6 +129,7 @@ import Text.Pandoc.Writers.ConTeXt import Text.Pandoc.Writers.Texinfo import Text.Pandoc.Writers.HTML import Text.Pandoc.Writers.ODT +import Text.Pandoc.Writers.Docx import Text.Pandoc.Writers.EPUB import Text.Pandoc.Writers.Docbook import Text.Pandoc.Writers.OpenDocument @@ -166,7 +168,7 @@ readers = [("native" , \_ -> readNative) ] -- | Association list of formats and writers (omitting the --- binary writers, odt and epub). +-- binary writers, odt, docx, and epub). writers :: [ ( String, WriterOptions -> Pandoc -> String ) ] writers = [("native" , writeNative) ,("json" , \_ -> encodeJSON) diff --git a/src/Text/Pandoc/Readers/TeXMath.hs b/src/Text/Pandoc/Readers/TeXMath.hs index c24f29585..67dfe6753 100644 --- a/src/Text/Pandoc/Readers/TeXMath.hs +++ b/src/Text/Pandoc/Readers/TeXMath.hs @@ -69,6 +69,9 @@ expToInlines (ESymbol t s) = Just $ addSpace t (Str s) medspace = Str "\x2005" widespace = Str "\x2004" expToInlines (EStretchy x) = expToInlines x +expToInlines (EDelimited start end xs) = do + xs' <- mapM expToInlines xs + return $ [Str start] ++ concat xs' ++ [Str end] expToInlines (EGrouped xs) = expsToInlines xs expToInlines (ESpace "0.167em") = Just [Str "\x2009"] expToInlines (ESpace "0.222em") = Just [Str "\x2005"] @@ -94,10 +97,10 @@ expToInlines (ESubsup x y z) = do expToInlines (EDown x y) = expToInlines (ESub x y) expToInlines (EUp x y) = expToInlines (ESuper x y) expToInlines (EDownup x y z) = expToInlines (ESubsup x y z) -expToInlines (EText "normal" x) = Just [Str x] -expToInlines (EText "bold" x) = Just [Strong [Str x]] -expToInlines (EText "monospace" x) = Just [Code nullAttr x] -expToInlines (EText "italic" x) = Just [Emph [Str x]] +expToInlines (EText TextNormal x) = Just [Str x] +expToInlines (EText TextBold x) = Just [Strong [Str x]] +expToInlines (EText TextMonospace x) = Just [Code nullAttr x] +expToInlines (EText TextItalic x) = Just [Emph [Str x]] expToInlines (EText _ x) = Just [Str x] expToInlines (EOver (EGrouped [EIdentifier [c]]) (ESymbol Accent [accent])) = case accent of diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index 0d627e447..1847cb0de 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -84,6 +84,7 @@ getDefaultTemplate :: (Maybe FilePath) -- ^ User data directory to search first getDefaultTemplate _ "native" = return $ Right "" getDefaultTemplate _ "json" = return $ Right "" getDefaultTemplate user "odt" = getDefaultTemplate user "opendocument" +getDefaultTemplate user "docx" = getDefaultTemplate user "openxml" getDefaultTemplate user "epub" = getDefaultTemplate user "html" getDefaultTemplate user "beamer" = getDefaultTemplate user "latex" getDefaultTemplate user writer = do diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs new file mode 100644 index 000000000..4fa89acac --- /dev/null +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -0,0 +1,626 @@ +{- +Copyright (C) 2012 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.Docx + Copyright : Copyright (C) 2012 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to docx. +-} +module Text.Pandoc.Writers.Docx ( writeDocx ) where +import Data.List ( intercalate, elemIndex ) +import System.FilePath ( (</>) ) +import qualified Data.ByteString.Lazy as B +import qualified Data.Map as M +import Data.ByteString.Lazy.UTF8 ( fromString, toString ) +import Codec.Archive.Zip +import System.Time +import Paths_pandoc ( getDataFileName ) +import Text.Pandoc.Definition +import Text.Pandoc.Generic +import System.Directory +import Text.Pandoc.ImageSize +import Text.Pandoc.Shared hiding (Element) +import Text.Pandoc.Readers.TeXMath +import Text.Pandoc.Highlighting ( highlight ) +import Text.Highlighting.Kate.Types () +import Text.XML.Light +import Text.TeXMath +import Control.Monad.State +import Text.Highlighting.Kate + +data WriterState = WriterState{ + stTextProperties :: [Element] + , stParaProperties :: [Element] + , stFootnotes :: [Element] + , stSectionIds :: [String] + , stExternalLinks :: M.Map String String + , stImages :: M.Map FilePath (String, B.ByteString) + , stListLevel :: Int + , stListMarker :: ListMarker + , stMarkersUsed :: [ListMarker] + } + +data ListMarker = NoMarker + | BulletMarker + | NumberMarker ListNumberStyle ListNumberDelim Int + deriving (Show, Read, Eq, Ord) + +defaultWriterState :: WriterState +defaultWriterState = WriterState{ + stTextProperties = [] + , stParaProperties = [] + , stFootnotes = [] + , stSectionIds = [] + , stExternalLinks = M.empty + , stImages = M.empty + , stListLevel = 0 -- not in a list + , stListMarker = NoMarker + , stMarkersUsed = [NoMarker] + } + +type WS a = StateT WriterState IO a + +showTopElement' :: Element -> String +showTopElement' x = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++ showElement x + +mknode :: Node t => String -> [(String,String)] -> t -> Element +mknode s attrs = + add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) . node (unqual s) + +-- | Produce an Docx file from a Pandoc document. +writeDocx :: Maybe FilePath -- ^ Path specified by --reference-docx + -> WriterOptions -- ^ Writer options + -> Pandoc -- ^ Document to convert + -> IO B.ByteString +writeDocx mbRefDocx opts doc@(Pandoc (Meta tit auths _) _) = do + let datadir = writerUserDataDir opts + refArchive <- liftM toArchive $ + case mbRefDocx of + Just f -> B.readFile f + Nothing -> do + let defaultDocx = getDataFileName "reference.docx" >>= B.readFile + case datadir of + Nothing -> defaultDocx + Just d -> do + exists <- doesFileExist (d </> "reference.docx") + if exists + then B.readFile (d </> "reference.docx") + else defaultDocx + + (newContents, st) <- runStateT (writeOpenXML opts{writerWrapText = False} doc) + defaultWriterState + (TOD epochtime _) <- getClockTime + let imgs = M.elems $ stImages st + let imgPath ident img = "media/" ++ ident ++ + case imageType img of + Just Png -> ".png" + Just Jpeg -> ".jpeg" + Just Gif -> ".gif" + Nothing -> "" + let toImgRel (ident,img) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"),("Id",ident),("Target",imgPath ident img)] () + let newrels = map toImgRel imgs + let relpath = "word/_rels/document.xml.rels" + let reldoc = case findEntryByPath relpath refArchive >>= + parseXMLDoc . toString . fromEntry of + Just d -> d + Nothing -> error $ relpath ++ "missing in reference docx" + let reldoc' = reldoc{ elContent = elContent reldoc ++ map Elem newrels } + -- create entries for images + let toImageEntry (ident,img) = toEntry ("word/" ++ imgPath ident img) + epochtime img + let imageEntries = map toImageEntry imgs + -- NOW get list of external links and images from this, and do what's needed + let toLinkRel (src,ident) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"),("Id",ident),("Target",src),("TargetMode","External") ] () + let newrels' = map toLinkRel $ M.toList $ stExternalLinks st + let reldoc'' = reldoc' { elContent = elContent reldoc' ++ map Elem newrels' } + let relEntry = toEntry relpath epochtime $ fromString $ showTopElement' reldoc'' + let contentEntry = toEntry "word/document.xml" epochtime $ fromString $ showTopElement' newContents + -- styles + let newstyles = styleToOpenXml $ writerHighlightStyle opts + let stylepath = "word/styles.xml" + let styledoc = case findEntryByPath stylepath refArchive >>= + parseXMLDoc . toString . fromEntry of + Just d -> d + Nothing -> error $ stylepath ++ "missing in reference docx" + let styledoc' = styledoc{ elContent = elContent styledoc ++ map Elem newstyles } + let styleEntry = toEntry stylepath epochtime $ fromString $ showTopElement' styledoc' + -- construct word/numbering.xml + let markersUsed = stMarkersUsed st + let numpath = "word/numbering.xml" + let numEntry = toEntry numpath epochtime $ fromString $ showTopElement' $ mkNumbering markersUsed + -- TODO add metadata, etc. + let docPropsPath = "docProps/core.xml" + let docProps = mknode "cp:coreProperties" + [("xmlns:cp","http://schemas.openxmlformats.org/package/2006/metadata/core-properties") + ,("xmlns:dc","http://purl.org/dc/elements/1.1/") + ,("xmlns:dcterms","http://purl.org/dc/terms/") + ,("xmlns:dcmitype","http://purl.org/dc/dcmitype/") + ,("xmlns:xsi","http://www.w3.org/2001/XMLSchema-instance")] + $ mknode "dc:title" [] (stringify tit) + : mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] () -- put doc date here + : mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] () -- put current time here + : map (mknode "dc:creator" [] . stringify) auths + let docPropsEntry = toEntry docPropsPath epochtime $ fromString $ showTopElement' docProps + let archive = foldr addEntryToArchive refArchive $ + contentEntry : relEntry : numEntry : styleEntry : docPropsEntry : imageEntries + return $ fromArchive archive + +styleToOpenXml :: Style -> [Element] +styleToOpenXml style = parStyle : map toStyle alltoktypes + where alltoktypes = enumFromTo KeywordTok NormalTok + toStyle toktype = mknode "w:style" [("w:type","character"), + ("w:customStyle","1"),("w:styleId",show toktype)] + [ mknode "w:name" [("w:val",show toktype)] () + , mknode "w:basedOn" [("w:val","VerbatimChar")] () + , mknode "w:rPr" [] $ + [ mknode "w:color" [("w:val",tokCol toktype)] () + | tokCol toktype /= "auto" ] ++ + [ mknode "w:shd" [("w:val","clear"),("w:fill",tokBg toktype)] () + | tokBg toktype /= "auto" ] ++ + [ mknode "w:b" [] () | tokFeature tokenBold toktype ] ++ + [ mknode "w:i" [] () | tokFeature tokenItalic toktype ] ++ + [ mknode "w:u" [] () | tokFeature tokenUnderline toktype ] + ] + tokStyles = tokenStyles style + tokFeature f toktype = maybe False f $ lookup toktype tokStyles + tokCol toktype = maybe "auto" (drop 1 . fromColor) + $ (tokenColor =<< lookup toktype tokStyles) + `mplus` defaultColor style + tokBg toktype = maybe "auto" (drop 1 . fromColor) + $ (tokenBackground =<< lookup toktype tokStyles) + `mplus` backgroundColor style + parStyle = mknode "w:style" [("w:type","paragraph"), + ("w:customStyle","1"),("w:styleId","SourceCode")] + [ mknode "w:name" [("w:val","Source Code")] () + , mknode "w:basedOn" [("w:val","Normal")] () + , mknode "w:link" [("w:val","VerbatimChar")] () + , mknode "w:pPr" [] + $ mknode "w:wordWrap" [("w:val","off")] () + : ( maybe [] (\col -> [mknode "w:shd" [("w:val","clear"),("w:fill",drop 1 $ fromColor col)] ()]) + $ backgroundColor style ) + ] + +mkNumbering :: [ListMarker] -> Element +mkNumbering markers = + mknode "w:numbering" [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main")] + $ zipWith mkAbstractNum nums markers + ++ map mkNum nums + where nums = [1..(length markers)] + +mkNum :: Int -> Element +mkNum numid = + mknode "w:num" [("w:numId",show numid)] + $ mknode "w:abstractNumId" [("w:val",show numid)] () + +mkAbstractNum :: Int -> ListMarker -> Element +mkAbstractNum numid marker = + mknode "w:abstractNum" [("w:abstractNumId",show numid)] + $ mknode "w:multiLevelType" [("w:val","multilevel")] () + : map (mkLvl marker) [0..6] + +mkLvl :: ListMarker -> Int -> Element +mkLvl marker lvl = + mknode "w:lvl" [("w:ilvl",show lvl)] $ + [ mknode "w:start" [("w:val",start)] () + | marker /= NoMarker && marker /= BulletMarker ] ++ + [ mknode "w:numFmt" [("w:val",fmt)] () + , mknode "w:lvlText" [("w:val",lvltxt)] () + , mknode "w:lvlJc" [("w:val","left")] () + , mknode "w:pPr" [] + [ mknode "w:tabs" [] + $ mknode "w:tab" [("w:val","num"),("w:pos",show $ lvl * step)] () + , mknode "w:ind" [("w:left",show $ lvl * step + hang),("w:hanging",show hang)] () + ] + ] + where (fmt, lvltxt, start) = + case marker of + NoMarker -> ("bullet"," ","1") + BulletMarker -> ("bullet",bulletFor lvl,"1") + NumberMarker st de n -> (styleFor st lvl + ,patternFor de ("%" ++ show (lvl + 1)) + ,show n) + step = 720 + hang = step `div` 2 + bulletFor 1 = "\8226" + bulletFor 2 = "\9702" + bulletFor 3 = "\8227" + bulletFor 4 = "\8259" + bulletFor 5 = "\8226" + bulletFor _ = "\9702" + styleFor UpperAlpha _ = "upperLetter" + styleFor LowerAlpha _ = "lowerLetter" + styleFor UpperRoman _ = "upperRoman" + styleFor LowerRoman _ = "lowerRoman" + styleFor Decimal _ = "decimal" + styleFor DefaultStyle 1 = "decimal" + styleFor DefaultStyle 2 = "lowerLetter" + styleFor DefaultStyle 3 = "lowerRoman" + styleFor DefaultStyle 4 = "decimal" + styleFor DefaultStyle 5 = "lowerLetter" + styleFor DefaultStyle 6 = "lowerRoman" + styleFor _ _ = "decimal" + patternFor OneParen s = s ++ ")" + patternFor TwoParens s = "(" ++ s ++ ")" + patternFor _ s = s ++ "." + +-- | Convert Pandoc document to string in OpenXML format. +writeOpenXML :: WriterOptions -> Pandoc -> WS Element +writeOpenXML opts (Pandoc (Meta tit auths dat) blocks) = do + title <- withParaProp (pStyle "Title") $ blocksToOpenXML opts [Para tit | not (null tit)] + authors <- withParaProp (pStyle "Authors") $ blocksToOpenXML opts + [Para (intercalate [LineBreak] auths) | not (null auths)] + date <- withParaProp (pStyle "Date") $ blocksToOpenXML opts [Para dat | not (null dat)] + let convertSpace (Str x : Space : Str y : xs) = Str (x ++ " " ++ y) : xs + convertSpace (Str x : Str y : xs) = Str (x ++ y) : xs + convertSpace xs = xs + let blocks' = bottomUp convertSpace $ blocks + doc <- blocksToOpenXML opts blocks' + notes' <- reverse `fmap` gets stFootnotes + let notes = case notes' of + [] -> [] + ns -> [mknode "w:footnotes" [] ns] + let meta = title ++ authors ++ date + return $ mknode "w:document" + [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main") + ,("xmlns:m","http://schemas.openxmlformats.org/officeDocument/2006/math") + ,("xmlns:r","http://schemas.openxmlformats.org/officeDocument/2006/relationships") + ,("xmlns:o","urn:schemas-microsoft-com:office:office") + ,("xmlns:v","urn:schemas-microsoft-com:vml") + ,("xmlns:w10","urn:schemas-microsoft-com:office:word") + ,("xmlns:a","http://schemas.openxmlformats.org/drawingml/2006/main") + ,("xmlns:pic","http://schemas.openxmlformats.org/drawingml/2006/picture") + ,("xmlns:wp","http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing")] + $ mknode "w:body" [] (meta ++ doc ++ notes) + +-- | Convert a list of Pandoc blocks to OpenXML. +blocksToOpenXML :: WriterOptions -> [Block] -> WS [Element] +blocksToOpenXML opts bls = concat `fmap` mapM (blockToOpenXML opts) bls + +pStyle :: String -> Element +pStyle sty = mknode "w:pStyle" [("w:val",sty)] () + +rStyle :: String -> Element +rStyle sty = mknode "w:rStyle" [("w:val",sty)] () + +-- | Convert a Pandoc block element to OpenXML. +blockToOpenXML :: WriterOptions -> Block -> WS [Element] +blockToOpenXML _ Null = return [] +blockToOpenXML opts (Header lev lst) = do + contents <- withParaProp (pStyle $ "Heading" ++ show lev) $ + blockToOpenXML opts (Para lst) + usedIdents <- gets stSectionIds + let ident = uniqueIdent lst usedIdents + modify $ \s -> s{ stSectionIds = ident : stSectionIds s } + let bookmarkStart = mknode "w:bookmarkStart" [("w:id",ident) + ,("w:name",ident)] () + let bookmarkEnd = mknode "w:bookmarkEnd" [("w:id",ident)] () + return $ [bookmarkStart] ++ contents ++ [bookmarkEnd] +blockToOpenXML opts (Plain lst) = blockToOpenXML opts (Para lst) +blockToOpenXML opts (Para x@[Image alt _]) = do + paraProps <- getParaProps + contents <- inlinesToOpenXML opts x + captionNode <- withParaProp (pStyle "ImageCaption") + $ blockToOpenXML opts (Para alt) + return $ mknode "w:p" [] (paraProps ++ contents) : captionNode +blockToOpenXML opts (Para lst) = do + paraProps <- getParaProps + contents <- inlinesToOpenXML opts lst + return [mknode "w:p" [] (paraProps ++ contents)] +blockToOpenXML _ (RawBlock format str) + | format == "openxml" = return [ x | Elem x <- parseXML str ] + | otherwise = return [] +blockToOpenXML opts (BlockQuote blocks) = + withParaProp (pStyle "BlockQuote") $ blocksToOpenXML opts blocks +blockToOpenXML opts (CodeBlock attrs str) = + withParaProp (pStyle "SourceCode") $ blockToOpenXML opts $ Para [Code attrs str] +blockToOpenXML _ HorizontalRule = return [ + mknode "w:p" [] $ mknode "w:r" [] $ mknode "w:pict" [] + $ mknode "v:rect" [("style","width:0;height:1.5pt"), + ("o:hralign","center"), + ("o:hrstd","t"),("o:hr","t")] () ] +blockToOpenXML opts (Table caption aligns widths headers rows) = do + let captionStr = stringify caption + caption' <- if null caption + then return [] + else withParaProp (pStyle "TableCaption") + $ blockToOpenXML opts (Para caption) + let alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] () + let cellToOpenXML (al, cell) = withParaProp (alignmentFor al) + $ blocksToOpenXML opts cell + headers' <- mapM cellToOpenXML $ zip aligns headers + rows' <- mapM (\cells -> mapM cellToOpenXML $ zip aligns cells) + $ rows + let borderProps = mknode "w:tcPr" [] + [ mknode "w:tcBorders" [] + $ mknode "w:bottom" [("w:val","single")] () + , mknode "w:vAlign" [("w:val","bottom")] () ] + let mkcell border contents = mknode "w:tc" [] + $ [ borderProps | border ] ++ + if null contents + then [mknode "w:p" [] ()] + else contents + let mkrow border cells = mknode "w:tr" [] $ map (mkcell border) cells + let textwidth = 7920 -- 5.5 in in twips, 1/20 pt + let mkgridcol w = mknode "w:gridCol" + [("w:w", show $ (floor (textwidth * w) :: Integer))] () + return $ + [ mknode "w:tbl" [] + ( mknode "w:tblPr" [] + [ mknode "w:tblCaption" [("w:val", captionStr)] () + | not (null caption) ] + : mknode "w:tblGrid" [] + (if all (==0) widths + then [] + else map mkgridcol widths) + : [ mkrow True headers' | not (all null headers) ] ++ + map (mkrow False) rows' + ) + ] ++ caption' +blockToOpenXML opts (BulletList lst) = do + let marker = BulletMarker + asList $ concat `fmap` mapM (listItemToOpenXML opts marker) lst +blockToOpenXML opts (OrderedList (start, numstyle, numdelim) lst) = do + let marker = NumberMarker numstyle numdelim start + asList $ concat `fmap` mapM (listItemToOpenXML opts marker) lst +blockToOpenXML opts (DefinitionList items) = + concat `fmap` mapM (definitionListItemToOpenXML opts) items + +definitionListItemToOpenXML :: WriterOptions -> ([Inline],[[Block]]) -> WS [Element] +definitionListItemToOpenXML opts (term,defs) = do + term' <- withParaProp (pStyle "DefinitionTerm") + $ blockToOpenXML opts (Para term) + defs' <- withParaProp (pStyle "Definition") + $ concat `fmap` mapM (blocksToOpenXML opts) defs + return $ term' ++ defs' + +getNumId :: WS Int +getNumId = do + marker <- gets stListMarker + markersUsed <- gets stMarkersUsed + case elemIndex marker markersUsed of + Just x -> return $ x + 1 + Nothing -> do + modify $ \st -> st{ stMarkersUsed = markersUsed ++ [marker] } + return $ length markersUsed + 1 + +listItemToOpenXML :: WriterOptions -> ListMarker -> [Block] -> WS [Element] +listItemToOpenXML _ _ [] = return [] +listItemToOpenXML opts marker (first:rest) = do + first' <- withMarker marker $ blockToOpenXML opts first + rest' <- withMarker NoMarker $ blocksToOpenXML opts rest + return $ first' ++ rest' + +alignmentToString :: Alignment -> [Char] +alignmentToString alignment = case alignment of + AlignLeft -> "left" + AlignRight -> "right" + AlignCenter -> "center" + AlignDefault -> "left" + +-- | Convert a list of inline elements to OpenXML. +inlinesToOpenXML :: WriterOptions -> [Inline] -> WS [Element] +inlinesToOpenXML opts lst = concat `fmap` mapM (inlineToOpenXML opts) lst + +withMarker :: ListMarker -> WS a -> WS a +withMarker m p = do + origMarker <- gets stListMarker + modify $ \st -> st{ stListMarker = m } + result <- p + modify $ \st -> st{ stListMarker = origMarker } + return result + +asList :: WS a -> WS a +asList p = do + origListLevel <- gets stListLevel + modify $ \st -> st{ stListLevel = stListLevel st + 1 } + result <- p + modify $ \st -> st{ stListLevel = origListLevel } + return result + +getTextProps :: WS [Element] +getTextProps = do + props <- gets stTextProperties + return $ if null props + then [] + else [mknode "w:rPr" [] $ props] + +pushTextProp :: Element -> WS () +pushTextProp d = modify $ \s -> s{ stTextProperties = d : stTextProperties s } + +popTextProp :: WS () +popTextProp = modify $ \s -> s{ stTextProperties = drop 1 $ stTextProperties s } + +withTextProp :: Element -> WS a -> WS a +withTextProp d p = do + pushTextProp d + res <- p + popTextProp + return res + +getParaProps :: WS [Element] +getParaProps = do + props <- gets stParaProperties + listLevel <- gets stListLevel + numid <- getNumId + let listPr = if listLevel >= 1 + then [ mknode "w:numPr" [] + [ mknode "w:numId" [("w:val",show numid)] () + , mknode "w:ilvl" [("w:val",show listLevel)] () ] + ] + else [] + return $ case props ++ listPr of + [] -> [] + ps -> [mknode "w:pPr" [] ps] + +pushParaProp :: Element -> WS () +pushParaProp d = modify $ \s -> s{ stParaProperties = d : stParaProperties s } + +popParaProp :: WS () +popParaProp = modify $ \s -> s{ stParaProperties = drop 1 $ stParaProperties s } + +withParaProp :: Element -> WS a -> WS a +withParaProp d p = do + pushParaProp d + res <- p + popParaProp + return res + +formattedString :: String -> WS [Element] +formattedString str = do + props <- getTextProps + return [ mknode "w:r" [] $ + props ++ + [ mknode "w:t" [("xml:space","preserve")] str ] ] + +-- | Convert an inline element to OpenXML. +inlineToOpenXML :: WriterOptions -> Inline -> WS [Element] +inlineToOpenXML _ (Str str) = formattedString str +inlineToOpenXML opts Space = inlineToOpenXML opts (Str " ") +inlineToOpenXML opts (Strong lst) = + withTextProp (mknode "w:b" [] ()) $ inlinesToOpenXML opts lst +inlineToOpenXML opts (Emph lst) = + withTextProp (mknode "w:i" [] ()) $ inlinesToOpenXML opts lst +inlineToOpenXML opts (Subscript lst) = + withTextProp (mknode "w:vertAlign" [("w:val","subscript")] ()) + $ inlinesToOpenXML opts lst +inlineToOpenXML opts (Superscript lst) = + withTextProp (mknode "w:vertAlign" [("w:val","superscript")] ()) + $ inlinesToOpenXML opts lst +inlineToOpenXML opts (SmallCaps lst) = + withTextProp (mknode "w:smallCaps" [] ()) + $ inlinesToOpenXML opts lst +inlineToOpenXML opts (Strikeout lst) = + withTextProp (mknode "w:strike" [] ()) + $ inlinesToOpenXML opts lst +inlineToOpenXML _ LineBreak = return [ mknode "w:br" [] () ] +inlineToOpenXML _ (RawInline f str) + | f == "openxml" = return [ x | Elem x <- parseXML str ] + | otherwise = return [] +inlineToOpenXML opts (Quoted quoteType lst) = + inlinesToOpenXML opts $ [Str open] ++ lst ++ [Str close] + where (open, close) = case quoteType of + SingleQuote -> ("\x2018", "\x2019") + DoubleQuote -> ("\x201C", "\x201D") +inlineToOpenXML opts (Math t str) = + case texMathToOMML dt str of + Right r -> return [r] + Left _ -> inlinesToOpenXML opts (readTeXMath str) + where dt = if t == InlineMath + then DisplayInline + else DisplayBlock +inlineToOpenXML opts (Cite _ lst) = inlinesToOpenXML opts lst +inlineToOpenXML _ (Code attrs str) = + withTextProp (rStyle "VerbatimChar") + $ case highlight formatOpenXML attrs str of + Nothing -> intercalate [mknode "w:br" [] ()] + `fmap` (mapM formattedString $ lines str) + Just h -> return h + where formatOpenXML _fmtOpts = intercalate [mknode "w:br" [] ()] . + map (map toHlTok) + toHlTok (toktype,tok) = mknode "w:r" [] + [ mknode "w:rPr" [] + [ rStyle $ show toktype ] + , mknode "w:t" [("xml:space","preserve")] tok ] +inlineToOpenXML opts (Note bs) = do + notes <- gets stFootnotes + let notenum = length notes + 1 + let notemarker = mknode "w:r" [] + [ mknode "w:rPr" [] (rStyle "FootnoteReference") + , mknode "w:footnoteRef" [] () ] + let notemarkerXml = RawInline "openxml" $ ppElement notemarker + let insertNoteRef (Plain ils : xs) = Plain (notemarkerXml : ils) : xs + insertNoteRef (Para ils : xs) = Para (notemarkerXml : ils) : xs + insertNoteRef xs = Para [notemarkerXml] : xs + contents <- withParaProp (pStyle "FootnoteText") $ blocksToOpenXML opts + $ insertNoteRef bs + let newnote = mknode "w:footnote" [("w:id",show notenum)] $ contents + modify $ \s -> s{ stFootnotes = newnote : notes } + return [ mknode "w:r" [] + [ mknode "w:rPr" [] (rStyle "FootnoteReference") + , mknode "w:footnoteReference" [("w:id", show notenum)] () ] ] +-- internal link: +inlineToOpenXML opts (Link txt ('#':xs,_)) = do + contents <- withTextProp (rStyle "Hyperlink") $ inlinesToOpenXML opts txt + return [ mknode "w:hyperlink" [("w:anchor",xs)] contents ] +-- external link: +inlineToOpenXML opts (Link txt (src,_)) = do + contents <- withTextProp (rStyle "Hyperlink") $ inlinesToOpenXML opts txt + extlinks <- gets stExternalLinks + ind <- case M.lookup src extlinks of + Just i -> return i + Nothing -> do + let i = "link" ++ show (M.size extlinks) + modify $ \st -> st{ stExternalLinks = + M.insert src i extlinks } + return i + return [ mknode "w:hyperlink" [("r:id",ind)] contents ] +inlineToOpenXML _ (Image _ (src, tit)) = do + imgs <- gets stImages + (ident,size) <- case M.lookup src imgs of + Just (i,img) -> return (i, imageSize img) + Nothing -> do + -- TODO check existence download etc. + img <- liftIO $ B.readFile src + let ident' = "image" ++ show (M.size imgs + 1) + let size' = imageSize img + modify $ \st -> st{ + stImages = M.insert src (ident',img) $ stImages st } + return (ident',size') + let (xpt,ypt) = maybe (120,120) sizeInPoints size + -- 12700 emu = 1 pt + let (xemu,yemu) = (xpt * 12700, ypt * 12700) + let cNvPicPr = mknode "pic:cNvPicPr" [] $ + mknode "a:picLocks" [("noChangeArrowheads","1"),("noChangeAspect","1")] () + let nvPicPr = mknode "pic:nvPicPr" [] + [ mknode "pic:cNvPr" + [("descr",src),("id","0"),("name","Picture")] () + , cNvPicPr ] + let blipFill = mknode "pic:blipFill" [] + [ mknode "a:blip" [("r:embed",ident)] () + , mknode "a:stretch" [] $ mknode "a:fillRect" [] () ] + let xfrm = mknode "a:xfrm" [] + [ mknode "a:off" [("x","0"),("y","0")] () + , mknode "a:ext" [("cx",show xemu),("cy",show yemu)] () ] + let prstGeom = mknode "a:prstGeom" [("prst","rect")] $ + mknode "a:avLst" [] () + let ln = mknode "a:ln" [("w","9525")] + [ mknode "a:noFill" [] () + , mknode "a:headEnd" [] () + , mknode "a:tailEnd" [] () ] + let spPr = mknode "pic:spPr" [("bwMode","auto")] + [xfrm, prstGeom, mknode "a:noFill" [] (), ln] + let graphic = mknode "a:graphic" [] $ + mknode "a:graphicData" [("uri","http://schemas.openxmlformats.org/drawingml/2006/picture")] + [ mknode "pic:pic" [] + [ nvPicPr + , blipFill + , spPr ] ] + return [ mknode "w:r" [] $ + mknode "w:drawing" [] $ + mknode "wp:inline" [] + [ mknode "wp:extent" [("cx",show xemu),("cy",show yemu)] () + , mknode "wp:effectExtent" [("b","0"),("l","0"),("r","0"),("t","0")] () + , mknode "wp:docPr" [("descr",tit),("id","1"),("name","Picture")] () + , graphic ] ] + diff --git a/src/pandoc.hs b/src/pandoc.hs index 3660fc167..640d018b9 100644 --- a/src/pandoc.hs +++ b/src/pandoc.hs @@ -81,8 +81,8 @@ wrapWords indent c = wrap' (c - indent) (c - indent) then ",\n" ++ replicate indent ' ' ++ x ++ wrap' cols (cols - length x) xs else ", " ++ x ++ wrap' cols (remaining - (length x + 2)) xs -isNonTextOutput :: String -> Bool -isNonTextOutput = (`elem` ["odt","epub"]) +nonTextFormats :: [String] +nonTextFormats = ["odt","docx","epub"] -- | Data structure for command line options. data Opt = Opt @@ -110,6 +110,7 @@ data Opt = Opt , optChapters :: Bool -- ^ Use chapter for top-level sects , optHTMLMathMethod :: HTMLMathMethod -- ^ Method to print HTML math , optReferenceODT :: Maybe FilePath -- ^ Path of reference.odt + , optReferenceDocx :: Maybe FilePath -- ^ Path of reference.docx , optEPUBStylesheet :: Maybe String -- ^ EPUB stylesheet , optEPUBMetadata :: String -- ^ EPUB metadata , optDumpArgs :: Bool -- ^ Output command-line arguments @@ -157,6 +158,7 @@ defaultOpts = Opt , optChapters = False , optHTMLMathMethod = PlainMath , optReferenceODT = Nothing + , optReferenceDocx = Nothing , optEPUBStylesheet = Nothing , optEPUBMetadata = "" , optDumpArgs = False @@ -530,6 +532,13 @@ options = "FILENAME") "" -- "Path of custom reference.odt" + , Option "" ["reference-docx"] + (ReqArg + (\arg opt -> do + return opt { optReferenceDocx = Just arg }) + "FILENAME") + "" -- "Path of custom reference.docx" + , Option "" ["epub-stylesheet"] (ReqArg (\arg opt -> do @@ -644,7 +653,7 @@ usageMessage :: String -> [OptDescr (Opt -> IO Opt)] -> String usageMessage programName = usageInfo (programName ++ " [OPTIONS] [FILES]" ++ "\nInput formats: " ++ (wrapWords 16 78 $ map fst readers) ++ "\nOutput formats: " ++ - (wrapWords 16 78 $ map fst writers ++ ["odt","epub"]) ++ "\nOptions:") + (wrapWords 16 78 $ map fst writers ++ nonTextFormats) ++ "\nOptions:") -- Determine default reader based on source file extensions defaultReaderName :: String -> [FilePath] -> String @@ -695,6 +704,7 @@ defaultWriterName x = ".texinfo" -> "texinfo" ".db" -> "docbook" ".odt" -> "odt" + ".docx" -> "docx" ".epub" -> "epub" ".org" -> "org" ".asciidoc" -> "asciidoc" @@ -750,6 +760,7 @@ main = do , optChapters = chapters , optHTMLMathMethod = mathMethod , optReferenceODT = referenceODT + , optReferenceDocx = referenceDocx , optEPUBStylesheet = epubStylesheet , optEPUBMetadata = epubMetadata , optDumpArgs = dumpArgs @@ -798,7 +809,7 @@ main = do Just r -> return r Nothing -> error ("Unknown reader: " ++ readerName') - let standalone' = standalone || isNonTextOutput writerName' + let standalone' = standalone || (`elem` nonTextFormats) writerName' templ <- case templatePath of _ | not standalone' -> return "" @@ -909,7 +920,7 @@ main = do writerHighlight = highlight, writerHighlightStyle = highlightStyle } - when (isNonTextOutput writerName' && outputFile == "-") $ + when (writerName' `elem` nonTextFormats&& outputFile == "-") $ do UTF8.hPutStrLn stderr ("Error: Cannot write " ++ writerName' ++ " output to stdout.\n" ++ "Specify an output file using the -o option.") exitWith $ ExitFailure 5 @@ -955,9 +966,13 @@ main = do Nothing | writerName' == "epub" -> writeEPUB epubStylesheet writerOptions doc2 >>= B.writeFile (encodeString outputFile) - Nothing | writerName' == "odt" -> + | writerName' == "odt" -> writeODT referenceODT writerOptions doc2 >>= B.writeFile (encodeString outputFile) + | writerName' == "docx" -> + writeDocx referenceDocx writerOptions doc2 + >>= B.writeFile (encodeString outputFile) + | otherwise -> error $ "Unknown writer: " ++ writerName' Just r -> writerFn outputFile =<< postProcess result where writerFn "-" = UTF8.putStr writerFn f = UTF8.writeFile f @@ -966,5 +981,3 @@ main = do postProcess = if selfContained && writerName' `elem` htmlFormats then makeSelfContained datadir else return - - Nothing -> error $ "Unknown writer: " ++ writerName' |