From 9894d05fe3fd239247f755c60dc22247360be958 Mon Sep 17 00:00:00 2001
From: Agustín Martín Barbero <agusmbaterra@gmail.com>
Date: Sat, 26 Jan 2019 16:14:35 -0800
Subject: Improve writing metadata for docx, pptx and odt (#5252)

* docx writer: support custom properties.  Solves the writer part of #3024.
  Also supports additional core properties:  `subject`, `lang`, `category`,
  `description`.

* odt writer: improve standard properties, including the following core properties:
  `generator` (Pandoc/VERSION), `description`, `subject`, `keywords`,
  `initial-creator` (from authors), `creation-date` (actual creation date).
  Also fix date.

* pptx writer: support custom properties.  Also supports additional core
  properties: `subject`, `category`, `description`.

* Includes golden tests.

* MANUAL: document metadata support for docx, odt, pptx writers
---
 src/Text/Pandoc/Writers/Docx.hs                    | 19 ++++++++--
 src/Text/Pandoc/Writers/ODT.hs                     | 36 ++++++++++++++-----
 src/Text/Pandoc/Writers/Powerpoint/Output.hs       | 41 +++++++++++++++++++---
 src/Text/Pandoc/Writers/Powerpoint/Presentation.hs | 21 +++++++++--
 4 files changed, 99 insertions(+), 18 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 4f642871a..cee339ac7 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -496,7 +496,17 @@ writeDocx opts doc@(Pandoc meta _) = do
                        Just (MetaList xs) -> map stringify xs
                        _                  -> []
 
+  -- docProps/core.xml
   let docPropsPath = "docProps/core.xml"
+  let extraCoreProps = ["subject","lang","category","description"]
+  let extraCorePropsMap = M.fromList $ zip extraCoreProps
+                       ["dc:subject","dc:language","cp:category","dc:description"]
+  let lookupMetaString' :: String -> Meta -> String
+      lookupMetaString' key' meta' =
+        case key' of
+             "description"    -> intercalate "_x000d_\n" (map stringify $ lookupMetaBlocks "description" meta')
+             _                -> lookupMetaString key' meta'
+  
   let docProps = mknode "cp:coreProperties"
           [("xmlns:cp","http://schemas.openxmlformats.org/package/2006/metadata/core-properties")
           ,("xmlns:dc","http://purl.org/dc/elements/1.1/")
@@ -505,14 +515,19 @@ writeDocx opts doc@(Pandoc meta _) = do
           ,("xmlns:xsi","http://www.w3.org/2001/XMLSchema-instance")]
           $ mknode "dc:title" [] (stringify $ docTitle meta)
           : mknode "dc:creator" [] (intercalate "; " (map stringify $ docAuthors meta))
-          : mknode "cp:keywords" [] (intercalate ", " keywords)
+          : [ mknode (M.findWithDefault "" k extraCorePropsMap) [] (lookupMetaString' k meta)
+            | k <- M.keys (unMeta meta), k `elem` extraCoreProps]
+          ++ mknode "cp:keywords" [] (intercalate ", " keywords)
           : (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x
                    , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x
                    ]) (formatTime defaultTimeLocale "%FT%XZ" utctime)
   let docPropsEntry = toEntry docPropsPath epochtime $ renderXml docProps
 
+  -- docProps/custom.xml
   let customProperties :: [(String, String)]
-      customProperties = [] -- FIXME
+      customProperties = [(k, lookupMetaString k meta) | k <- M.keys (unMeta meta)
+                         , k `notElem` (["title", "author", "keywords"]
+                                       ++ extraCoreProps)]
   let mkCustomProp (k, v) pid = mknode "property"
          [("fmtid","{D5CDD505-2E9C-101B-9397-08002B2CF9AE}")
          ,("pid", show pid)
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index a03575134..481e88da2 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -40,6 +40,7 @@ import Data.List (isPrefixOf, intercalate)
 import Data.Maybe (fromMaybe)
 import qualified Data.Map as Map
 import qualified Data.Text.Lazy as TL
+import Data.Time
 import System.FilePath (takeDirectory, takeExtension, (<.>))
 import Text.Pandoc.BCP47 (Lang (..), getLang, renderLang)
 import Text.Pandoc.Class (PandocMonad, report, toLang)
@@ -50,8 +51,9 @@ import Text.Pandoc.Logging
 import Text.Pandoc.MIME (extensionFromMimeType, getMimeType)
 import Text.Pandoc.Options (WrapOption (..), WriterOptions (..))
 import Text.Pandoc.Pretty
-import Text.Pandoc.Shared (stringify, normalizeDate)
-import Text.Pandoc.Writers.Shared (lookupMetaString, fixDisplayMath)
+import Text.Pandoc.Shared (stringify, pandocVersion)
+import Text.Pandoc.Writers.Shared (lookupMetaString, lookupMetaBlocks,
+                                   fixDisplayMath)
 import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toStringLazy)
 import Text.Pandoc.Walk
 import Text.Pandoc.Writers.OpenDocument (writeOpenDocument)
@@ -83,6 +85,7 @@ pandocToODT :: PandocMonad m
 pandocToODT opts doc@(Pandoc meta _) = do
   let title = docTitle meta
   let authors = docAuthors meta
+  utctime <- P.getCurrentTime
   lang <- toLang (getLang opts meta)
   refArchive <-
        case writerReferenceDoc opts of
@@ -125,9 +128,14 @@ pandocToODT opts doc@(Pandoc meta _) = do
               )
          )
   let archive' = addEntryToArchive manifestEntry archive
+  -- create meta.xml
   let userDefinedMetaFields = [k | k <- Map.keys (unMeta meta)
-                              , k `notElem` ["title", "lang", "author", "date"]]
+                              , k `notElem` ["title", "lang", "author"
+                                           , "description", "subject", "keywords"]]
   let escapedText = text . escapeStringForXML
+  let keywords = case lookupMeta "keywords" meta of
+                      Just (MetaList xs) -> map stringify xs
+                      _                  -> []
   let userDefinedMeta =
         map (\k -> inTags False "meta:user-defined"
               [ ("meta:name", escapeStringForXML k)
@@ -146,19 +154,29 @@ pandocToODT opts doc@(Pandoc meta _) = do
            ,("xmlns:ooo","http://openoffice.org/2004/office")
            ,("xmlns:grddl","http://www.w3.org/2003/g/data-view#")
            ,("office:version","1.2")] ( inTags True "office:meta" [] $
-                 ( metaTag "dc:title" (stringify title)
+                 ( metaTag "meta:generator" ("Pandoc/" ++ pandocVersion)
+                   $$
+                   metaTag "dc:title" (stringify title)
+                   $$
+                   metaTag "dc:description"
+                          (intercalate "\n" (map stringify $
+                                         lookupMetaBlocks "description" meta))
+                   $$
+                   metaTag "dc:subject" (lookupMetaString "subject" meta)
+                   $$
+                   metaTag "meta:keyword" (intercalate ", " keywords)
                    $$
                    case lang of
                         Just l  -> metaTag "dc:language" (renderLang l)
                         Nothing -> empty
                    $$
-                   metaTag "dc:creator"
+                   (\d a -> metaTag "meta:initial-creator" a
+                         $$ metaTag "dc:creator" a
+                         $$ metaTag "meta:creation-date" d
+                         $$ metaTag "dc:date" d
+                   ) (formatTime defaultTimeLocale "%FT%XZ" utctime)
                      (intercalate "; " (map stringify authors))
                    $$
-                   maybe mempty
-                     (metaTag "dc:date")
-                       (normalizeDate (lookupMetaString "date" meta))
-                   $$
                    vcat userDefinedMeta
                  )
              )
diff --git a/src/Text/Pandoc/Writers/Powerpoint/Output.hs b/src/Text/Pandoc/Writers/Powerpoint/Output.hs
index 3e6652686..7ab50b8c4 100644
--- a/src/Text/Pandoc/Writers/Powerpoint/Output.hs
+++ b/src/Text/Pandoc/Writers/Powerpoint/Output.hs
@@ -58,7 +58,7 @@ import Text.Pandoc.MIME
 import qualified Data.ByteString.Lazy as BL
 import Text.Pandoc.Writers.OOXML
 import qualified Data.Map as M
-import Data.Maybe (mapMaybe, listToMaybe, fromMaybe, maybeToList, catMaybes)
+import Data.Maybe (mapMaybe, listToMaybe, fromMaybe, maybeToList, catMaybes, isNothing)
 import Text.Pandoc.ImageSize
 import Control.Applicative ((<|>))
 import System.FilePath.Glob
@@ -252,6 +252,8 @@ presentationToArchiveP p@(Presentation docProps slides) = do
   newArch' <- foldM copyFileToArchive emptyArchive filePaths
   -- we make a docProps/core.xml entry out of the presentation docprops
   docPropsEntry <- docPropsToEntry docProps
+  -- we make a docProps/custom.xml entry out of the custom properties
+  docCustomPropsEntry <- docCustomPropsToEntry docProps
   -- we make this ourself in case there's something unexpected in the
   -- one in the reference doc.
   relsEntry <- topLevelRelsEntry
@@ -274,7 +276,8 @@ presentationToArchiveP p@(Presentation docProps slides) = do
     spkNotesEntries ++
     spkNotesRelEntries ++
     mediaEntries ++
-    [contentTypesEntry, docPropsEntry, relsEntry, presEntry, presRelsEntry]
+    [contentTypesEntry, docPropsEntry, docCustomPropsEntry, relsEntry,
+     presEntry, presRelsEntry]
 
 makeSlideIdMap :: Presentation -> M.Map SlideId Int
 makeSlideIdMap (Presentation _ slides) =
@@ -1425,6 +1428,10 @@ topLevelRels =
                  , relType = "http://schemas.openxmlformats.org/package/2006/relationships/metadata/extended-properties"
                  , relTarget = "docProps/app.xml"
                  }
+  , Relationship { relId = 4
+                 , relType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties"
+                 , relTarget = "docProps/custom.xml"
+                 }
   ]
 
 topLevelRelsEntry :: PandocMonad m => P m Entry
@@ -1657,7 +1664,7 @@ docPropsElement :: PandocMonad m => DocProps -> P m Element
 docPropsElement docProps = do
   utctime <- asks envUTCTime
   let keywords = case dcKeywords docProps of
-        Just xs -> intercalate "," xs
+        Just xs -> intercalate ", " xs
         Nothing -> ""
   return $
     mknode "cp:coreProperties"
@@ -1669,7 +1676,13 @@ docPropsElement docProps = do
     $ (mknode "dc:title" [] $ fromMaybe "" $ dcTitle docProps)
     : (mknode "dc:creator" [] $ fromMaybe "" $ dcCreator docProps)
     : (mknode "cp:keywords" [] keywords)
-    : (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x
+    : (if isNothing (dcSubject docProps) then [] else
+           [mknode "dc:subject" [] $ fromMaybe "" $ dcSubject docProps])
+    ++ (if isNothing (dcDescription docProps) then [] else
+           [mknode "dc:description" [] $ fromMaybe "" $ dcDescription docProps])
+    ++ (if isNothing (cpCategory docProps) then [] else
+           [mknode "cp:category" [] $ fromMaybe "" $ cpCategory docProps])
+    ++ (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x
              , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x
              ]) (formatTime defaultTimeLocale "%FT%XZ" utctime)
 
@@ -1677,6 +1690,21 @@ docPropsToEntry :: PandocMonad m => DocProps -> P m Entry
 docPropsToEntry docProps = docPropsElement docProps >>=
                            elemToEntry "docProps/core.xml"
 
+-- adapted from the Docx writer
+docCustomPropsElement :: PandocMonad m => DocProps -> P m Element
+docCustomPropsElement docProps = do
+  let mkCustomProp (k, v) pid = mknode "property"
+         [("fmtid","{D5CDD505-2E9C-101B-9397-08002B2CF9AE}")
+         ,("pid", show pid)
+         ,("name", k)] $ mknode "vt:lpwstr" [] v
+  return $ mknode "Properties"
+          [("xmlns","http://schemas.openxmlformats.org/officeDocument/2006/custom-properties")
+          ,("xmlns:vt","http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes")
+          ] $ zipWith mkCustomProp (fromMaybe [] $ customProperties docProps) [(2 :: Int)..]
+
+docCustomPropsToEntry :: PandocMonad m => DocProps -> P m Entry
+docCustomPropsToEntry docProps = docCustomPropsElement docProps >>=
+                           elemToEntry "docProps/custom.xml"
 
 defaultContentTypeToElem :: DefaultContentType -> Element
 defaultContentTypeToElem dct =
@@ -1765,6 +1793,7 @@ presentationToContentTypes p@(Presentation _ slides) = do
 
       inheritedOverrides = mapMaybe pathToOverride filePaths
       docPropsOverride = mapMaybe pathToOverride ["docProps/core.xml"]
+      docCustomPropsOverride = mapMaybe pathToOverride ["docProps/custom.xml"]
       presOverride = mapMaybe pathToOverride ["ppt/presentation.xml"]
   relativePaths <- mapM slideToFilePath slides
   let slideOverrides = mapMaybe
@@ -1773,7 +1802,8 @@ presentationToContentTypes p@(Presentation _ slides) = do
   speakerNotesOverrides <- (mapMaybe pathToOverride) <$> getSpeakerNotesFilePaths
   return $ ContentTypes
     (defaults ++ mediaDefaults)
-    (inheritedOverrides ++ docPropsOverride ++ presOverride ++ slideOverrides ++ speakerNotesOverrides)
+    (inheritedOverrides ++ docPropsOverride ++ docCustomPropsOverride ++
+     presOverride ++ slideOverrides ++ speakerNotesOverrides)
 
 presML :: String
 presML = "application/vnd.openxmlformats-officedocument.presentationml"
@@ -1788,6 +1818,7 @@ getContentType fp
   | fp == "ppt/viewProps.xml" = Just $ presML ++ ".viewProps+xml"
   | fp == "ppt/tableStyles.xml" = Just $ presML ++ ".tableStyles+xml"
   | fp == "docProps/core.xml" = Just $ "application/vnd.openxmlformats-package.core-properties+xml"
+  | fp == "docProps/custom.xml" = Just $ "application/vnd.openxmlformats-officedocument.custom-properties+xml"
   | fp == "docProps/app.xml" = Just $ noPresML ++ ".extended-properties+xml"
   | "ppt" : "slideMasters" : f : [] <- splitDirectories fp
   , (_, ".xml") <- splitExtension f =
diff --git a/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs b/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs
index 7897f2b11..fee1a0f6e 100644
--- a/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs
+++ b/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs
@@ -72,7 +72,8 @@ import Text.Pandoc.Logging
 import Text.Pandoc.Walk
 import Data.Time (UTCTime)
 import qualified Text.Pandoc.Shared as Shared -- so we don't overlap "Element"
-import Text.Pandoc.Writers.Shared (lookupMetaInlines, toTableOfContents)
+import Text.Pandoc.Writers.Shared (lookupMetaInlines, lookupMetaBlocks
+                                 , lookupMetaString, toTableOfContents)
 import qualified Data.Map as M
 import qualified Data.Set as S
 import Data.Maybe (maybeToList, fromMaybe)
@@ -180,7 +181,10 @@ data DocProps = DocProps { dcTitle :: Maybe String
                          , dcSubject :: Maybe String
                          , dcCreator :: Maybe String
                          , dcKeywords :: Maybe [String]
+                         , dcDescription :: Maybe String
+                         , cpCategory :: Maybe String
                          , dcCreated :: Maybe UTCTime
+                         , customProperties :: Maybe [(String, String)]
                          } deriving (Show, Eq)
 
 
@@ -930,13 +934,26 @@ metaToDocProps meta =
 
       authors = case map Shared.stringify $ docAuthors meta of
                   [] -> Nothing
-                  ss -> Just $ intercalate ";" ss
+                  ss -> Just $ intercalate "; " ss
+
+      description = case map Shared.stringify $ lookupMetaBlocks "description" meta of
+                  [] -> Nothing
+                  ss -> Just $ intercalate "_x000d_\n" ss
+
+      customProperties' = case [(k, lookupMetaString k meta) | k <- M.keys (unMeta meta)
+                               , k `notElem` (["title", "author", "keywords", "description"
+                                             , "subject","lang","category"])] of
+                  [] -> Nothing
+                  ss -> Just ss
   in
     DocProps{ dcTitle = Shared.stringify <$> lookupMeta "title" meta
             , dcSubject = Shared.stringify <$> lookupMeta "subject" meta
             , dcCreator = authors
             , dcKeywords = keywords
+            , dcDescription = description
+            , cpCategory = Shared.stringify <$> lookupMeta "category" meta
             , dcCreated = Nothing
+            , customProperties = customProperties'
             }
 
 documentToPresentation :: WriterOptions
-- 
cgit v1.2.3