From 9894d05fe3fd239247f755c60dc22247360be958 Mon Sep 17 00:00:00 2001 From: Agustín Martín Barbero Date: Sat, 26 Jan 2019 16:14:35 -0800 Subject: Improve writing metadata for docx, pptx and odt (#5252) * docx writer: support custom properties. Solves the writer part of #3024. Also supports additional core properties: `subject`, `lang`, `category`, `description`. * odt writer: improve standard properties, including the following core properties: `generator` (Pandoc/VERSION), `description`, `subject`, `keywords`, `initial-creator` (from authors), `creation-date` (actual creation date). Also fix date. * pptx writer: support custom properties. Also supports additional core properties: `subject`, `category`, `description`. * Includes golden tests. * MANUAL: document metadata support for docx, odt, pptx writers --- src/Text/Pandoc/Writers/Docx.hs | 19 ++++++++-- src/Text/Pandoc/Writers/ODT.hs | 36 ++++++++++++++----- src/Text/Pandoc/Writers/Powerpoint/Output.hs | 41 +++++++++++++++++++--- src/Text/Pandoc/Writers/Powerpoint/Presentation.hs | 21 +++++++++-- 4 files changed, 99 insertions(+), 18 deletions(-) (limited to 'src/Text/Pandoc/Writers') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 4f642871a..cee339ac7 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -496,7 +496,17 @@ writeDocx opts doc@(Pandoc meta _) = do Just (MetaList xs) -> map stringify xs _ -> [] + -- docProps/core.xml let docPropsPath = "docProps/core.xml" + let extraCoreProps = ["subject","lang","category","description"] + let extraCorePropsMap = M.fromList $ zip extraCoreProps + ["dc:subject","dc:language","cp:category","dc:description"] + let lookupMetaString' :: String -> Meta -> String + lookupMetaString' key' meta' = + case key' of + "description" -> intercalate "_x000d_\n" (map stringify $ lookupMetaBlocks "description" meta') + _ -> lookupMetaString key' meta' + let docProps = mknode "cp:coreProperties" [("xmlns:cp","http://schemas.openxmlformats.org/package/2006/metadata/core-properties") ,("xmlns:dc","http://purl.org/dc/elements/1.1/") @@ -505,14 +515,19 @@ writeDocx opts doc@(Pandoc meta _) = do ,("xmlns:xsi","http://www.w3.org/2001/XMLSchema-instance")] $ mknode "dc:title" [] (stringify $ docTitle meta) : mknode "dc:creator" [] (intercalate "; " (map stringify $ docAuthors meta)) - : mknode "cp:keywords" [] (intercalate ", " keywords) + : [ mknode (M.findWithDefault "" k extraCorePropsMap) [] (lookupMetaString' k meta) + | k <- M.keys (unMeta meta), k `elem` extraCoreProps] + ++ mknode "cp:keywords" [] (intercalate ", " keywords) : (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x ]) (formatTime defaultTimeLocale "%FT%XZ" utctime) let docPropsEntry = toEntry docPropsPath epochtime $ renderXml docProps + -- docProps/custom.xml let customProperties :: [(String, String)] - customProperties = [] -- FIXME + customProperties = [(k, lookupMetaString k meta) | k <- M.keys (unMeta meta) + , k `notElem` (["title", "author", "keywords"] + ++ extraCoreProps)] let mkCustomProp (k, v) pid = mknode "property" [("fmtid","{D5CDD505-2E9C-101B-9397-08002B2CF9AE}") ,("pid", show pid) diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index a03575134..481e88da2 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -40,6 +40,7 @@ import Data.List (isPrefixOf, intercalate) import Data.Maybe (fromMaybe) import qualified Data.Map as Map import qualified Data.Text.Lazy as TL +import Data.Time import System.FilePath (takeDirectory, takeExtension, (<.>)) import Text.Pandoc.BCP47 (Lang (..), getLang, renderLang) import Text.Pandoc.Class (PandocMonad, report, toLang) @@ -50,8 +51,9 @@ import Text.Pandoc.Logging import Text.Pandoc.MIME (extensionFromMimeType, getMimeType) import Text.Pandoc.Options (WrapOption (..), WriterOptions (..)) import Text.Pandoc.Pretty -import Text.Pandoc.Shared (stringify, normalizeDate) -import Text.Pandoc.Writers.Shared (lookupMetaString, fixDisplayMath) +import Text.Pandoc.Shared (stringify, pandocVersion) +import Text.Pandoc.Writers.Shared (lookupMetaString, lookupMetaBlocks, + fixDisplayMath) import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toStringLazy) import Text.Pandoc.Walk import Text.Pandoc.Writers.OpenDocument (writeOpenDocument) @@ -83,6 +85,7 @@ pandocToODT :: PandocMonad m pandocToODT opts doc@(Pandoc meta _) = do let title = docTitle meta let authors = docAuthors meta + utctime <- P.getCurrentTime lang <- toLang (getLang opts meta) refArchive <- case writerReferenceDoc opts of @@ -125,9 +128,14 @@ pandocToODT opts doc@(Pandoc meta _) = do ) ) let archive' = addEntryToArchive manifestEntry archive + -- create meta.xml let userDefinedMetaFields = [k | k <- Map.keys (unMeta meta) - , k `notElem` ["title", "lang", "author", "date"]] + , k `notElem` ["title", "lang", "author" + , "description", "subject", "keywords"]] let escapedText = text . escapeStringForXML + let keywords = case lookupMeta "keywords" meta of + Just (MetaList xs) -> map stringify xs + _ -> [] let userDefinedMeta = map (\k -> inTags False "meta:user-defined" [ ("meta:name", escapeStringForXML k) @@ -146,19 +154,29 @@ pandocToODT opts doc@(Pandoc meta _) = do ,("xmlns:ooo","http://openoffice.org/2004/office") ,("xmlns:grddl","http://www.w3.org/2003/g/data-view#") ,("office:version","1.2")] ( inTags True "office:meta" [] $ - ( metaTag "dc:title" (stringify title) + ( metaTag "meta:generator" ("Pandoc/" ++ pandocVersion) + $$ + metaTag "dc:title" (stringify title) + $$ + metaTag "dc:description" + (intercalate "\n" (map stringify $ + lookupMetaBlocks "description" meta)) + $$ + metaTag "dc:subject" (lookupMetaString "subject" meta) + $$ + metaTag "meta:keyword" (intercalate ", " keywords) $$ case lang of Just l -> metaTag "dc:language" (renderLang l) Nothing -> empty $$ - metaTag "dc:creator" + (\d a -> metaTag "meta:initial-creator" a + $$ metaTag "dc:creator" a + $$ metaTag "meta:creation-date" d + $$ metaTag "dc:date" d + ) (formatTime defaultTimeLocale "%FT%XZ" utctime) (intercalate "; " (map stringify authors)) $$ - maybe mempty - (metaTag "dc:date") - (normalizeDate (lookupMetaString "date" meta)) - $$ vcat userDefinedMeta ) ) diff --git a/src/Text/Pandoc/Writers/Powerpoint/Output.hs b/src/Text/Pandoc/Writers/Powerpoint/Output.hs index 3e6652686..7ab50b8c4 100644 --- a/src/Text/Pandoc/Writers/Powerpoint/Output.hs +++ b/src/Text/Pandoc/Writers/Powerpoint/Output.hs @@ -58,7 +58,7 @@ import Text.Pandoc.MIME import qualified Data.ByteString.Lazy as BL import Text.Pandoc.Writers.OOXML import qualified Data.Map as M -import Data.Maybe (mapMaybe, listToMaybe, fromMaybe, maybeToList, catMaybes) +import Data.Maybe (mapMaybe, listToMaybe, fromMaybe, maybeToList, catMaybes, isNothing) import Text.Pandoc.ImageSize import Control.Applicative ((<|>)) import System.FilePath.Glob @@ -252,6 +252,8 @@ presentationToArchiveP p@(Presentation docProps slides) = do newArch' <- foldM copyFileToArchive emptyArchive filePaths -- we make a docProps/core.xml entry out of the presentation docprops docPropsEntry <- docPropsToEntry docProps + -- we make a docProps/custom.xml entry out of the custom properties + docCustomPropsEntry <- docCustomPropsToEntry docProps -- we make this ourself in case there's something unexpected in the -- one in the reference doc. relsEntry <- topLevelRelsEntry @@ -274,7 +276,8 @@ presentationToArchiveP p@(Presentation docProps slides) = do spkNotesEntries ++ spkNotesRelEntries ++ mediaEntries ++ - [contentTypesEntry, docPropsEntry, relsEntry, presEntry, presRelsEntry] + [contentTypesEntry, docPropsEntry, docCustomPropsEntry, relsEntry, + presEntry, presRelsEntry] makeSlideIdMap :: Presentation -> M.Map SlideId Int makeSlideIdMap (Presentation _ slides) = @@ -1425,6 +1428,10 @@ topLevelRels = , relType = "http://schemas.openxmlformats.org/package/2006/relationships/metadata/extended-properties" , relTarget = "docProps/app.xml" } + , Relationship { relId = 4 + , relType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties" + , relTarget = "docProps/custom.xml" + } ] topLevelRelsEntry :: PandocMonad m => P m Entry @@ -1657,7 +1664,7 @@ docPropsElement :: PandocMonad m => DocProps -> P m Element docPropsElement docProps = do utctime <- asks envUTCTime let keywords = case dcKeywords docProps of - Just xs -> intercalate "," xs + Just xs -> intercalate ", " xs Nothing -> "" return $ mknode "cp:coreProperties" @@ -1669,7 +1676,13 @@ docPropsElement docProps = do $ (mknode "dc:title" [] $ fromMaybe "" $ dcTitle docProps) : (mknode "dc:creator" [] $ fromMaybe "" $ dcCreator docProps) : (mknode "cp:keywords" [] keywords) - : (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x + : (if isNothing (dcSubject docProps) then [] else + [mknode "dc:subject" [] $ fromMaybe "" $ dcSubject docProps]) + ++ (if isNothing (dcDescription docProps) then [] else + [mknode "dc:description" [] $ fromMaybe "" $ dcDescription docProps]) + ++ (if isNothing (cpCategory docProps) then [] else + [mknode "cp:category" [] $ fromMaybe "" $ cpCategory docProps]) + ++ (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x ]) (formatTime defaultTimeLocale "%FT%XZ" utctime) @@ -1677,6 +1690,21 @@ docPropsToEntry :: PandocMonad m => DocProps -> P m Entry docPropsToEntry docProps = docPropsElement docProps >>= elemToEntry "docProps/core.xml" +-- adapted from the Docx writer +docCustomPropsElement :: PandocMonad m => DocProps -> P m Element +docCustomPropsElement docProps = do + let mkCustomProp (k, v) pid = mknode "property" + [("fmtid","{D5CDD505-2E9C-101B-9397-08002B2CF9AE}") + ,("pid", show pid) + ,("name", k)] $ mknode "vt:lpwstr" [] v + return $ mknode "Properties" + [("xmlns","http://schemas.openxmlformats.org/officeDocument/2006/custom-properties") + ,("xmlns:vt","http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes") + ] $ zipWith mkCustomProp (fromMaybe [] $ customProperties docProps) [(2 :: Int)..] + +docCustomPropsToEntry :: PandocMonad m => DocProps -> P m Entry +docCustomPropsToEntry docProps = docCustomPropsElement docProps >>= + elemToEntry "docProps/custom.xml" defaultContentTypeToElem :: DefaultContentType -> Element defaultContentTypeToElem dct = @@ -1765,6 +1793,7 @@ presentationToContentTypes p@(Presentation _ slides) = do inheritedOverrides = mapMaybe pathToOverride filePaths docPropsOverride = mapMaybe pathToOverride ["docProps/core.xml"] + docCustomPropsOverride = mapMaybe pathToOverride ["docProps/custom.xml"] presOverride = mapMaybe pathToOverride ["ppt/presentation.xml"] relativePaths <- mapM slideToFilePath slides let slideOverrides = mapMaybe @@ -1773,7 +1802,8 @@ presentationToContentTypes p@(Presentation _ slides) = do speakerNotesOverrides <- (mapMaybe pathToOverride) <$> getSpeakerNotesFilePaths return $ ContentTypes (defaults ++ mediaDefaults) - (inheritedOverrides ++ docPropsOverride ++ presOverride ++ slideOverrides ++ speakerNotesOverrides) + (inheritedOverrides ++ docPropsOverride ++ docCustomPropsOverride ++ + presOverride ++ slideOverrides ++ speakerNotesOverrides) presML :: String presML = "application/vnd.openxmlformats-officedocument.presentationml" @@ -1788,6 +1818,7 @@ getContentType fp | fp == "ppt/viewProps.xml" = Just $ presML ++ ".viewProps+xml" | fp == "ppt/tableStyles.xml" = Just $ presML ++ ".tableStyles+xml" | fp == "docProps/core.xml" = Just $ "application/vnd.openxmlformats-package.core-properties+xml" + | fp == "docProps/custom.xml" = Just $ "application/vnd.openxmlformats-officedocument.custom-properties+xml" | fp == "docProps/app.xml" = Just $ noPresML ++ ".extended-properties+xml" | "ppt" : "slideMasters" : f : [] <- splitDirectories fp , (_, ".xml") <- splitExtension f = diff --git a/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs b/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs index 7897f2b11..fee1a0f6e 100644 --- a/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs +++ b/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs @@ -72,7 +72,8 @@ import Text.Pandoc.Logging import Text.Pandoc.Walk import Data.Time (UTCTime) import qualified Text.Pandoc.Shared as Shared -- so we don't overlap "Element" -import Text.Pandoc.Writers.Shared (lookupMetaInlines, toTableOfContents) +import Text.Pandoc.Writers.Shared (lookupMetaInlines, lookupMetaBlocks + , lookupMetaString, toTableOfContents) import qualified Data.Map as M import qualified Data.Set as S import Data.Maybe (maybeToList, fromMaybe) @@ -180,7 +181,10 @@ data DocProps = DocProps { dcTitle :: Maybe String , dcSubject :: Maybe String , dcCreator :: Maybe String , dcKeywords :: Maybe [String] + , dcDescription :: Maybe String + , cpCategory :: Maybe String , dcCreated :: Maybe UTCTime + , customProperties :: Maybe [(String, String)] } deriving (Show, Eq) @@ -930,13 +934,26 @@ metaToDocProps meta = authors = case map Shared.stringify $ docAuthors meta of [] -> Nothing - ss -> Just $ intercalate ";" ss + ss -> Just $ intercalate "; " ss + + description = case map Shared.stringify $ lookupMetaBlocks "description" meta of + [] -> Nothing + ss -> Just $ intercalate "_x000d_\n" ss + + customProperties' = case [(k, lookupMetaString k meta) | k <- M.keys (unMeta meta) + , k `notElem` (["title", "author", "keywords", "description" + , "subject","lang","category"])] of + [] -> Nothing + ss -> Just ss in DocProps{ dcTitle = Shared.stringify <$> lookupMeta "title" meta , dcSubject = Shared.stringify <$> lookupMeta "subject" meta , dcCreator = authors , dcKeywords = keywords + , dcDescription = description + , cpCategory = Shared.stringify <$> lookupMeta "category" meta , dcCreated = Nothing + , customProperties = customProperties' } documentToPresentation :: WriterOptions -- cgit v1.2.3