aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc
diff options
context:
space:
mode:
authorAgustín Martín Barbero <agusmbaterra@gmail.com>2019-01-26 16:14:35 -0800
committerJohn MacFarlane <jgm@berkeley.edu>2019-01-26 16:14:35 -0800
commit9894d05fe3fd239247f755c60dc22247360be958 (patch)
tree92f5d6240edfd3bf72cc3c30295d18bef76a4077 /src/Text/Pandoc
parentff0aaa549d51384ef3cdcb063706dee4f6143444 (diff)
downloadpandoc-9894d05fe3fd239247f755c60dc22247360be958.tar.gz
Improve writing metadata for docx, pptx and odt (#5252)
* docx writer: support custom properties. Solves the writer part of #3024. Also supports additional core properties: `subject`, `lang`, `category`, `description`. * odt writer: improve standard properties, including the following core properties: `generator` (Pandoc/VERSION), `description`, `subject`, `keywords`, `initial-creator` (from authors), `creation-date` (actual creation date). Also fix date. * pptx writer: support custom properties. Also supports additional core properties: `subject`, `category`, `description`. * Includes golden tests. * MANUAL: document metadata support for docx, odt, pptx writers
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r--src/Text/Pandoc/Writers/Docx.hs19
-rw-r--r--src/Text/Pandoc/Writers/ODT.hs36
-rw-r--r--src/Text/Pandoc/Writers/Powerpoint/Output.hs41
-rw-r--r--src/Text/Pandoc/Writers/Powerpoint/Presentation.hs21
4 files changed, 99 insertions, 18 deletions
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 4f642871a..cee339ac7 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -496,7 +496,17 @@ writeDocx opts doc@(Pandoc meta _) = do
Just (MetaList xs) -> map stringify xs
_ -> []
+ -- docProps/core.xml
let docPropsPath = "docProps/core.xml"
+ let extraCoreProps = ["subject","lang","category","description"]
+ let extraCorePropsMap = M.fromList $ zip extraCoreProps
+ ["dc:subject","dc:language","cp:category","dc:description"]
+ let lookupMetaString' :: String -> Meta -> String
+ lookupMetaString' key' meta' =
+ case key' of
+ "description" -> intercalate "_x000d_\n" (map stringify $ lookupMetaBlocks "description" meta')
+ _ -> lookupMetaString key' meta'
+
let docProps = mknode "cp:coreProperties"
[("xmlns:cp","http://schemas.openxmlformats.org/package/2006/metadata/core-properties")
,("xmlns:dc","http://purl.org/dc/elements/1.1/")
@@ -505,14 +515,19 @@ writeDocx opts doc@(Pandoc meta _) = do
,("xmlns:xsi","http://www.w3.org/2001/XMLSchema-instance")]
$ mknode "dc:title" [] (stringify $ docTitle meta)
: mknode "dc:creator" [] (intercalate "; " (map stringify $ docAuthors meta))
- : mknode "cp:keywords" [] (intercalate ", " keywords)
+ : [ mknode (M.findWithDefault "" k extraCorePropsMap) [] (lookupMetaString' k meta)
+ | k <- M.keys (unMeta meta), k `elem` extraCoreProps]
+ ++ mknode "cp:keywords" [] (intercalate ", " keywords)
: (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x
, mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x
]) (formatTime defaultTimeLocale "%FT%XZ" utctime)
let docPropsEntry = toEntry docPropsPath epochtime $ renderXml docProps
+ -- docProps/custom.xml
let customProperties :: [(String, String)]
- customProperties = [] -- FIXME
+ customProperties = [(k, lookupMetaString k meta) | k <- M.keys (unMeta meta)
+ , k `notElem` (["title", "author", "keywords"]
+ ++ extraCoreProps)]
let mkCustomProp (k, v) pid = mknode "property"
[("fmtid","{D5CDD505-2E9C-101B-9397-08002B2CF9AE}")
,("pid", show pid)
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index a03575134..481e88da2 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -40,6 +40,7 @@ import Data.List (isPrefixOf, intercalate)
import Data.Maybe (fromMaybe)
import qualified Data.Map as Map
import qualified Data.Text.Lazy as TL
+import Data.Time
import System.FilePath (takeDirectory, takeExtension, (<.>))
import Text.Pandoc.BCP47 (Lang (..), getLang, renderLang)
import Text.Pandoc.Class (PandocMonad, report, toLang)
@@ -50,8 +51,9 @@ import Text.Pandoc.Logging
import Text.Pandoc.MIME (extensionFromMimeType, getMimeType)
import Text.Pandoc.Options (WrapOption (..), WriterOptions (..))
import Text.Pandoc.Pretty
-import Text.Pandoc.Shared (stringify, normalizeDate)
-import Text.Pandoc.Writers.Shared (lookupMetaString, fixDisplayMath)
+import Text.Pandoc.Shared (stringify, pandocVersion)
+import Text.Pandoc.Writers.Shared (lookupMetaString, lookupMetaBlocks,
+ fixDisplayMath)
import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toStringLazy)
import Text.Pandoc.Walk
import Text.Pandoc.Writers.OpenDocument (writeOpenDocument)
@@ -83,6 +85,7 @@ pandocToODT :: PandocMonad m
pandocToODT opts doc@(Pandoc meta _) = do
let title = docTitle meta
let authors = docAuthors meta
+ utctime <- P.getCurrentTime
lang <- toLang (getLang opts meta)
refArchive <-
case writerReferenceDoc opts of
@@ -125,9 +128,14 @@ pandocToODT opts doc@(Pandoc meta _) = do
)
)
let archive' = addEntryToArchive manifestEntry archive
+ -- create meta.xml
let userDefinedMetaFields = [k | k <- Map.keys (unMeta meta)
- , k `notElem` ["title", "lang", "author", "date"]]
+ , k `notElem` ["title", "lang", "author"
+ , "description", "subject", "keywords"]]
let escapedText = text . escapeStringForXML
+ let keywords = case lookupMeta "keywords" meta of
+ Just (MetaList xs) -> map stringify xs
+ _ -> []
let userDefinedMeta =
map (\k -> inTags False "meta:user-defined"
[ ("meta:name", escapeStringForXML k)
@@ -146,19 +154,29 @@ pandocToODT opts doc@(Pandoc meta _) = do
,("xmlns:ooo","http://openoffice.org/2004/office")
,("xmlns:grddl","http://www.w3.org/2003/g/data-view#")
,("office:version","1.2")] ( inTags True "office:meta" [] $
- ( metaTag "dc:title" (stringify title)
+ ( metaTag "meta:generator" ("Pandoc/" ++ pandocVersion)
+ $$
+ metaTag "dc:title" (stringify title)
+ $$
+ metaTag "dc:description"
+ (intercalate "\n" (map stringify $
+ lookupMetaBlocks "description" meta))
+ $$
+ metaTag "dc:subject" (lookupMetaString "subject" meta)
+ $$
+ metaTag "meta:keyword" (intercalate ", " keywords)
$$
case lang of
Just l -> metaTag "dc:language" (renderLang l)
Nothing -> empty
$$
- metaTag "dc:creator"
+ (\d a -> metaTag "meta:initial-creator" a
+ $$ metaTag "dc:creator" a
+ $$ metaTag "meta:creation-date" d
+ $$ metaTag "dc:date" d
+ ) (formatTime defaultTimeLocale "%FT%XZ" utctime)
(intercalate "; " (map stringify authors))
$$
- maybe mempty
- (metaTag "dc:date")
- (normalizeDate (lookupMetaString "date" meta))
- $$
vcat userDefinedMeta
)
)
diff --git a/src/Text/Pandoc/Writers/Powerpoint/Output.hs b/src/Text/Pandoc/Writers/Powerpoint/Output.hs
index 3e6652686..7ab50b8c4 100644
--- a/src/Text/Pandoc/Writers/Powerpoint/Output.hs
+++ b/src/Text/Pandoc/Writers/Powerpoint/Output.hs
@@ -58,7 +58,7 @@ import Text.Pandoc.MIME
import qualified Data.ByteString.Lazy as BL
import Text.Pandoc.Writers.OOXML
import qualified Data.Map as M
-import Data.Maybe (mapMaybe, listToMaybe, fromMaybe, maybeToList, catMaybes)
+import Data.Maybe (mapMaybe, listToMaybe, fromMaybe, maybeToList, catMaybes, isNothing)
import Text.Pandoc.ImageSize
import Control.Applicative ((<|>))
import System.FilePath.Glob
@@ -252,6 +252,8 @@ presentationToArchiveP p@(Presentation docProps slides) = do
newArch' <- foldM copyFileToArchive emptyArchive filePaths
-- we make a docProps/core.xml entry out of the presentation docprops
docPropsEntry <- docPropsToEntry docProps
+ -- we make a docProps/custom.xml entry out of the custom properties
+ docCustomPropsEntry <- docCustomPropsToEntry docProps
-- we make this ourself in case there's something unexpected in the
-- one in the reference doc.
relsEntry <- topLevelRelsEntry
@@ -274,7 +276,8 @@ presentationToArchiveP p@(Presentation docProps slides) = do
spkNotesEntries ++
spkNotesRelEntries ++
mediaEntries ++
- [contentTypesEntry, docPropsEntry, relsEntry, presEntry, presRelsEntry]
+ [contentTypesEntry, docPropsEntry, docCustomPropsEntry, relsEntry,
+ presEntry, presRelsEntry]
makeSlideIdMap :: Presentation -> M.Map SlideId Int
makeSlideIdMap (Presentation _ slides) =
@@ -1425,6 +1428,10 @@ topLevelRels =
, relType = "http://schemas.openxmlformats.org/package/2006/relationships/metadata/extended-properties"
, relTarget = "docProps/app.xml"
}
+ , Relationship { relId = 4
+ , relType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties"
+ , relTarget = "docProps/custom.xml"
+ }
]
topLevelRelsEntry :: PandocMonad m => P m Entry
@@ -1657,7 +1664,7 @@ docPropsElement :: PandocMonad m => DocProps -> P m Element
docPropsElement docProps = do
utctime <- asks envUTCTime
let keywords = case dcKeywords docProps of
- Just xs -> intercalate "," xs
+ Just xs -> intercalate ", " xs
Nothing -> ""
return $
mknode "cp:coreProperties"
@@ -1669,7 +1676,13 @@ docPropsElement docProps = do
$ (mknode "dc:title" [] $ fromMaybe "" $ dcTitle docProps)
: (mknode "dc:creator" [] $ fromMaybe "" $ dcCreator docProps)
: (mknode "cp:keywords" [] keywords)
- : (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x
+ : (if isNothing (dcSubject docProps) then [] else
+ [mknode "dc:subject" [] $ fromMaybe "" $ dcSubject docProps])
+ ++ (if isNothing (dcDescription docProps) then [] else
+ [mknode "dc:description" [] $ fromMaybe "" $ dcDescription docProps])
+ ++ (if isNothing (cpCategory docProps) then [] else
+ [mknode "cp:category" [] $ fromMaybe "" $ cpCategory docProps])
+ ++ (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x
, mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x
]) (formatTime defaultTimeLocale "%FT%XZ" utctime)
@@ -1677,6 +1690,21 @@ docPropsToEntry :: PandocMonad m => DocProps -> P m Entry
docPropsToEntry docProps = docPropsElement docProps >>=
elemToEntry "docProps/core.xml"
+-- adapted from the Docx writer
+docCustomPropsElement :: PandocMonad m => DocProps -> P m Element
+docCustomPropsElement docProps = do
+ let mkCustomProp (k, v) pid = mknode "property"
+ [("fmtid","{D5CDD505-2E9C-101B-9397-08002B2CF9AE}")
+ ,("pid", show pid)
+ ,("name", k)] $ mknode "vt:lpwstr" [] v
+ return $ mknode "Properties"
+ [("xmlns","http://schemas.openxmlformats.org/officeDocument/2006/custom-properties")
+ ,("xmlns:vt","http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes")
+ ] $ zipWith mkCustomProp (fromMaybe [] $ customProperties docProps) [(2 :: Int)..]
+
+docCustomPropsToEntry :: PandocMonad m => DocProps -> P m Entry
+docCustomPropsToEntry docProps = docCustomPropsElement docProps >>=
+ elemToEntry "docProps/custom.xml"
defaultContentTypeToElem :: DefaultContentType -> Element
defaultContentTypeToElem dct =
@@ -1765,6 +1793,7 @@ presentationToContentTypes p@(Presentation _ slides) = do
inheritedOverrides = mapMaybe pathToOverride filePaths
docPropsOverride = mapMaybe pathToOverride ["docProps/core.xml"]
+ docCustomPropsOverride = mapMaybe pathToOverride ["docProps/custom.xml"]
presOverride = mapMaybe pathToOverride ["ppt/presentation.xml"]
relativePaths <- mapM slideToFilePath slides
let slideOverrides = mapMaybe
@@ -1773,7 +1802,8 @@ presentationToContentTypes p@(Presentation _ slides) = do
speakerNotesOverrides <- (mapMaybe pathToOverride) <$> getSpeakerNotesFilePaths
return $ ContentTypes
(defaults ++ mediaDefaults)
- (inheritedOverrides ++ docPropsOverride ++ presOverride ++ slideOverrides ++ speakerNotesOverrides)
+ (inheritedOverrides ++ docPropsOverride ++ docCustomPropsOverride ++
+ presOverride ++ slideOverrides ++ speakerNotesOverrides)
presML :: String
presML = "application/vnd.openxmlformats-officedocument.presentationml"
@@ -1788,6 +1818,7 @@ getContentType fp
| fp == "ppt/viewProps.xml" = Just $ presML ++ ".viewProps+xml"
| fp == "ppt/tableStyles.xml" = Just $ presML ++ ".tableStyles+xml"
| fp == "docProps/core.xml" = Just $ "application/vnd.openxmlformats-package.core-properties+xml"
+ | fp == "docProps/custom.xml" = Just $ "application/vnd.openxmlformats-officedocument.custom-properties+xml"
| fp == "docProps/app.xml" = Just $ noPresML ++ ".extended-properties+xml"
| "ppt" : "slideMasters" : f : [] <- splitDirectories fp
, (_, ".xml") <- splitExtension f =
diff --git a/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs b/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs
index 7897f2b11..fee1a0f6e 100644
--- a/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs
+++ b/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs
@@ -72,7 +72,8 @@ import Text.Pandoc.Logging
import Text.Pandoc.Walk
import Data.Time (UTCTime)
import qualified Text.Pandoc.Shared as Shared -- so we don't overlap "Element"
-import Text.Pandoc.Writers.Shared (lookupMetaInlines, toTableOfContents)
+import Text.Pandoc.Writers.Shared (lookupMetaInlines, lookupMetaBlocks
+ , lookupMetaString, toTableOfContents)
import qualified Data.Map as M
import qualified Data.Set as S
import Data.Maybe (maybeToList, fromMaybe)
@@ -180,7 +181,10 @@ data DocProps = DocProps { dcTitle :: Maybe String
, dcSubject :: Maybe String
, dcCreator :: Maybe String
, dcKeywords :: Maybe [String]
+ , dcDescription :: Maybe String
+ , cpCategory :: Maybe String
, dcCreated :: Maybe UTCTime
+ , customProperties :: Maybe [(String, String)]
} deriving (Show, Eq)
@@ -930,13 +934,26 @@ metaToDocProps meta =
authors = case map Shared.stringify $ docAuthors meta of
[] -> Nothing
- ss -> Just $ intercalate ";" ss
+ ss -> Just $ intercalate "; " ss
+
+ description = case map Shared.stringify $ lookupMetaBlocks "description" meta of
+ [] -> Nothing
+ ss -> Just $ intercalate "_x000d_\n" ss
+
+ customProperties' = case [(k, lookupMetaString k meta) | k <- M.keys (unMeta meta)
+ , k `notElem` (["title", "author", "keywords", "description"
+ , "subject","lang","category"])] of
+ [] -> Nothing
+ ss -> Just ss
in
DocProps{ dcTitle = Shared.stringify <$> lookupMeta "title" meta
, dcSubject = Shared.stringify <$> lookupMeta "subject" meta
, dcCreator = authors
, dcKeywords = keywords
+ , dcDescription = description
+ , cpCategory = Shared.stringify <$> lookupMeta "category" meta
, dcCreated = Nothing
+ , customProperties = customProperties'
}
documentToPresentation :: WriterOptions