diff options
| author | Agustín Martín Barbero <agusmbaterra@gmail.com> | 2019-01-26 16:14:35 -0800 | 
|---|---|---|
| committer | John MacFarlane <jgm@berkeley.edu> | 2019-01-26 16:14:35 -0800 | 
| commit | 9894d05fe3fd239247f755c60dc22247360be958 (patch) | |
| tree | 92f5d6240edfd3bf72cc3c30295d18bef76a4077 /src/Text | |
| parent | ff0aaa549d51384ef3cdcb063706dee4f6143444 (diff) | |
| download | pandoc-9894d05fe3fd239247f755c60dc22247360be958.tar.gz | |
Improve writing metadata for docx, pptx and odt (#5252)
* docx writer: support custom properties.  Solves the writer part of #3024.
  Also supports additional core properties:  `subject`, `lang`, `category`,
  `description`.
* odt writer: improve standard properties, including the following core properties:
  `generator` (Pandoc/VERSION), `description`, `subject`, `keywords`,
  `initial-creator` (from authors), `creation-date` (actual creation date).
  Also fix date.
* pptx writer: support custom properties.  Also supports additional core
  properties: `subject`, `category`, `description`.
* Includes golden tests.
* MANUAL: document metadata support for docx, odt, pptx writers
Diffstat (limited to 'src/Text')
| -rw-r--r-- | src/Text/Pandoc/Writers/Docx.hs | 19 | ||||
| -rw-r--r-- | src/Text/Pandoc/Writers/ODT.hs | 36 | ||||
| -rw-r--r-- | src/Text/Pandoc/Writers/Powerpoint/Output.hs | 41 | ||||
| -rw-r--r-- | src/Text/Pandoc/Writers/Powerpoint/Presentation.hs | 21 | 
4 files changed, 99 insertions, 18 deletions
| diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 4f642871a..cee339ac7 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -496,7 +496,17 @@ writeDocx opts doc@(Pandoc meta _) = do                         Just (MetaList xs) -> map stringify xs                         _                  -> [] +  -- docProps/core.xml    let docPropsPath = "docProps/core.xml" +  let extraCoreProps = ["subject","lang","category","description"] +  let extraCorePropsMap = M.fromList $ zip extraCoreProps +                       ["dc:subject","dc:language","cp:category","dc:description"] +  let lookupMetaString' :: String -> Meta -> String +      lookupMetaString' key' meta' = +        case key' of +             "description"    -> intercalate "_x000d_\n" (map stringify $ lookupMetaBlocks "description" meta') +             _                -> lookupMetaString key' meta' +      let docProps = mknode "cp:coreProperties"            [("xmlns:cp","http://schemas.openxmlformats.org/package/2006/metadata/core-properties")            ,("xmlns:dc","http://purl.org/dc/elements/1.1/") @@ -505,14 +515,19 @@ writeDocx opts doc@(Pandoc meta _) = do            ,("xmlns:xsi","http://www.w3.org/2001/XMLSchema-instance")]            $ mknode "dc:title" [] (stringify $ docTitle meta)            : mknode "dc:creator" [] (intercalate "; " (map stringify $ docAuthors meta)) -          : mknode "cp:keywords" [] (intercalate ", " keywords) +          : [ mknode (M.findWithDefault "" k extraCorePropsMap) [] (lookupMetaString' k meta) +            | k <- M.keys (unMeta meta), k `elem` extraCoreProps] +          ++ mknode "cp:keywords" [] (intercalate ", " keywords)            : (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x                     , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x                     ]) (formatTime defaultTimeLocale "%FT%XZ" utctime)    let docPropsEntry = toEntry docPropsPath epochtime $ renderXml docProps +  -- docProps/custom.xml    let customProperties :: [(String, String)] -      customProperties = [] -- FIXME +      customProperties = [(k, lookupMetaString k meta) | k <- M.keys (unMeta meta) +                         , k `notElem` (["title", "author", "keywords"] +                                       ++ extraCoreProps)]    let mkCustomProp (k, v) pid = mknode "property"           [("fmtid","{D5CDD505-2E9C-101B-9397-08002B2CF9AE}")           ,("pid", show pid) diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index a03575134..481e88da2 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -40,6 +40,7 @@ import Data.List (isPrefixOf, intercalate)  import Data.Maybe (fromMaybe)  import qualified Data.Map as Map  import qualified Data.Text.Lazy as TL +import Data.Time  import System.FilePath (takeDirectory, takeExtension, (<.>))  import Text.Pandoc.BCP47 (Lang (..), getLang, renderLang)  import Text.Pandoc.Class (PandocMonad, report, toLang) @@ -50,8 +51,9 @@ import Text.Pandoc.Logging  import Text.Pandoc.MIME (extensionFromMimeType, getMimeType)  import Text.Pandoc.Options (WrapOption (..), WriterOptions (..))  import Text.Pandoc.Pretty -import Text.Pandoc.Shared (stringify, normalizeDate) -import Text.Pandoc.Writers.Shared (lookupMetaString, fixDisplayMath) +import Text.Pandoc.Shared (stringify, pandocVersion) +import Text.Pandoc.Writers.Shared (lookupMetaString, lookupMetaBlocks, +                                   fixDisplayMath)  import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toStringLazy)  import Text.Pandoc.Walk  import Text.Pandoc.Writers.OpenDocument (writeOpenDocument) @@ -83,6 +85,7 @@ pandocToODT :: PandocMonad m  pandocToODT opts doc@(Pandoc meta _) = do    let title = docTitle meta    let authors = docAuthors meta +  utctime <- P.getCurrentTime    lang <- toLang (getLang opts meta)    refArchive <-         case writerReferenceDoc opts of @@ -125,9 +128,14 @@ pandocToODT opts doc@(Pandoc meta _) = do                )           )    let archive' = addEntryToArchive manifestEntry archive +  -- create meta.xml    let userDefinedMetaFields = [k | k <- Map.keys (unMeta meta) -                              , k `notElem` ["title", "lang", "author", "date"]] +                              , k `notElem` ["title", "lang", "author" +                                           , "description", "subject", "keywords"]]    let escapedText = text . escapeStringForXML +  let keywords = case lookupMeta "keywords" meta of +                      Just (MetaList xs) -> map stringify xs +                      _                  -> []    let userDefinedMeta =          map (\k -> inTags False "meta:user-defined"                [ ("meta:name", escapeStringForXML k) @@ -146,19 +154,29 @@ pandocToODT opts doc@(Pandoc meta _) = do             ,("xmlns:ooo","http://openoffice.org/2004/office")             ,("xmlns:grddl","http://www.w3.org/2003/g/data-view#")             ,("office:version","1.2")] ( inTags True "office:meta" [] $ -                 ( metaTag "dc:title" (stringify title) +                 ( metaTag "meta:generator" ("Pandoc/" ++ pandocVersion) +                   $$ +                   metaTag "dc:title" (stringify title) +                   $$ +                   metaTag "dc:description" +                          (intercalate "\n" (map stringify $ +                                         lookupMetaBlocks "description" meta)) +                   $$ +                   metaTag "dc:subject" (lookupMetaString "subject" meta) +                   $$ +                   metaTag "meta:keyword" (intercalate ", " keywords)                     $$                     case lang of                          Just l  -> metaTag "dc:language" (renderLang l)                          Nothing -> empty                     $$ -                   metaTag "dc:creator" +                   (\d a -> metaTag "meta:initial-creator" a +                         $$ metaTag "dc:creator" a +                         $$ metaTag "meta:creation-date" d +                         $$ metaTag "dc:date" d +                   ) (formatTime defaultTimeLocale "%FT%XZ" utctime)                       (intercalate "; " (map stringify authors))                     $$ -                   maybe mempty -                     (metaTag "dc:date") -                       (normalizeDate (lookupMetaString "date" meta)) -                   $$                     vcat userDefinedMeta                   )               ) diff --git a/src/Text/Pandoc/Writers/Powerpoint/Output.hs b/src/Text/Pandoc/Writers/Powerpoint/Output.hs index 3e6652686..7ab50b8c4 100644 --- a/src/Text/Pandoc/Writers/Powerpoint/Output.hs +++ b/src/Text/Pandoc/Writers/Powerpoint/Output.hs @@ -58,7 +58,7 @@ import Text.Pandoc.MIME  import qualified Data.ByteString.Lazy as BL  import Text.Pandoc.Writers.OOXML  import qualified Data.Map as M -import Data.Maybe (mapMaybe, listToMaybe, fromMaybe, maybeToList, catMaybes) +import Data.Maybe (mapMaybe, listToMaybe, fromMaybe, maybeToList, catMaybes, isNothing)  import Text.Pandoc.ImageSize  import Control.Applicative ((<|>))  import System.FilePath.Glob @@ -252,6 +252,8 @@ presentationToArchiveP p@(Presentation docProps slides) = do    newArch' <- foldM copyFileToArchive emptyArchive filePaths    -- we make a docProps/core.xml entry out of the presentation docprops    docPropsEntry <- docPropsToEntry docProps +  -- we make a docProps/custom.xml entry out of the custom properties +  docCustomPropsEntry <- docCustomPropsToEntry docProps    -- we make this ourself in case there's something unexpected in the    -- one in the reference doc.    relsEntry <- topLevelRelsEntry @@ -274,7 +276,8 @@ presentationToArchiveP p@(Presentation docProps slides) = do      spkNotesEntries ++      spkNotesRelEntries ++      mediaEntries ++ -    [contentTypesEntry, docPropsEntry, relsEntry, presEntry, presRelsEntry] +    [contentTypesEntry, docPropsEntry, docCustomPropsEntry, relsEntry, +     presEntry, presRelsEntry]  makeSlideIdMap :: Presentation -> M.Map SlideId Int  makeSlideIdMap (Presentation _ slides) = @@ -1425,6 +1428,10 @@ topLevelRels =                   , relType = "http://schemas.openxmlformats.org/package/2006/relationships/metadata/extended-properties"                   , relTarget = "docProps/app.xml"                   } +  , Relationship { relId = 4 +                 , relType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties" +                 , relTarget = "docProps/custom.xml" +                 }    ]  topLevelRelsEntry :: PandocMonad m => P m Entry @@ -1657,7 +1664,7 @@ docPropsElement :: PandocMonad m => DocProps -> P m Element  docPropsElement docProps = do    utctime <- asks envUTCTime    let keywords = case dcKeywords docProps of -        Just xs -> intercalate "," xs +        Just xs -> intercalate ", " xs          Nothing -> ""    return $      mknode "cp:coreProperties" @@ -1669,7 +1676,13 @@ docPropsElement docProps = do      $ (mknode "dc:title" [] $ fromMaybe "" $ dcTitle docProps)      : (mknode "dc:creator" [] $ fromMaybe "" $ dcCreator docProps)      : (mknode "cp:keywords" [] keywords) -    : (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x +    : (if isNothing (dcSubject docProps) then [] else +           [mknode "dc:subject" [] $ fromMaybe "" $ dcSubject docProps]) +    ++ (if isNothing (dcDescription docProps) then [] else +           [mknode "dc:description" [] $ fromMaybe "" $ dcDescription docProps]) +    ++ (if isNothing (cpCategory docProps) then [] else +           [mknode "cp:category" [] $ fromMaybe "" $ cpCategory docProps]) +    ++ (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x               , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x               ]) (formatTime defaultTimeLocale "%FT%XZ" utctime) @@ -1677,6 +1690,21 @@ docPropsToEntry :: PandocMonad m => DocProps -> P m Entry  docPropsToEntry docProps = docPropsElement docProps >>=                             elemToEntry "docProps/core.xml" +-- adapted from the Docx writer +docCustomPropsElement :: PandocMonad m => DocProps -> P m Element +docCustomPropsElement docProps = do +  let mkCustomProp (k, v) pid = mknode "property" +         [("fmtid","{D5CDD505-2E9C-101B-9397-08002B2CF9AE}") +         ,("pid", show pid) +         ,("name", k)] $ mknode "vt:lpwstr" [] v +  return $ mknode "Properties" +          [("xmlns","http://schemas.openxmlformats.org/officeDocument/2006/custom-properties") +          ,("xmlns:vt","http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes") +          ] $ zipWith mkCustomProp (fromMaybe [] $ customProperties docProps) [(2 :: Int)..] + +docCustomPropsToEntry :: PandocMonad m => DocProps -> P m Entry +docCustomPropsToEntry docProps = docCustomPropsElement docProps >>= +                           elemToEntry "docProps/custom.xml"  defaultContentTypeToElem :: DefaultContentType -> Element  defaultContentTypeToElem dct = @@ -1765,6 +1793,7 @@ presentationToContentTypes p@(Presentation _ slides) = do        inheritedOverrides = mapMaybe pathToOverride filePaths        docPropsOverride = mapMaybe pathToOverride ["docProps/core.xml"] +      docCustomPropsOverride = mapMaybe pathToOverride ["docProps/custom.xml"]        presOverride = mapMaybe pathToOverride ["ppt/presentation.xml"]    relativePaths <- mapM slideToFilePath slides    let slideOverrides = mapMaybe @@ -1773,7 +1802,8 @@ presentationToContentTypes p@(Presentation _ slides) = do    speakerNotesOverrides <- (mapMaybe pathToOverride) <$> getSpeakerNotesFilePaths    return $ ContentTypes      (defaults ++ mediaDefaults) -    (inheritedOverrides ++ docPropsOverride ++ presOverride ++ slideOverrides ++ speakerNotesOverrides) +    (inheritedOverrides ++ docPropsOverride ++ docCustomPropsOverride ++ +     presOverride ++ slideOverrides ++ speakerNotesOverrides)  presML :: String  presML = "application/vnd.openxmlformats-officedocument.presentationml" @@ -1788,6 +1818,7 @@ getContentType fp    | fp == "ppt/viewProps.xml" = Just $ presML ++ ".viewProps+xml"    | fp == "ppt/tableStyles.xml" = Just $ presML ++ ".tableStyles+xml"    | fp == "docProps/core.xml" = Just $ "application/vnd.openxmlformats-package.core-properties+xml" +  | fp == "docProps/custom.xml" = Just $ "application/vnd.openxmlformats-officedocument.custom-properties+xml"    | fp == "docProps/app.xml" = Just $ noPresML ++ ".extended-properties+xml"    | "ppt" : "slideMasters" : f : [] <- splitDirectories fp    , (_, ".xml") <- splitExtension f = diff --git a/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs b/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs index 7897f2b11..fee1a0f6e 100644 --- a/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs +++ b/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs @@ -72,7 +72,8 @@ import Text.Pandoc.Logging  import Text.Pandoc.Walk  import Data.Time (UTCTime)  import qualified Text.Pandoc.Shared as Shared -- so we don't overlap "Element" -import Text.Pandoc.Writers.Shared (lookupMetaInlines, toTableOfContents) +import Text.Pandoc.Writers.Shared (lookupMetaInlines, lookupMetaBlocks +                                 , lookupMetaString, toTableOfContents)  import qualified Data.Map as M  import qualified Data.Set as S  import Data.Maybe (maybeToList, fromMaybe) @@ -180,7 +181,10 @@ data DocProps = DocProps { dcTitle :: Maybe String                           , dcSubject :: Maybe String                           , dcCreator :: Maybe String                           , dcKeywords :: Maybe [String] +                         , dcDescription :: Maybe String +                         , cpCategory :: Maybe String                           , dcCreated :: Maybe UTCTime +                         , customProperties :: Maybe [(String, String)]                           } deriving (Show, Eq) @@ -930,13 +934,26 @@ metaToDocProps meta =        authors = case map Shared.stringify $ docAuthors meta of                    [] -> Nothing -                  ss -> Just $ intercalate ";" ss +                  ss -> Just $ intercalate "; " ss + +      description = case map Shared.stringify $ lookupMetaBlocks "description" meta of +                  [] -> Nothing +                  ss -> Just $ intercalate "_x000d_\n" ss + +      customProperties' = case [(k, lookupMetaString k meta) | k <- M.keys (unMeta meta) +                               , k `notElem` (["title", "author", "keywords", "description" +                                             , "subject","lang","category"])] of +                  [] -> Nothing +                  ss -> Just ss    in      DocProps{ dcTitle = Shared.stringify <$> lookupMeta "title" meta              , dcSubject = Shared.stringify <$> lookupMeta "subject" meta              , dcCreator = authors              , dcKeywords = keywords +            , dcDescription = description +            , cpCategory = Shared.stringify <$> lookupMeta "category" meta              , dcCreated = Nothing +            , customProperties = customProperties'              }  documentToPresentation :: WriterOptions | 
