diff options
-rw-r--r-- | MANUAL.txt | 14 | ||||
-rw-r--r-- | pandoc.cabal | 1 | ||||
-rw-r--r-- | src/Text/Pandoc/App.hs | 17 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 101 | ||||
-rw-r--r-- | test/command/yaml-metadata-blocks.md | 63 | ||||
-rw-r--r-- | test/command/yaml-metadata.yaml | 4 |
6 files changed, 150 insertions, 50 deletions
diff --git a/MANUAL.txt b/MANUAL.txt index ddd849850..ecb2bcb2c 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -578,6 +578,16 @@ Reader options printed in some output formats) and metadata values will be escaped when inserted into the template. +`--metadata-file=`*FILE* + +: Read metadata from the supplied YAML (or JSON) file. + This option can be used with every input format, but string + scalars in the YAML file will always be parsed as Markdown. + Generally, the input will be handled the same as in + [YAML metadata blocks][Extension: `yaml_metadata_block`]. + Metadata values specified inside the document, or by using `-M`, + overwrite values specified with this option. + `-p`, `--preserve-tabs` : Preserve tabs instead of converting them to spaces (the default). @@ -3061,7 +3071,9 @@ and pass it to pandoc as an argument, along with your Markdown files: pandoc chap1.md chap2.md chap3.md metadata.yaml -s -o book.html Just be sure that the YAML file begins with `---` and ends with `---` or -`...`.) +`...`.) Alternatively, you can use the `--metadata-file` option. Using +that approach however, you cannot reference content (like footnotes) +from the main markdown input document. Metadata will be taken from the fields of the YAML object and added to any existing document metadata. Metadata can contain lists and objects (nested diff --git a/pandoc.cabal b/pandoc.cabal index 86fe68dcf..591c1960c 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -193,6 +193,7 @@ extra-source-files: test/command/sub-file-chapter-1.tex test/command/sub-file-chapter-2.tex test/command/bar.tex + test/command/yaml-metadata.yaml test/command/3510-subdoc.org test/command/3510-export.latex test/command/3510-src.hs diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 44bb30223..cb1db4f89 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -89,6 +89,7 @@ import Text.Pandoc.Builder (setMeta, deleteMeta) import Text.Pandoc.Filter (Filter (JSONFilter, LuaFilter), applyFilters) import Text.Pandoc.Highlighting (highlightingStyles) import Text.Pandoc.PDF (makePDF) +import Text.Pandoc.Readers.Markdown (yamlToMeta) import Text.Pandoc.SelfContained (makeDataURI, makeSelfContained) import Text.Pandoc.Shared (eastAsianLineBreakFilter, stripEmptyParagraphs, headerShift, isURI, ordNub, safeRead, tabFilter, uriPathToPath) @@ -399,6 +400,10 @@ convertWithOpts opts = do ("application/xml", jatsCSL) return $ ("csl", jatsEncoded) : optMetadata opts else return $ optMetadata opts + metadataFromFile <- + case optMetadataFile opts of + Nothing -> return mempty + Just file -> readFileLazy file >>= yamlToMeta case lookup "lang" (optMetadata opts) of Just l -> case parseBCP47 l of @@ -491,6 +496,7 @@ convertWithOpts opts = do ( (if isJust (optExtractMedia opts) then fillMediaBag else return) + >=> return . addNonPresentMetadata metadataFromFile >=> return . addMetadata metadata >=> applyTransforms transforms >=> applyFilters readerOpts filters' [format] @@ -556,6 +562,7 @@ data Opt = Opt , optTemplate :: Maybe FilePath -- ^ Custom template , optVariables :: [(String,String)] -- ^ Template variables to set , optMetadata :: [(String, String)] -- ^ Metadata fields to set + , optMetadataFile :: Maybe FilePath -- ^ Name of YAML metadata file , optOutputFile :: Maybe FilePath -- ^ Name of output file , optInputFiles :: [FilePath] -- ^ Names of input files , optNumberSections :: Bool -- ^ Number sections in LaTeX @@ -628,6 +635,7 @@ defaultOpts = Opt , optTemplate = Nothing , optVariables = [] , optMetadata = [] + , optMetadataFile = Nothing , optOutputFile = Nothing , optInputFiles = [] , optNumberSections = False @@ -687,6 +695,9 @@ defaultOpts = Opt , optStripComments = False } +addNonPresentMetadata :: Text.Pandoc.Meta -> Pandoc -> Pandoc +addNonPresentMetadata newmeta (Pandoc meta bs) = Pandoc (meta <> newmeta) bs + addMetadata :: [(String, String)] -> Pandoc -> Pandoc addMetadata kvs pdc = foldr addMeta (removeMetaKeys kvs pdc) kvs @@ -963,6 +974,12 @@ options = "KEY[:VALUE]") "" + , Option "" ["metadata-file"] + (ReqArg + (\arg opt -> return opt{ optMetadataFile = Just arg }) + "FILE") + "" + , Option "V" ["variable"] (ReqArg (\arg opt -> do diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index a81942a9e..502abae9a 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -31,11 +31,12 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of markdown-formatted plain text to 'Pandoc' document. -} -module Text.Pandoc.Readers.Markdown ( readMarkdown ) where +module Text.Pandoc.Readers.Markdown ( readMarkdown, yamlToMeta ) where import Prelude import Control.Monad import Control.Monad.Except (throwError) +import qualified Data.ByteString.Lazy as BS import Data.Char (isAlphaNum, isPunctuation, isSpace, toLower) import Data.List (intercalate, sortBy, transpose, elemIndex) import qualified Data.Map as M @@ -233,7 +234,6 @@ pandocTitleBlock = try $ do yamlMetaBlock :: PandocMonad m => MarkdownParser m (F Blocks) yamlMetaBlock = try $ do guardEnabled Ext_yaml_metadata_block - pos <- getPosition string "---" blankline notFollowedBy blankline -- if --- is followed by a blank it's an HRULE @@ -241,47 +241,45 @@ yamlMetaBlock = try $ do -- by including --- and ..., we allow yaml blocks with just comments: let rawYaml = unlines ("---" : (rawYamlLines ++ ["..."])) optional blanklines - case YAML.decodeNode' YAML.failsafeSchemaResolver False False - (UTF8.fromStringLazy rawYaml) of - Right [YAML.Doc (YAML.Mapping _ hashmap)] -> - mapM_ (\(key, v) -> do - k <- nodeToKey key - if ignorable k - then return () - else do - v' <- yamlToMeta v - let k' = T.unpack k - updateState $ \st -> st{ stateMeta' = - do m <- stateMeta' st - -- if there's already a value, leave it unchanged - case lookupMeta k' m of - Just _ -> return m - Nothing -> do - v'' <- v' - return $ B.setMeta (T.unpack k) v'' m}) - (M.toList hashmap) - Right [] -> return () - Right [YAML.Doc (YAML.Scalar YAML.SNull)] -> return () + newMetaF <- yamlBsToMeta $ UTF8.fromStringLazy rawYaml + -- Since `<>` is left-biased, existing values are not touched: + updateState $ \st -> st{ stateMeta' = (stateMeta' st) <> newMetaF } + return mempty + +-- | Read a YAML string and convert it to pandoc metadata. +-- String scalars in the YAML are parsed as Markdown. +yamlToMeta :: PandocMonad m => BS.ByteString -> m Meta +yamlToMeta bstr = do + let parser = do + meta <- yamlBsToMeta bstr + return $ runF meta defaultParserState + parsed <- readWithM parser def "" + case parsed of + Right result -> return result + Left e -> throwError e + +yamlBsToMeta :: PandocMonad m => BS.ByteString -> MarkdownParser m (F Meta) +yamlBsToMeta bstr = do + pos <- getPosition + case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of + Right ((YAML.Doc (YAML.Mapping _ o)):_) -> (fmap Meta) <$> yamlMap o + Right [] -> return . return $ mempty + Right [YAML.Doc (YAML.Scalar YAML.SNull)] -> return . return $ mempty Right _ -> do logMessage $ CouldNotParseYamlMetadata "not an object" pos - return () + return . return $ mempty Left err' -> do logMessage $ CouldNotParseYamlMetadata err' pos - return () - return mempty + return . return $ mempty nodeToKey :: Monad m => YAML.Node -> m Text nodeToKey (YAML.Scalar (YAML.SStr t)) = return t nodeToKey (YAML.Scalar (YAML.SUnknown _ t)) = return t nodeToKey _ = fail "Non-string key in YAML mapping" --- ignore fields ending with _ -ignorable :: Text -> Bool -ignorable t = (T.pack "_") `T.isSuffixOf` t - toMetaValue :: PandocMonad m => Text -> MarkdownParser m (F MetaValue) toMetaValue x = @@ -309,9 +307,9 @@ checkBoolean t = then Just False else Nothing -yamlToMeta :: PandocMonad m - => YAML.Node -> MarkdownParser m (F MetaValue) -yamlToMeta (YAML.Scalar x) = +yamlToMetaValue :: PandocMonad m + => YAML.Node -> MarkdownParser m (F MetaValue) +yamlToMetaValue (YAML.Scalar x) = case x of YAML.SStr t -> toMetaValue t YAML.SBool b -> return $ return $ MetaBool b @@ -322,25 +320,30 @@ yamlToMeta (YAML.Scalar x) = Just b -> return $ return $ MetaBool b Nothing -> toMetaValue t YAML.SNull -> return $ return $ MetaString "" -yamlToMeta (YAML.Sequence _ xs) = do - xs' <- mapM yamlToMeta xs +yamlToMetaValue (YAML.Sequence _ xs) = do + xs' <- mapM yamlToMetaValue xs return $ do xs'' <- sequence xs' return $ B.toMetaValue xs'' -yamlToMeta (YAML.Mapping _ o) = - foldM (\m (key, v) -> do - k <- nodeToKey key - if ignorable k - then return m - else do - v' <- yamlToMeta v - return $ do - MetaMap m' <- m - v'' <- v' - return (MetaMap $ M.insert (T.unpack k) v'' m')) - (return $ MetaMap M.empty) - (M.toList o) -yamlToMeta _ = return $ return $ MetaString "" +yamlToMetaValue (YAML.Mapping _ o) = fmap B.toMetaValue <$> yamlMap o +yamlToMetaValue _ = return $ return $ MetaString "" + +yamlMap :: PandocMonad m + => M.Map YAML.Node YAML.Node + -> MarkdownParser m (F (M.Map String MetaValue)) +yamlMap o = do + kvs <- forM (M.toList o) $ \(key, v) -> do + k <- nodeToKey key + return (k, v) + let kvs' = filter (not . ignorable . fst) kvs + (fmap M.fromList . sequence) <$> mapM toMeta kvs' + where + ignorable t = (T.pack "_") `T.isSuffixOf` t + toMeta (k, v) = do + fv <- yamlToMetaValue v + return $ do + v' <- fv + return (T.unpack k, v') stopLine :: PandocMonad m => MarkdownParser m () stopLine = try $ (string "---" <|> string "...") >> blankline >> return () diff --git a/test/command/yaml-metadata-blocks.md b/test/command/yaml-metadata-blocks.md new file mode 100644 index 000000000..5b73cff72 --- /dev/null +++ b/test/command/yaml-metadata-blocks.md @@ -0,0 +1,63 @@ +``` +% pandoc -s -t native +--- +foobar_: this should be ignored +foo: + bar_: as should this +--- +^D +Pandoc (Meta {unMeta = fromList [("foo",MetaMap (fromList []))]}) +[] +``` +``` +% pandoc -s -t native +--- +# For precedence, see multiple-metadata-blocks.md and vars-and-metadata.md +# For Bools, see also 4819.md +# For Multiline strings, see yaml-with-chomp.md +int: 7 +float: 1.5 +scientific: 3.7e-5 +bool: true +more: False +nothing: null +emtpy: [] +nested: + int: 8 + float: 2.5 + bool: true + more: False + nothing: null + emtpy: [] + scientific: 3.7e-5 +--- +^D +Pandoc (Meta {unMeta = fromList [("bool",MetaBool True),("emtpy",MetaList []),("float",MetaInlines [Str "1.5"]),("int",MetaInlines [Str "7"]),("more",MetaBool False),("nested",MetaMap (fromList [("bool",MetaBool True),("emtpy",MetaList []),("float",MetaInlines [Str "2.5"]),("int",MetaInlines [Str "8"]),("more",MetaBool False),("nothing",MetaInlines [Str "null"]),("scientific",MetaInlines [Str "3.7e-5"])])),("nothing",MetaInlines [Str "null"]),("scientific",MetaInlines [Str "3.7e-5"])]}) +[] +``` +``` +% pandoc -s -t native +--- +array: + - foo: bar + - bool: True +--- +^D +Pandoc (Meta {unMeta = fromList [("array",MetaList [MetaMap (fromList [("foo",MetaInlines [Str "bar"])]),MetaMap (fromList [("bool",MetaBool True)])])]}) +[] +``` +``` +% pandoc -s -t native --metadata-file command/yaml-metadata.yaml +--- +title: document +--- +^D +Pandoc (Meta {unMeta = fromList [("other",MetaInlines [Emph [Str "markdown"],Space,Str "value"]),("title",MetaInlines [Str "document"])]}) +[] +``` +``` +% pandoc -s -t native --metadata-file command/yaml-metadata.yaml -M title=cmdline +^D +Pandoc (Meta {unMeta = fromList [("other",MetaInlines [Emph [Str "markdown"],Space,Str "value"]),("title",MetaString "cmdline")]}) +[] +``` diff --git a/test/command/yaml-metadata.yaml b/test/command/yaml-metadata.yaml new file mode 100644 index 000000000..9cd0043d3 --- /dev/null +++ b/test/command/yaml-metadata.yaml @@ -0,0 +1,4 @@ +--- +title: file +other: _markdown_ value +--- |