aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2019-11-24 11:50:28 -0800
committerJohn MacFarlane <jgm@berkeley.edu>2019-11-24 11:50:28 -0800
commit659ee981764b85fb46845c086e3b10f1fc57a712 (patch)
tree6696a8b6ab6e81c76caf4e36dfecd0c08dac06ba /src/Text/Pandoc
parentda5b6d5c0bc441732f36f5a2f7577f5108caaa7f (diff)
downloadpandoc-659ee981764b85fb46845c086e3b10f1fc57a712.tar.gz
Add unexported Text.Pandoc.Readers.Metadata.
For YAML metadata parsing. A step in the direction of #5914. No API change.
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r--src/Text/Pandoc/App.hs2
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs125
-rw-r--r--src/Text/Pandoc/Readers/Metadata.hs140
3 files changed, 162 insertions, 105 deletions
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index 4dcd76a24..0565874ad 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -55,12 +55,12 @@ import Text.Pandoc.BCP47 (Lang (..), parseBCP47)
import Text.Pandoc.Builder (setMeta)
import Text.Pandoc.Filter (Filter (JSONFilter, LuaFilter), applyFilters)
import Text.Pandoc.PDF (makePDF)
-import Text.Pandoc.Readers.Markdown (yamlToMeta)
import Text.Pandoc.SelfContained (makeDataURI, makeSelfContained)
import Text.Pandoc.Shared (eastAsianLineBreakFilter, stripEmptyParagraphs,
headerShift, isURI, tabFilter, uriPathToPath, filterIpynbOutput,
defaultUserDataDirs, tshow)
import Text.Pandoc.Writers.Shared (lookupMetaString)
+import Text.Pandoc.Readers.Markdown (yamlToMeta)
import qualified Text.Pandoc.UTF8 as UTF8
#ifndef _WINDOWS
import System.Posix.IO (stdOutput)
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index cc3173719..e46396fa0 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -20,7 +20,6 @@ module Text.Pandoc.Readers.Markdown ( readMarkdown, yamlToMeta ) where
import Prelude
import Control.Monad
import Control.Monad.Except (throwError)
-import qualified Data.ByteString.Lazy as BS
import Data.Char (isAlphaNum, isPunctuation, isSpace)
import Data.List (sortBy, transpose, elemIndex)
import qualified Data.Map as M
@@ -30,8 +29,7 @@ import qualified Data.Set as Set
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
-import qualified Data.YAML as YAML
-import qualified Data.YAML.Event as YE
+import qualified Data.ByteString.Lazy as BL
import System.FilePath (addExtension, takeExtension)
import Text.HTML.TagSoup
import Text.Pandoc.Builder (Blocks, Inlines)
@@ -49,6 +47,7 @@ import Text.Pandoc.Readers.LaTeX (applyMacros, rawLaTeXBlock, rawLaTeXInline)
import Text.Pandoc.Shared
import qualified Text.Pandoc.UTF8 as UTF8
import Text.Pandoc.XML (fromEntities)
+import Text.Pandoc.Readers.Metadata (yamlBsToMeta)
type MarkdownParser m = ParserT Text ParserState m
@@ -64,6 +63,23 @@ readMarkdown opts s = do
Right result -> return result
Left e -> throwError e
+-- | Read a YAML string and convert it to pandoc metadata.
+-- String scalars in the YAML are parsed as Markdown.
+yamlToMeta :: PandocMonad m
+ => ReaderOptions
+ -> BL.ByteString
+ -> m Meta
+yamlToMeta opts bstr = do
+ let parser = do
+ meta <- yamlBsToMeta parseBlocks bstr
+ return $ runF meta defaultParserState
+ parsed <- readWithM parser def{ stateOptions = opts } ""
+ case parsed of
+ Right result -> return result
+ Left e -> throwError e
+
+
+
--
-- Constants and data structure definitions
--
@@ -228,111 +244,12 @@ yamlMetaBlock = try $ do
-- by including --- and ..., we allow yaml blocks with just comments:
let rawYaml = T.unlines ("---" : (rawYamlLines ++ ["..."]))
optional blanklines
- newMetaF <- yamlBsToMeta $ UTF8.fromTextLazy $ TL.fromStrict rawYaml
+ newMetaF <- yamlBsToMeta parseBlocks
+ $ UTF8.fromTextLazy $ TL.fromStrict rawYaml
-- Since `<>` is left-biased, existing values are not touched:
updateState $ \st -> st{ stateMeta' = (stateMeta' st) <> newMetaF }
return mempty
--- | Read a YAML string and convert it to pandoc metadata.
--- String scalars in the YAML are parsed as Markdown.
-yamlToMeta :: PandocMonad m => ReaderOptions -> BS.ByteString -> m Meta
-yamlToMeta opts bstr = do
- let parser = do
- meta <- yamlBsToMeta bstr
- return $ runF meta defaultParserState
- parsed <- readWithM parser def{ stateOptions = opts } ""
- case parsed of
- Right result -> return result
- Left e -> throwError e
-
-yamlBsToMeta :: PandocMonad m => BS.ByteString -> MarkdownParser m (F Meta)
-yamlBsToMeta bstr = do
- pos <- getPosition
- case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of
- Right ((YAML.Doc (YAML.Mapping _ _ o)):_) -> (fmap Meta) <$> yamlMap o
- Right [] -> return . return $ mempty
- Right [YAML.Doc (YAML.Scalar _ YAML.SNull)] -> return . return $ mempty
- Right _ -> do
- logMessage $
- CouldNotParseYamlMetadata "not an object"
- pos
- return . return $ mempty
- Left (_pos, err') -> do
- logMessage $ CouldNotParseYamlMetadata
- (T.pack err') pos
- return . return $ mempty
-
-nodeToKey :: PandocMonad m => YAML.Node YE.Pos -> m Text
-nodeToKey (YAML.Scalar _ (YAML.SStr t)) = return t
-nodeToKey (YAML.Scalar _ (YAML.SUnknown _ t)) = return t
-nodeToKey _ = throwError $ PandocParseError
- "Non-string key in YAML mapping"
-
-toMetaValue :: PandocMonad m
- => Text -> MarkdownParser m (F MetaValue)
-toMetaValue x =
- -- Note: a standard quoted or unquoted YAML value will
- -- not end in a newline, but a "block" set off with
- -- `|` or `>` will.
- if "\n" `T.isSuffixOf` x
- then parseFromString' (asBlocks <$> parseBlocks) (x <> "\n")
- else parseFromString'
- ((asInlines <$> try pInlines) <|> (asBlocks <$> parseBlocks))
- x
- where pInlines = trimInlinesF . mconcat <$> manyTill inline eof
- asBlocks p = do
- p' <- p
- return $ MetaBlocks (B.toList p')
- asInlines p = do
- p' <- p
- return $ MetaInlines (B.toList p')
-
-checkBoolean :: Text -> Maybe Bool
-checkBoolean t =
- if t == T.pack "true" || t == T.pack "True" || t == T.pack "TRUE"
- then Just True
- else if t == T.pack "false" || t == T.pack "False" || t == T.pack "FALSE"
- then Just False
- else Nothing
-
-yamlToMetaValue :: PandocMonad m
- => YAML.Node YE.Pos-> MarkdownParser m (F MetaValue)
-yamlToMetaValue (YAML.Scalar _ x) =
- case x of
- YAML.SStr t -> toMetaValue t
- YAML.SBool b -> return $ return $ MetaBool b
- YAML.SFloat d -> return $ return $ MetaString $ tshow d
- YAML.SInt i -> return $ return $ MetaString $ tshow i
- YAML.SUnknown _ t ->
- case checkBoolean t of
- Just b -> return $ return $ MetaBool b
- Nothing -> toMetaValue t
- YAML.SNull -> return $ return $ MetaString ""
-yamlToMetaValue (YAML.Sequence _ _ xs) = do
- xs' <- mapM yamlToMetaValue xs
- return $ do
- xs'' <- sequence xs'
- return $ B.toMetaValue xs''
-yamlToMetaValue (YAML.Mapping _ _ o) = fmap B.toMetaValue <$> yamlMap o
-yamlToMetaValue _ = return $ return $ MetaString ""
-
-yamlMap :: PandocMonad m
- => M.Map (YAML.Node YE.Pos) (YAML.Node YE.Pos)
- -> MarkdownParser m (F (M.Map Text MetaValue))
-yamlMap o = do
- kvs <- forM (M.toList o) $ \(key, v) -> do
- k <- nodeToKey key
- return (k, v)
- let kvs' = filter (not . ignorable . fst) kvs
- (fmap M.fromList . sequence) <$> mapM toMeta kvs'
- where
- ignorable t = "_" `T.isSuffixOf` t
- toMeta (k, v) = do
- fv <- yamlToMetaValue v
- return $ do
- v' <- fv
- return (k, v')
-
stopLine :: PandocMonad m => MarkdownParser m ()
stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
diff --git a/src/Text/Pandoc/Readers/Metadata.hs b/src/Text/Pandoc/Readers/Metadata.hs
new file mode 100644
index 000000000..76f30e957
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Metadata.hs
@@ -0,0 +1,140 @@
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE RelaxedPolyRec #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ViewPatterns #-}
+{- |
+ Module : Text.Pandoc.Readers.Metadata
+ Copyright : Copyright (C) 2006-2019 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Parse YAML/JSON metadata to 'Pandoc' 'Meta'.
+-}
+module Text.Pandoc.Readers.Metadata ( yamlBsToMeta ) where
+
+import Prelude
+import Control.Monad
+import Control.Monad.Except (throwError)
+import qualified Data.ByteString.Lazy as BL
+import qualified Data.Map as M
+import Data.Text (Text)
+import qualified Data.Text as T
+import qualified Data.YAML as YAML
+import qualified Data.YAML.Event as YE
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Builder (Blocks)
+import Text.Pandoc.Class (PandocMonad (..))
+import Text.Pandoc.Definition
+import Text.Pandoc.Error
+import Text.Pandoc.Logging
+import Text.Pandoc.Parsing hiding (tableWith)
+import Text.Pandoc.Shared
+
+yamlBsToMeta :: PandocMonad m
+ => ParserT Text ParserState m (F Blocks)
+ -> BL.ByteString
+ -> ParserT Text ParserState m (F Meta)
+yamlBsToMeta pBlocks bstr = do
+ pos <- getPosition
+ case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of
+ Right ((YAML.Doc (YAML.Mapping _ _ o)):_)
+ -> (fmap Meta) <$> yamlMap pBlocks o
+ Right [] -> return . return $ mempty
+ Right [YAML.Doc (YAML.Scalar _ YAML.SNull)]
+ -> return . return $ mempty
+ Right _ -> do logMessage $ CouldNotParseYamlMetadata "not an object"
+ pos
+ return . return $ mempty
+ Left (_pos, err')
+ -> do logMessage $ CouldNotParseYamlMetadata
+ (T.pack err') pos
+ return . return $ mempty
+
+nodeToKey :: PandocMonad m
+ => YAML.Node YE.Pos
+ -> m Text
+nodeToKey (YAML.Scalar _ (YAML.SStr t)) = return t
+nodeToKey (YAML.Scalar _ (YAML.SUnknown _ t)) = return t
+nodeToKey _ = throwError $ PandocParseError
+ "Non-string key in YAML mapping"
+
+toMetaValue :: PandocMonad m
+ => ParserT Text ParserState m (F Blocks)
+ -> Text
+ -> ParserT Text ParserState m (F MetaValue)
+toMetaValue pBlocks x =
+ -- Note: a standard quoted or unquoted YAML value will
+ -- not end in a newline, but a "block" set off with
+ -- `|` or `>` will.
+ if "\n" `T.isSuffixOf` x
+ then parseFromString' (asBlocks <$> pBlocks) (x <> "\n")
+ else parseFromString' pInlines x
+ where pInlines = do
+ bs <- pBlocks
+ return $ do
+ bs' <- bs
+ return $
+ case B.toList bs' of
+ [Plain ils] -> MetaInlines ils
+ [Para ils] -> MetaInlines ils
+ xs -> MetaBlocks xs
+ asBlocks p = do
+ p' <- p
+ return $ MetaBlocks (B.toList p')
+
+checkBoolean :: Text -> Maybe Bool
+checkBoolean t =
+ if t == T.pack "true" || t == T.pack "True" || t == T.pack "TRUE"
+ then Just True
+ else if t == T.pack "false" || t == T.pack "False" || t == T.pack "FALSE"
+ then Just False
+ else Nothing
+
+yamlToMetaValue :: PandocMonad m
+ => ParserT Text ParserState m (F Blocks)
+ -> YAML.Node YE.Pos
+ -> ParserT Text ParserState m (F MetaValue)
+yamlToMetaValue pBlocks (YAML.Scalar _ x) =
+ case x of
+ YAML.SStr t -> toMetaValue pBlocks t
+ YAML.SBool b -> return $ return $ MetaBool b
+ YAML.SFloat d -> return $ return $ MetaString $ tshow d
+ YAML.SInt i -> return $ return $ MetaString $ tshow i
+ YAML.SUnknown _ t ->
+ case checkBoolean t of
+ Just b -> return $ return $ MetaBool b
+ Nothing -> toMetaValue pBlocks t
+ YAML.SNull -> return $ return $ MetaString ""
+
+yamlToMetaValue pBlocks (YAML.Sequence _ _ xs) = do
+ xs' <- mapM (yamlToMetaValue pBlocks) xs
+ return $ do
+ xs'' <- sequence xs'
+ return $ B.toMetaValue xs''
+yamlToMetaValue pBlocks (YAML.Mapping _ _ o) =
+ fmap B.toMetaValue <$> yamlMap pBlocks o
+yamlToMetaValue _ _ = return $ return $ MetaString ""
+
+yamlMap :: PandocMonad m
+ => ParserT Text ParserState m (F Blocks)
+ -> M.Map (YAML.Node YE.Pos) (YAML.Node YE.Pos)
+ -> ParserT Text ParserState m (F (M.Map Text MetaValue))
+yamlMap pBlocks o = do
+ kvs <- forM (M.toList o) $ \(key, v) -> do
+ k <- nodeToKey key
+ return (k, v)
+ let kvs' = filter (not . ignorable . fst) kvs
+ (fmap M.fromList . sequence) <$> mapM toMeta kvs'
+ where
+ ignorable t = "_" `T.isSuffixOf` t
+ toMeta (k, v) = do
+ fv <- yamlToMetaValue pBlocks v
+ return $ do
+ v' <- fv
+ return (k, v')
+