aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Metadata.hs
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2021-10-25 08:48:18 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2021-10-27 12:50:51 -0700
commitd226a35c0ac6485c75f083ce3b25ada1d623f45f (patch)
tree319e9b0810b1d9157de8355b75aba92ffc5c5231 /src/Text/Pandoc/Readers/Metadata.hs
parentb990ca3c4cadf0da0d17a71809cf0a87c67eb175 (diff)
downloadpandoc-d226a35c0ac6485c75f083ce3b25ada1d623f45f.tar.gz
Switch back from HsYAML to yaml.
Reasons: - Performance: HsYAML is around 20 times slower in parsing large YAML bibliographies (#6084). - An issue was submitted to HsYAML, but it hasn't gotten any attention. HsYAML seems borderline unmaintained; it hasn't had a commit in over a year. - Unfortunately this goes back on our attempts to free ourselves from C dependencies (#4535). But I don't see a better alternative until a better pure Haskell parser is available. Closes #6084. Notes: - We've removed the FromYAML instances for all types that had them, since this is a HsYAML-specific typeclass [API change]. (The yaml package just uses From/ToJSON.) - Unlike HsYAML (in the configuration we were using), yaml parses 'Y', 'N', 'Yes', 'No', 'On', 'Off' as boolean values. Users may need to quote these when they are meant to be interpreted as strings. Similarly, 'null' is parsed as a YAML null value (and will be treated as an empty string by pandoc rather than the string 'null'). Quoting it will force it to be interpreted as a string. - Some tests had to be adjusted accordingly. - Pandoc now behaves better when the YAML metadata contains escaping errors: instead of just falling back on treating the section as a table, it raises a YAML parsing error.
Diffstat (limited to 'src/Text/Pandoc/Readers/Metadata.hs')
-rw-r--r--src/Text/Pandoc/Readers/Metadata.hs168
1 files changed, 58 insertions, 110 deletions
diff --git a/src/Text/Pandoc/Readers/Metadata.hs b/src/Text/Pandoc/Readers/Metadata.hs
index cbc523b25..534a7645b 100644
--- a/src/Text/Pandoc/Readers/Metadata.hs
+++ b/src/Text/Pandoc/Readers/Metadata.hs
@@ -17,102 +17,61 @@ module Text.Pandoc.Readers.Metadata (
yamlMetaBlock,
yamlMap ) where
-import Control.Monad
+
import Control.Monad.Except (throwError)
-import qualified Data.ByteString.Lazy as BL
+import qualified Data.ByteString as B
import qualified Data.Map as M
-import Data.Maybe
import Data.Text (Text)
import qualified Data.Text as T
-import qualified Data.YAML as YAML
-import qualified Data.YAML.Event as YE
+import qualified Data.Yaml as Yaml
+import Data.Aeson (Value(..), Object, Result(..), fromJSON, (.:?), withObject)
+import Data.Aeson.Types (parse)
+import Text.Pandoc.Shared (tshow)
import Text.Pandoc.Class.PandocMonad (PandocMonad (..))
-import Text.Pandoc.Definition
+import Text.Pandoc.Definition hiding (Null)
import Text.Pandoc.Error
-import Text.Pandoc.Parsing hiding (tableWith)
-import Text.Pandoc.Shared
-import qualified Data.Text.Lazy as TL
+import Text.Pandoc.Parsing hiding (tableWith, parse)
+
+
import qualified Text.Pandoc.UTF8 as UTF8
yamlBsToMeta :: (PandocMonad m, HasLastStrPosition st)
=> ParserT Sources st m (Future st MetaValue)
- -> BL.ByteString
+ -> B.ByteString
-> ParserT Sources st m (Future st Meta)
yamlBsToMeta pMetaValue bstr = do
- case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of
- Right (YAML.Doc (YAML.Mapping _ _ o):_)
- -> fmap Meta <$> yamlMap pMetaValue o
- Right [] -> return . return $ mempty
- Right [YAML.Doc (YAML.Scalar _ YAML.SNull)]
- -> return . return $ mempty
- -- the following is what we get from a comment:
- Right [YAML.Doc (YAML.Scalar _ (YAML.SUnknown _ ""))]
- -> return . return $ mempty
+ case Yaml.decodeAllEither' bstr of
+ Right (Object o:_) -> fmap Meta <$> yamlMap pMetaValue o
+ Right [Null] -> return . return $ mempty
Right _ -> Prelude.fail "expected YAML object"
- Left (yamlpos, err')
- -> do pos <- getPosition
- setPosition $ incSourceLine
- (setSourceColumn pos (YE.posColumn yamlpos))
- (YE.posLine yamlpos - 1)
- Prelude.fail err'
-
-fakePos :: YAML.Pos
-fakePos = YAML.Pos (-1) (-1) 1 0
-
-lookupYAML :: Text
- -> YAML.Node YE.Pos
- -> Maybe (YAML.Node YE.Pos)
-lookupYAML t (YAML.Mapping _ _ m) =
- M.lookup (YAML.Scalar fakePos (YAML.SUnknown YE.untagged t)) m
- `mplus`
- M.lookup (YAML.Scalar fakePos (YAML.SStr t)) m
-lookupYAML _ _ = Nothing
+ Left err' -> do
+ throwError $ PandocParseError
+ $ T.pack $ Yaml.prettyPrintParseException err'
-- Returns filtered list of references.
yamlBsToRefs :: (PandocMonad m, HasLastStrPosition st)
=> ParserT Sources st m (Future st MetaValue)
-> (Text -> Bool) -- ^ Filter for id
- -> BL.ByteString
+ -> B.ByteString
-> ParserT Sources st m (Future st [MetaValue])
yamlBsToRefs pMetaValue idpred bstr =
- case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of
- Right (YAML.Doc o@YAML.Mapping{}:_)
- -> case lookupYAML "references" o of
- Just (YAML.Sequence _ _ ns) -> do
- let g n = case lookupYAML "id" n of
- Just n' ->
- case nodeToKey n' of
- Nothing -> False
- Just t -> idpred t ||
- case lookupYAML "other-ids" n of
- Just (YAML.Sequence _ _ ns') ->
- let ts' = mapMaybe nodeToKey ns'
- in any idpred ts'
- _ -> False
- Nothing -> False
- sequence <$>
- mapM (yamlToMetaValue pMetaValue) (filter g ns)
- Just _ ->
- Prelude.fail "expecting sequence in 'references' field"
- Nothing ->
- Prelude.fail "expecting 'references' field"
-
- Right [] -> return . return $ mempty
- Right [YAML.Doc (YAML.Scalar _ YAML.SNull)]
- -> return . return $ mempty
- Right _ -> Prelude.fail "expecting YAML object"
- Left (yamlpos, err')
- -> do pos <- getPosition
- setPosition $ incSourceLine
- (setSourceColumn pos (YE.posColumn yamlpos))
- (YE.posLine yamlpos - 1)
- Prelude.fail err'
-
-
-nodeToKey :: YAML.Node YE.Pos -> Maybe Text
-nodeToKey (YAML.Scalar _ (YAML.SStr t)) = Just t
-nodeToKey (YAML.Scalar _ (YAML.SUnknown _ t)) = Just t
-nodeToKey _ = Nothing
+ case Yaml.decodeEither' bstr of
+ Right (Object m) -> do
+ let isSelected (String t) = idpred t
+ isSelected _ = False
+ let hasSelectedId (Object o) =
+ case parse (withObject "ref" (.:? "id")) (Object o) of
+ Success (Just id') -> isSelected id'
+ _ -> False
+ hasSelectedId _ = False
+ case parse (withObject "metadata" (.:? "references")) (Object m) of
+ Success (Just refs) -> sequence <$>
+ mapM (yamlToMetaValue pMetaValue) (filter hasSelectedId refs)
+ _ -> return $ return []
+ Right _ -> return . return $ []
+ Left err' -> do
+ throwError $ PandocParseError
+ $ T.pack $ Yaml.prettyPrintParseException err'
normalizeMetaValue :: (PandocMonad m, HasLastStrPosition st)
=> ParserT Sources st m (Future st MetaValue)
@@ -133,47 +92,36 @@ normalizeMetaValue pMetaValue x =
isSpaceChar '\t' = True
isSpaceChar _ = False
-checkBoolean :: Text -> Maybe Bool
-checkBoolean t
- | t == T.pack "true" || t == T.pack "True" || t == T.pack "TRUE" = Just True
- | t == T.pack "false" || t == T.pack "False" || t == T.pack "FALSE" = Just False
- | otherwise = Nothing
-
yamlToMetaValue :: (PandocMonad m, HasLastStrPosition st)
=> ParserT Sources st m (Future st MetaValue)
- -> YAML.Node YE.Pos
+ -> Value
-> ParserT Sources st m (Future st MetaValue)
-yamlToMetaValue pMetaValue (YAML.Scalar _ x) =
- case x of
- YAML.SStr t -> normalizeMetaValue pMetaValue t
- YAML.SBool b -> return $ return $ MetaBool b
- YAML.SFloat d -> return $ return $ MetaString $ tshow d
- YAML.SInt i -> return $ return $ MetaString $ tshow i
- YAML.SUnknown _ t ->
- case checkBoolean t of
- Just b -> return $ return $ MetaBool b
- Nothing -> normalizeMetaValue pMetaValue t
- YAML.SNull -> return $ return $ MetaString ""
-
-yamlToMetaValue pMetaValue (YAML.Sequence _ _ xs) =
- fmap MetaList . sequence
- <$> mapM (yamlToMetaValue pMetaValue) xs
-yamlToMetaValue pMetaValue (YAML.Mapping _ _ o) =
- fmap MetaMap <$> yamlMap pMetaValue o
-yamlToMetaValue _ _ = return $ return $ MetaString ""
+yamlToMetaValue pMetaValue v =
+ case v of
+ String t -> normalizeMetaValue pMetaValue t
+ Bool b -> return $ return $ MetaBool b
+ Number d -> normalizeMetaValue pMetaValue $
+ case fromJSON v of
+ Success (x :: Int) -> tshow x
+ _ -> tshow d
+ Null -> return $ return $ MetaString ""
+ Array{} -> do
+ case fromJSON v of
+ Error err' -> throwError $ PandocParseError $ T.pack err'
+ Success xs -> fmap MetaList . sequence <$>
+ mapM (yamlToMetaValue pMetaValue) xs
+ Object o -> fmap MetaMap <$> yamlMap pMetaValue o
yamlMap :: (PandocMonad m, HasLastStrPosition st)
=> ParserT Sources st m (Future st MetaValue)
- -> M.Map (YAML.Node YE.Pos) (YAML.Node YE.Pos)
+ -> Object
-> ParserT Sources st m (Future st (M.Map Text MetaValue))
yamlMap pMetaValue o = do
- kvs <- forM (M.toList o) $ \(key, v) -> do
- k <- maybe (throwError $ PandocParseError
- "Non-string key in YAML mapping")
- return $ nodeToKey key
- return (k, v)
- let kvs' = filter (not . ignorable . fst) kvs
- fmap M.fromList . sequence <$> mapM toMeta kvs'
+ case fromJSON (Object o) of
+ Error err' -> throwError $ PandocParseError $ T.pack err'
+ Success (m' :: M.Map Text Value) -> do
+ let kvs = filter (not . ignorable . fst) $ M.toList m'
+ fmap M.fromList . sequence <$> mapM toMeta kvs
where
ignorable t = "_" `T.isSuffixOf` t
toMeta (k, v) = do
@@ -194,7 +142,7 @@ yamlMetaBlock parser = try $ do
-- by including --- and ..., we allow yaml blocks with just comments:
let rawYaml = T.unlines ("---" : (rawYamlLines ++ ["..."]))
optional blanklines
- yamlBsToMeta parser $ UTF8.fromTextLazy $ TL.fromStrict rawYaml
+ yamlBsToMeta parser $ UTF8.fromText rawYaml
stopLine :: Monad m => ParserT Sources st m ()
stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()