aboutsummaryrefslogtreecommitdiff
path: root/src/Text
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2020-10-05 21:07:47 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2020-10-05 21:07:47 -0700
commit6a32ea71ea9af8613b8585e874fef8d5f29665fa (patch)
tree12b2ed269b25149bb5620b0c9d09c658f923dd32 /src/Text
parent89e4f1bf9ad48e3fdefeb5c921e9af62ebc24601 (diff)
downloadpandoc-6a32ea71ea9af8613b8585e874fef8d5f29665fa.tar.gz
Add yamlToRefs, yamlBsToRefs.
T.P.Readers.Markdown now exports yamlToRefs. [API change] T.P.Readers.Metadata exports yamlBsToRefs. [API change] These allow specifying an id filter so we parse only references that are used in the document. Improves timing with a 3M yaml references file from 36s to 17s.
Diffstat (limited to 'src/Text')
-rw-r--r--src/Text/Pandoc/Citeproc.hs12
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs27
-rw-r--r--src/Text/Pandoc/Readers/Metadata.hs45
3 files changed, 74 insertions, 10 deletions
diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs
index 8274e35d7..4f92cf8ca 100644
--- a/src/Text/Pandoc/Citeproc.hs
+++ b/src/Text/Pandoc/Citeproc.hs
@@ -18,7 +18,7 @@ import Text.Pandoc.Citeproc.Locator (parseLocator)
import Text.Pandoc.Citeproc.CslJson (cslJsonToReferences)
import Text.Pandoc.Citeproc.BibTeX (readBibtexString, Variant(..))
import Text.Pandoc.Citeproc.MetaValue (metaValueToReference, metaValueToText)
-import Text.Pandoc.Readers.Markdown (yamlToMeta)
+import Text.Pandoc.Readers.Markdown (yamlToRefs)
import Text.Pandoc.Class (setResourcePath, getResourcePath, getUserDataDir)
import Data.ByteString (ByteString)
import qualified Data.ByteString.Lazy as L
@@ -213,12 +213,10 @@ getRefs locale format idpred raw =
(return . filter (idpred . unItemId . referenceId)) .
cslJsonToReferences $ raw
Format_yaml -> do
- meta <- yamlToMeta def{ readerExtensions = pandocExtensions }
- (L.fromStrict raw)
- case lookupMeta "references" meta of
- Just (MetaList rs) ->
- return $ mapMaybe (metaValueToReference idpred) rs
- _ -> throwError $ PandocAppError "No references field"
+ rs <- yamlToRefs idpred
+ def{ readerExtensions = pandocExtensions }
+ (L.fromStrict raw)
+ return $ mapMaybe (metaValueToReference idpred . MetaMap) rs
-- localized quotes
convertQuotes :: Locale -> Inline -> Inline
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 257788081..65925ee95 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -13,7 +13,10 @@
Conversion of markdown-formatted plain text to 'Pandoc' document.
-}
-module Text.Pandoc.Readers.Markdown ( readMarkdown, yamlToMeta ) where
+module Text.Pandoc.Readers.Markdown (
+ readMarkdown,
+ yamlToMeta,
+ yamlToRefs ) where
import Control.Monad
import Control.Monad.Except (throwError)
@@ -44,7 +47,7 @@ import Text.Pandoc.Readers.LaTeX (applyMacros, rawLaTeXBlock, rawLaTeXInline)
import Text.Pandoc.Shared
import qualified Text.Pandoc.UTF8 as UTF8
import Text.Pandoc.XML (fromEntities)
-import Text.Pandoc.Readers.Metadata (yamlBsToMeta)
+import Text.Pandoc.Readers.Metadata (yamlBsToMeta, yamlBsToRefs)
type MarkdownParser m = ParserT Text ParserState m
@@ -75,6 +78,26 @@ yamlToMeta opts bstr = do
Right result -> return result
Left e -> throwError e
+-- | Read a YAML string and extract references from the
+-- 'references' field, filter using an id predicate and
+-- parsing fields as Markdown.
+yamlToRefs :: PandocMonad m
+ => (Text -> Bool)
+ -> ReaderOptions
+ -> BL.ByteString
+ -> m [M.Map Text MetaValue]
+yamlToRefs idpred opts bstr = do
+ let parser = do
+ refs <- yamlBsToRefs (fmap B.toMetaValue <$> parseBlocks) idpred bstr
+ return $ runF refs defaultParserState
+ parsed <- readWithM parser def{ stateOptions = opts } ""
+ case parsed of
+ Right result -> return result
+ Left e -> throwError e
+
+
+
+
--
-- Constants and data structure definitions
--
diff --git a/src/Text/Pandoc/Readers/Metadata.hs b/src/Text/Pandoc/Readers/Metadata.hs
index 826111756..b802c752b 100644
--- a/src/Text/Pandoc/Readers/Metadata.hs
+++ b/src/Text/Pandoc/Readers/Metadata.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ScopedTypeVariables #-}
{- |
Module : Text.Pandoc.Readers.Metadata
Copyright : Copyright (C) 2006-2020 John MacFarlane
@@ -10,7 +11,10 @@
Parse YAML/JSON metadata to 'Pandoc' 'Meta'.
-}
-module Text.Pandoc.Readers.Metadata ( yamlBsToMeta, yamlMap ) where
+module Text.Pandoc.Readers.Metadata (
+ yamlBsToMeta,
+ yamlBsToRefs,
+ yamlMap ) where
import Control.Monad
import Control.Monad.Except (throwError)
@@ -47,6 +51,45 @@ yamlBsToMeta pMetaValue bstr = do
(T.pack err') pos
return . return $ mempty
+-- Returns filtered list of references.
+yamlBsToRefs :: PandocMonad m
+ => ParserT Text ParserState m (F MetaValue)
+ -> (Text -> Bool) -- ^ Filter for id
+ -> BL.ByteString
+ -> ParserT Text ParserState m (F [M.Map Text MetaValue])
+yamlBsToRefs pMetaValue idpred bstr = do
+ pos <- getPosition
+ case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of
+ Right (YAML.Doc (YAML.Mapping _ _ o):_)
+ -> case YAML.parseEither (o YAML..: "references") of
+ Right ns -> do
+ let g n = case YAML.parseEither (n YAML..: "id") of
+ Right t -> idpred t ||
+ case YAML.parseEither (n YAML..:
+ "other-ids") of
+ Right (oids :: [Text]) ->
+ any idpred oids
+ _ -> False
+ _ -> False
+ sequence <$> mapM (yamlMap pMetaValue) (filter g ns)
+ Left _ -> do
+ logMessage $ CouldNotParseYamlMetadata
+ ("expecting 'references' field") pos
+ return . return $ mempty
+
+ Right [] -> return . return $ mempty
+ Right [YAML.Doc (YAML.Scalar _ YAML.SNull)]
+ -> return . return $ mempty
+ Right _ -> do logMessage $ CouldNotParseYamlMetadata "not an object"
+ pos
+ return . return $ mempty
+ Left (_pos, err')
+ -> do logMessage $ CouldNotParseYamlMetadata
+ (T.pack err') pos
+ return . return $ mempty
+
+
+
nodeToKey :: PandocMonad m
=> YAML.Node YE.Pos
-> m Text