diff options
author | John MacFarlane <jgm@berkeley.edu> | 2021-01-22 10:39:58 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-01-22 10:39:58 -0800 |
commit | 83d7804b8fc04eb44c8c158f84fe37373b4eb5d9 (patch) | |
tree | a6c4990832107f2ccee181756777ae93762a0c35 /src/Text | |
parent | fa952c8dbea9ad47ea684729f862a3c6bdd0fecc (diff) | |
parent | b4b3560191b3699dd4db9d069244925a3c6074db (diff) | |
download | pandoc-83d7804b8fc04eb44c8c158f84fe37373b4eb5d9.tar.gz |
Merge pull request #7042 from tarleb/jats-element-citations
JATS writer: use element citations
Diffstat (limited to 'src/Text')
-rw-r--r-- | src/Text/Pandoc/Citeproc.hs | 39 | ||||
-rw-r--r-- | src/Text/Pandoc/Extensions.hs | 14 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/JATS.hs | 21 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/JATS/References.hs | 160 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/JATS/Types.hs | 4 |
5 files changed, 213 insertions, 25 deletions
diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs index 9649c6971..6658c8c0c 100644 --- a/src/Text/Pandoc/Citeproc.hs +++ b/src/Text/Pandoc/Citeproc.hs @@ -18,33 +18,36 @@ import Text.Pandoc.Citeproc.CslJson (cslJsonToReferences) import Text.Pandoc.Citeproc.BibTeX (readBibtexString, Variant(..)) import Text.Pandoc.Citeproc.MetaValue (metaValueToReference, metaValueToText) import Text.Pandoc.Readers.Markdown (yamlToRefs) -import Text.Pandoc.Class (setResourcePath, getResourcePath, getUserDataDir) import qualified Text.Pandoc.BCP47 as BCP47 -import Data.ByteString (ByteString) -import qualified Data.ByteString.Lazy as L +import Text.Pandoc.Builder (Inlines, Many(..), deleteMeta, setMeta) +import qualified Text.Pandoc.Builder as B import Text.Pandoc.Definition as Pandoc -import Text.Pandoc.Walk -import Text.Pandoc.Builder as B -import Text.Pandoc (PandocMonad(..), PandocError(..), - readDataFile, ReaderOptions(..), pandocExtensions, - report, LogMessage(..), fetchItem) +import Text.Pandoc.Class (PandocMonad(..), getResourcePath, getUserDataDir, + fetchItem, readDataFile, report, setResourcePath) +import Text.Pandoc.Error (PandocError(..)) +import Text.Pandoc.Extensions (pandocExtensions) +import Text.Pandoc.Logging (LogMessage(..)) +import Text.Pandoc.Options (ReaderOptions(..)) import Text.Pandoc.Shared (stringify, ordNub, blocksToInlines, tshow) import qualified Text.Pandoc.UTF8 as UTF8 +import Text.Pandoc.Walk (query, walk, walkM) +import Control.Applicative ((<|>)) +import Control.Monad.Except (catchError, throwError) +import Control.Monad.State (State, evalState, get, put, runState) import Data.Aeson (eitherDecode) -import Data.Default -import Data.Ord () +import Data.ByteString (ByteString) +import qualified Data.ByteString.Lazy as L +import Data.Char (isPunctuation, isUpper) +import Data.Default (Default(def)) +import qualified Data.Foldable as Foldable import qualified Data.Map as M +import Data.Maybe (mapMaybe, fromMaybe) +import Data.Ord () +import qualified Data.Sequence as Seq import qualified Data.Set as Set -import Data.Char (isPunctuation, isUpper) import Data.Text (Text) import qualified Data.Text as T -import Control.Monad.State -import qualified Data.Sequence as Seq -import qualified Data.Foldable as Foldable -import System.FilePath -import Control.Applicative -import Control.Monad.Except -import Data.Maybe (mapMaybe, fromMaybe) +import System.FilePath (takeExtension) import Safe (lastMay, initSafe) -- import Debug.Trace as Trace (trace, traceShowId) diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs index 39c2a0489..7aa32c52c 100644 --- a/src/Text/Pandoc/Extensions.hs +++ b/src/Text/Pandoc/Extensions.hs @@ -88,6 +88,7 @@ data Extension = -- does not affect readers/writers directly; it causes -- the eastAsianLineBreakFilter to be applied after -- parsing, in Text.Pandoc.App.convertWithOpts. + | Ext_element_citations -- ^ Use element-citation elements for JATS citations | Ext_emoji -- ^ Support emoji like :smile: | Ext_empty_paragraphs -- ^ Allow empty paragraphs | Ext_epub_html_exts -- ^ Recognise the EPUB extended version of HTML @@ -412,6 +413,11 @@ getDefaultExtensions "textile" = extensionsFromList Ext_smart, Ext_raw_html, Ext_auto_identifiers] +getDefaultExtensions "jats" = extensionsFromList + [Ext_auto_identifiers] +getDefaultExtensions "jats_archiving" = getDefaultExtensions "jats" +getDefaultExtensions "jats_publishing" = getDefaultExtensions "jats" +getDefaultExtensions "jats_articleauthoring" = getDefaultExtensions "jats" getDefaultExtensions "opml" = pandocExtensions -- affects notes getDefaultExtensions _ = extensionsFromList [Ext_auto_identifiers] @@ -554,6 +560,14 @@ getAllExtensions f = universalExtensions <> getAll f , Ext_smart , Ext_raw_tex ] + getAll "jats" = + extensionsFromList + [ Ext_auto_identifiers + , Ext_element_citations + ] + getAll "jats_archiving" = getAll "jats" + getAll "jats_publishing" = getAll "jats" + getAll "jats_articleauthoring" = getAll "jats" getAll "opml" = allMarkdownExtensions -- affects notes getAll "twiki" = autoIdExtensions <> extensionsFromList diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs index c75d40745..a9369db7a 100644 --- a/src/Text/Pandoc/Writers/JATS.hs +++ b/src/Text/Pandoc/Writers/JATS.hs @@ -29,6 +29,7 @@ import Data.Maybe (fromMaybe) import Data.Time (toGregorian, Day, parseTimeM, defaultTimeLocale, formatTime) import qualified Data.Text as T import Data.Text (Text) +import Text.Pandoc.Citeproc (getReferences) import Text.Pandoc.Class.PandocMonad (PandocMonad, report) import Text.Pandoc.Definition import Text.Pandoc.Highlighting (languages, languagesByExtension) @@ -40,6 +41,7 @@ import Text.DocLayout import Text.Pandoc.Shared import Text.Pandoc.Templates (renderTemplate) import Text.DocTemplates (Context(..), Val(..)) +import Text.Pandoc.Writers.JATS.References (referencesToJATS) import Text.Pandoc.Writers.JATS.Table (tableToJATS) import Text.Pandoc.Writers.JATS.Types import Text.Pandoc.Writers.Math @@ -71,15 +73,19 @@ writeJATS = writeJatsArchiving -- | Convert a @'Pandoc'@ document to JATS. writeJats :: PandocMonad m => JATSTagSet -> WriterOptions -> Pandoc -> m Text -writeJats tagSet opts d = - runReaderT (evalStateT (docToJATS opts d) initialState) - environment - where initialState = JATSState { jatsNotes = [] } - environment = JATSEnv +writeJats tagSet opts d = do + refs <- if extensionEnabled Ext_element_citations $ writerExtensions opts + then getReferences Nothing d + else pure [] + let environment = JATSEnv { jatsTagSet = tagSet , jatsInlinesWriter = inlinesToJATS , jatsBlockWriter = blockToJATS + , jatsReferences = refs } + let initialState = JATSState { jatsNotes = [] } + runReaderT (evalStateT (docToJATS opts d) initialState) + environment -- | Convert Pandoc document to string in JATS format. docToJATS :: PandocMonad m => WriterOptions -> Pandoc -> JATS m Text @@ -258,7 +264,10 @@ blockToJATS opts (Div (ident,_,_) [Para lst]) | "ref-" `T.isPrefixOf` ident = inTagsSimple "mixed-citation" <$> inlinesToJATS opts lst blockToJATS opts (Div ("refs",_,_) xs) = do - contents <- blocksToJATS opts xs + refs <- asks jatsReferences + contents <- if null refs + then blocksToJATS opts xs + else referencesToJATS opts refs return $ inTagsIndented "ref-list" contents blockToJATS opts (Div (ident,[cls],kvs) bs) | cls `elem` ["fig", "caption", "table-wrap"] = do contents <- blocksToJATS opts bs diff --git a/src/Text/Pandoc/Writers/JATS/References.hs b/src/Text/Pandoc/Writers/JATS/References.hs new file mode 100644 index 000000000..4ee7eb9dd --- /dev/null +++ b/src/Text/Pandoc/Writers/JATS/References.hs @@ -0,0 +1,160 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.Writers.JATS.References + Copyright : © 2021 Albert Krewinkel + License : GNU GPL, version 2 or above + + Maintainer : Albert Krewinkel <tarleb@zeitkraut.de> + Stability : alpha + Portability : portable + +Creation of a bibliography list using @<element-citation>@ elements in +reference items. +-} +module Text.Pandoc.Writers.JATS.References + ( referencesToJATS + , referenceToJATS + ) where + +import Citeproc.Pandoc () +import Citeproc.Types + ( Date (..), DateParts (..), ItemId (..), Name (..), Reference (..) + , Val (..) , lookupVariable, valToText + ) +import Data.Text (Text) +import Text.DocLayout (Doc, empty, isEmpty, literal, vcat) +import Text.Pandoc.Class.PandocMonad (PandocMonad) +import Text.Pandoc.Builder (Inlines) +import Text.Pandoc.Options (WriterOptions) +import Text.Pandoc.Shared (tshow) +import Text.Pandoc.Writers.JATS.Types +import Text.Pandoc.XML (inTags) +import qualified Data.Text as T + +referencesToJATS :: PandocMonad m + => WriterOptions + -> [Reference Inlines] + -> JATS m (Doc Text) +referencesToJATS opts = + fmap (inTags True "ref-list" [] . vcat) . mapM (referenceToJATS opts) + +referenceToJATS :: PandocMonad m + => WriterOptions + -> Reference Inlines + -> JATS m (Doc Text) +referenceToJATS _opts ref = do + let refType = referenceType ref + let pubType = [("publication-type", refType) | not (T.null refType)] + let wrap = inTags True "ref" [("id", "ref-" <> unItemId (referenceId ref))] + . inTags True "element-citation" pubType + return . wrap . vcat $ + [ authors + , "title" `varInTag` + if refType == "book" + then "source" + else "article-title" + , if refType == "book" + then empty + else "container-title" `varInTag` "source" + , editors + , "publisher" `varInTag` "publisher-name" + , "publisher-place" `varInTag` "publisher-loc" + , yearTag + , accessed + , "volume" `varInTag` "volume" + , "issue" `varInTag` "issue" + , "page-first" `varInTag` "fpage" + , "page-last" `varInTag` "lpage" + , "pages" `varInTag` "page-range" + , "ISBN" `varInTag` "isbn" + , "ISSN" `varInTag` "issn" + , varInTagWith "doi" "pub-id" [("pub-id-type", "doi")] + , varInTagWith "pmid" "pub-id" [("pub-id-type", "pmid")] + ] + where + varInTag var tagName = varInTagWith var tagName [] + + varInTagWith var tagName tagAttribs = + case lookupVariable var ref >>= valToText of + Nothing -> mempty + Just val -> inTags' tagName tagAttribs $ literal val + + authors = case lookupVariable "author" ref of + Just (NamesVal names) -> + inTags True "person-group" [("person-group-type", "author")] . vcat $ + map toNameElements names + _ -> empty + + editors = case lookupVariable "editor" ref of + Just (NamesVal names) -> + inTags True "person-group" [("person-group-type", "editor")] . vcat $ + map toNameElements names + _ -> empty + + yearTag = + case lookupVariable "issued" ref of + Just (DateVal date) -> toDateElements date + _ -> empty + + accessed = + case lookupVariable "accessed" ref of + Just (DateVal d) -> inTags' "date-in-citation" + [("content-type", "access-date")] + (toDateElements d) + _ -> empty + +toDateElements :: Date -> Doc Text +toDateElements date = + case dateParts date of + dp@(DateParts (y:m:d:_)):_ -> yearElement y dp <> + monthElement m <> + dayElement d + dp@(DateParts (y:m:_)):_ -> yearElement y dp <> monthElement m + dp@(DateParts (y:_)):_ -> yearElement y dp + _ -> empty + +yearElement :: Int -> DateParts -> Doc Text +yearElement year dp = + inTags' "year" [("iso-8601-date", iso8601 dp)] $ literal (fourDigits year) + +monthElement :: Int -> Doc Text +monthElement month = inTags' "month" [] . literal $ twoDigits month + +dayElement :: Int -> Doc Text +dayElement day = inTags' "day" [] . literal $ twoDigits day + +iso8601 :: DateParts -> Text +iso8601 = T.intercalate "-" . \case + DateParts (y:m:d:_) -> [fourDigits y, twoDigits m, twoDigits d] + DateParts (y:m:_) -> [fourDigits y, twoDigits m] + DateParts (y:_) -> [fourDigits y] + _ -> [] + +twoDigits :: Int -> Text +twoDigits n = T.takeEnd 2 $ '0' `T.cons` tshow n + +fourDigits :: Int -> Text +fourDigits n = T.takeEnd 4 $ "000" <> tshow n + +toNameElements :: Name -> Doc Text +toNameElements name = + if not (isEmpty nameTags) + then inTags' "name" [] nameTags + else nameLiteral name `inNameTag` "string-name" + where + inNameTag val tag = maybe empty (inTags' tag [] . literal) val + surnamePrefix = maybe mempty (`T.snoc` ' ') $ + nameNonDroppingParticle name + givenSuffix = maybe mempty (T.cons ' ') $ + nameDroppingParticle name + nameTags = mconcat + [ ((surnamePrefix <>) <$> nameFamily name) `inNameTag` "surname" + , ((<> givenSuffix) <$> nameGiven name) `inNameTag` "given-names" + , nameSuffix name `inNameTag` "suffix" + ] + +-- | Put the supplied contents between start and end tags of tagType, +-- with specified attributes. +inTags' :: Text -> [(Text, Text)] -> Doc Text -> Doc Text +inTags' = inTags False diff --git a/src/Text/Pandoc/Writers/JATS/Types.hs b/src/Text/Pandoc/Writers/JATS/Types.hs index 54ed4a8bd..6fdddc0b5 100644 --- a/src/Text/Pandoc/Writers/JATS/Types.hs +++ b/src/Text/Pandoc/Writers/JATS/Types.hs @@ -17,11 +17,12 @@ module Text.Pandoc.Writers.JATS.Types ) where +import Citeproc.Types (Reference) import Control.Monad.Reader (ReaderT) import Control.Monad.State (StateT) import Data.Text (Text) import Text.DocLayout (Doc) -import Text.Pandoc.Definition (Block, Inline) +import Text.Pandoc.Builder (Block, Inline, Inlines) import Text.Pandoc.Options (WriterOptions) -- | JATS tag set variant @@ -40,6 +41,7 @@ data JATSEnv m = JATSEnv { jatsTagSet :: JATSTagSet , jatsInlinesWriter :: WriterOptions -> [Inline] -> JATS m (Doc Text) , jatsBlockWriter :: WriterOptions -> Block -> JATS m (Doc Text) + , jatsReferences :: [Reference Inlines] } -- | JATS writer type |