aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2017-12-22 19:01:21 -0800
committerJohn MacFarlane <jgm@berkeley.edu>2017-12-23 00:46:48 -0800
commitdd399594531ad3496c5d471af886573a4c099d9f (patch)
tree17dd7ead12ba7d3a6de0db9a7aadc5d2dc18a653
parente3c1449ae6c238f7195981f39bb505da0c69bc0c (diff)
downloadpandoc-dd399594531ad3496c5d471af886573a4c099d9f.tar.gz
JATS reader: better citation handling.
We now convert a ref-list element into a list of citations in metadata, suitable for use with pandoc-citeproc. We also convert references to pandoc citation elements. Thus a JATS article with embedded bibliographic information can be processed with pandoc and pandoc-citeproc to produce a formatted bibliography.
-rw-r--r--src/Text/Pandoc/Readers/JATS.hs82
1 files changed, 79 insertions, 3 deletions
diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs
index 851fbec35..8c0cb2db5 100644
--- a/src/Text/Pandoc/Readers/JATS.hs
+++ b/src/Text/Pandoc/Readers/JATS.hs
@@ -5,6 +5,7 @@ import Data.Char (isDigit, isSpace, toUpper)
import Data.Default
import Data.Generics
import Data.List (intersperse)
+import qualified Data.Map as Map
import Data.Maybe (maybeToList, fromMaybe)
import Data.Text (Text)
import qualified Data.Text as T
@@ -189,8 +190,7 @@ parseBlock (Elem e) =
"fig" -> divWith (attrValue "id" e, ["fig"], []) <$> getBlocks e
"table-wrap" -> divWith (attrValue "id" e, ["table-wrap"], []) <$> getBlocks e
"caption" -> divWith (attrValue "id" e, ["caption"], []) <$> sect 6
- "ref-list" -> divWith ("refs", [], []) <$> getBlocks e
- "ref" -> divWith ("ref-" <> attrValue "id" e, [], []) <$> getBlocks e
+ "ref-list" -> parseRefList e
"?xml" -> return mempty
_ -> getBlocks e
where parseMixed container conts = do
@@ -312,6 +312,74 @@ getInlines :: PandocMonad m => Element -> JATS m Inlines
getInlines e' = (trimInlines . mconcat) <$>
mapM parseInline (elContent e')
+parseRefList :: PandocMonad m => Element -> JATS m Blocks
+parseRefList e = do
+ refs <- mapM parseRef $ filterChildren (named "ref") e
+ addMeta "references" refs
+ return mempty
+
+parseRef :: PandocMonad m
+ => Element -> JATS m (Map.Map String MetaValue)
+parseRef e = do
+ let refId = text $ attrValue "id" e
+ let getInlineText n = maybe (return mempty) getInlines . filterChild (named n)
+ case filterChild (named "element-citation") e of
+ Just c -> do
+ let refType = text $
+ case attrValue "publication-type" c of
+ "journal" -> "article-journal"
+ x -> x
+ (refTitle, refContainerTitle) <- do
+ t <- getInlineText "article-title" c
+ ct <- getInlineText "source" c
+ if t == mempty
+ then return (ct, mempty)
+ else return (t, ct)
+ refLabel <- getInlineText "label" c
+ refYear <- getInlineText "year" c
+ refVolume <- getInlineText "volume" c
+ refFirstPage <- getInlineText "fpage" c
+ refLastPage <- getInlineText "lpage" c
+ refPublisher <- getInlineText "publisher-name" c
+ refPublisherPlace <- getInlineText "publisher-loc" c
+ let refPages = refFirstPage <> (if refLastPage == mempty
+ then mempty
+ else text "\x2013" <> refLastPage)
+ let personGroups' = filterElements (named "person-group") c
+ let getName nm = do
+ given <- maybe (return mempty) getInlines
+ $ filterElement (named "given-names") nm
+ family <- maybe (return mempty) getInlines
+ $ filterElement (named "surname") nm
+ return $ toMetaValue $ Map.fromList [
+ ("given", given)
+ , ("family", family)
+ ]
+ personGroups <- mapM (\pg ->
+ do names <- mapM getName
+ (filterElements (named "name") pg)
+ return (attrValue "person-group-type" pg,
+ toMetaValue names))
+ personGroups'
+ return $ Map.fromList $
+ [ ("id", toMetaValue refId)
+ , ("type", toMetaValue refType)
+ , ("title", toMetaValue refTitle)
+ , ("container-title", toMetaValue refContainerTitle)
+ , ("publisher", toMetaValue refPublisher)
+ , ("publisher-place", toMetaValue refPublisherPlace)
+ , ("title", toMetaValue refTitle)
+ , ("issued", toMetaValue
+ $ Map.fromList [
+ ("year", refYear)
+ ])
+ , ("volume", toMetaValue refVolume)
+ , ("page", toMetaValue refPages)
+ , ("citation-label", toMetaValue refLabel)
+ ] ++ personGroups
+ Nothing -> return $ Map.insert "id" (toMetaValue refId) mempty
+ -- TODO handle mixed-citation
+
strContentRecursive :: Element -> String
strContentRecursive = strContent .
(\e' -> e'{ elContent = map elementToStr $ elContent e' })
@@ -354,7 +422,15 @@ parseInline (Elem e) =
let rid = attrValue "rid" e
let refType = ("ref-type",) <$> maybeAttrValue "ref-type" e
let attr = (attrValue "id" e, [], maybeToList refType)
- return $ linkWith attr ('#' : rid) "" ils
+ return $ if refType == Just ("ref-type","bibr")
+ then cite [Citation{
+ citationId = rid
+ , citationPrefix = []
+ , citationSuffix = []
+ , citationMode = NormalCitation
+ , citationNoteNum = 0
+ , citationHash = 0}] ils
+ else linkWith attr ('#' : rid) "" ils
"ext-link" -> do
ils <- innerInlines
let title = fromMaybe "" $ findAttr (QName "title" (Just "http://www.w3.org/1999/xlink") Nothing) e