Diffstat (limited to 'src')
-rw-r--r--  src/Text/Pandoc/App.hs                         27
-rw-r--r--  src/Text/Pandoc/App/CommandLineOptions.hs       6
-rw-r--r--  src/Text/Pandoc/App/FormatHeuristics.hs         1
-rw-r--r--  src/Text/Pandoc/App/Opt.hs                      5
-rw-r--r--  src/Text/Pandoc/Citeproc.hs                   492
-rw-r--r--  src/Text/Pandoc/Citeproc/BibTeX.hs           1237
-rw-r--r--  src/Text/Pandoc/Citeproc/CslJson.hs            37
-rw-r--r--  src/Text/Pandoc/Citeproc/Data.hs               31
-rw-r--r--  src/Text/Pandoc/Citeproc/Locator.hs           279
-rw-r--r--  src/Text/Pandoc/Citeproc/MetaValue.hs         252
-rw-r--r--  src/Text/Pandoc/Citeproc/Util.hs               70
-rw-r--r--  src/Text/Pandoc/Class/PandocMonad.hs            4
-rw-r--r--  src/Text/Pandoc/Error.hs                        4
-rw-r--r--  src/Text/Pandoc/Filter.hs                      15
-rw-r--r--  src/Text/Pandoc/Logging.hs                      5
-rw-r--r--  src/Text/Pandoc/Readers.hs                      8
-rw-r--r--  src/Text/Pandoc/Readers/BibTeX.hs              70
-rw-r--r--  src/Text/Pandoc/Readers/CslJson.hs             53
-rw-r--r--  src/Text/Pandoc/Readers/Markdown.hs             1
-rw-r--r--  src/Text/Pandoc/Templates.hs                    1
-rw-r--r--  src/Text/Pandoc/Writers.hs                      3
-rw-r--r--  src/Text/Pandoc/Writers/CslJson.hs             87
-rw-r--r--  src/Text/Pandoc/Writers/Docx.hs                12
-rw-r--r--  src/Text/Pandoc/Writers/HTML.hs                79
-rw-r--r--  src/Text/Pandoc/Writers/LaTeX.hs               43
-rw-r--r--  src/Text/Pandoc/Writers/Ms.hs                  64
26 files changed, 2833 insertions(+), 53 deletions(-)
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index aa75436a4..58f605a19 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -76,15 +76,6 @@ convertWithOpts opts = do
mapM_ (UTF8.hPutStrLn stdout) (fromMaybe ["-"] $ optInputFiles opts)
exitSuccess
- let isPandocCiteproc (JSONFilter f) = takeBaseName f == "pandoc-citeproc"
- isPandocCiteproc _ = False
- -- --bibliography implies -F pandoc-citeproc for backwards compatibility:
- let needsCiteproc = isJust (lookupMeta "bibliography"
- (optMetadata opts)) &&
- optCiteMethod opts `notElem` [Natbib, Biblatex] &&
- not (any isPandocCiteproc filters)
- let filters' = filters ++ [ JSONFilter "pandoc-citeproc" | needsCiteproc ]
-
let sources = case optInputFiles opts of
Just xs | not (optIgnoreArgs opts) -> xs
_ -> ["-"]
@@ -170,7 +161,14 @@ convertWithOpts opts = do
let writerName = outputWriterName outputSettings
let writerOptions = outputWriterOptions outputSettings
- let standalone = optStandalone opts || not (isTextFormat format) || pdfOutput
+ let bibOutput = writerName == "bibtex" ||
+ writerName == "biblatex" ||
+ writerName == "csljson"
+
+ let standalone = optStandalone opts ||
+ not (isTextFormat format) ||
+ pdfOutput ||
+ bibOutput
-- We don't want to send output to the terminal if the user
-- does 'pandoc -t docx input.txt'; though we allow them to
@@ -272,6 +270,13 @@ convertWithOpts opts = do
setNoCheckCertificate (optNoCheckCertificate opts)
+ let isPandocCiteproc (JSONFilter f) = takeBaseName f == "pandoc-citeproc"
+ isPandocCiteproc _ = False
+
+ when (any isPandocCiteproc filters) $
+ report $ Deprecated "pandoc-citeproc filter"
+ "Use --citeproc instead."
+
doc <- sourceToDoc sources >>=
( (if isJust (optExtractMedia opts)
then fillMediaBag
@@ -279,7 +284,7 @@ convertWithOpts opts = do
>=> return . adjustMetadata (metadataFromFile <>)
>=> return . adjustMetadata (<> metadata)
>=> applyTransforms transforms
- >=> applyFilters readerOpts filters' [T.unpack format]
+ >=> applyFilters readerOpts filters [T.unpack format]
>=> maybe return extractMedia (optExtractMedia opts)
)
diff --git a/src/Text/Pandoc/App/CommandLineOptions.hs b/src/Text/Pandoc/App/CommandLineOptions.hs
index 99dba4613..36b024ba7 100644
--- a/src/Text/Pandoc/App/CommandLineOptions.hs
+++ b/src/Text/Pandoc/App/CommandLineOptions.hs
@@ -656,6 +656,12 @@ options =
"all|none|best")
"" -- "Starting number for sections, subsections, etc."
+ , Option "C" ["citeproc"]
+ (NoArg
+ (\opt -> return opt { optFilters =
+ optFilters opt ++ [CiteprocFilter] }))
+ "" -- "Process citations"
+
, Option "" ["bibliography"]
(ReqArg
(\arg opt -> return opt{ optMetadata =
diff --git a/src/Text/Pandoc/App/FormatHeuristics.hs b/src/Text/Pandoc/App/FormatHeuristics.hs
index 97eebe3b6..155b7e586 100644
--- a/src/Text/Pandoc/App/FormatHeuristics.hs
+++ b/src/Text/Pandoc/App/FormatHeuristics.hs
@@ -74,5 +74,6 @@ formatFromFilePath x =
".xhtml" -> Just "html"
".ipynb" -> Just "ipynb"
".csv" -> Just "csv"
+ ".bib" -> Just "biblatex"
['.',y] | y `elem` ['1'..'9'] -> Just "man"
_ -> Nothing
diff --git a/src/Text/Pandoc/App/Opt.hs b/src/Text/Pandoc/App/Opt.hs
index 5c39f4ab6..3da6a936b 100644
--- a/src/Text/Pandoc/App/Opt.hs
+++ b/src/Text/Pandoc/App/Opt.hs
@@ -294,6 +294,11 @@ doOpt (k',v) = do
parseYAML v >>= \x -> return (\o -> o{ optColumns = x })
"filters" ->
parseYAML v >>= \x -> return (\o -> o{ optFilters = optFilters o <> x })
+ "citeproc" ->
+ parseYAML v >>= \x ->
+ if x
+ then return (\o -> o{ optFilters = CiteprocFilter : optFilters o })
+ else return id
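+        -- `citeproc: true` in a defaults file adds CiteprocFilter, mirroring
+        -- the -C/--citeproc command-line option added in CommandLineOptions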
"email-obfuscation" ->
parseYAML v >>= \x -> return (\o -> o{ optEmailObfuscation = x })
"identifier-prefix" ->
diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs
new file mode 100644
index 000000000..9fb0e2f0b
--- /dev/null
+++ b/src/Text/Pandoc/Citeproc.hs
@@ -0,0 +1,492 @@
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE StrictData #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE DeriveFunctor #-}
+{-# LANGUAGE DeriveFoldable #-}
+{-# LANGUAGE DeriveTraversable #-}
+module Text.Pandoc.Citeproc
+ ( processCitations )
+where
+
+import Citeproc as Citeproc
+import Citeproc.Pandoc ()
+import Text.Pandoc.Citeproc.Locator (parseLocator)
+import Text.Pandoc.Citeproc.CslJson (cslJsonToReferences)
+import Text.Pandoc.Citeproc.BibTeX (readBibtexString, Variant(..))
+import Text.Pandoc.Citeproc.MetaValue (metaValueToReference, metaValueToText)
+import Data.ByteString (ByteString)
+import qualified Data.ByteString.Lazy as L
+import Text.Pandoc.Definition as Pandoc
+import Text.Pandoc.Walk
+import Text.Pandoc.Builder as B
+import Text.Pandoc (PandocMonad(..), PandocError(..), readMarkdown,
+ readDataFile, ReaderOptions(..), pandocExtensions,
+ report, LogMessage(..), fetchItem)
+import Text.Pandoc.Shared (stringify, ordNub, blocksToInlines)
+import qualified Text.Pandoc.UTF8 as UTF8
+import Data.Aeson (eitherDecode)
+import Data.Default
+import Data.Ord ()
+import qualified Data.Map as M
+import qualified Data.Set as Set
+import Data.Char (isPunctuation)
+import Data.Text (Text)
+import qualified Data.Text as T
+import Control.Monad.State
+import qualified Data.Sequence as Seq
+import qualified Data.Foldable as Foldable
+import System.FilePath
+import Control.Applicative
+import Control.Monad.Except
+import Data.Maybe (mapMaybe, fromMaybe)
+import Safe (lastMay, initSafe)
+-- import Debug.Trace as Trace (trace, traceShowId)
+
+
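+-- | Process the citations in a document: locate the CSL style and
+-- bibliography sources named in the metadata, resolve the references,
+-- run citeproc, and insert the formatted citations and bibliography.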
+processCitations :: PandocMonad m => Pandoc -> m Pandoc
+processCitations (Pandoc meta bs) = do
+ let cslfile = (lookupMeta "csl" meta <|> lookupMeta "citation-style" meta)
+ >>= metaValueToText
+
+ let getFile fp = catchError (fst <$> fetchItem fp)
+ (\e -> catchError (readDataFile
+ (T.unpack $ "csl/" <> fp))
+ (\_ -> throwError e))
+
+ let getCslDefault = readDataFile "default.csl"
+
+ cslContents <- UTF8.toText <$> maybe getCslDefault getFile cslfile
+
+ let abbrevFile = lookupMeta "citation-abbreviations" meta >>= metaValueToText
+
+ mbAbbrevs <- case abbrevFile of
+ Nothing -> return Nothing
+ Just fp -> do
+ rawAbbr <- getFile fp
+ case eitherDecode (L.fromStrict rawAbbr) of
+ Left err -> throwError $ PandocCiteprocError $
+ CiteprocParseError $
+ "Could not parse abbreviations file " <> fp
+ <> "\n" <> T.pack err
+ Right abbr -> return $ Just abbr
+
+ let getParentStyle url = UTF8.toText . fst <$> fetchItem url
+
+ -- TODO check .csl directory if not found
+ styleRes <- Citeproc.parseStyle getParentStyle cslContents
+ style <-
+ case styleRes of
+ Left err -> throwError $ PandocAppError $ prettyCiteprocError err
+ Right style -> return style{ styleAbbreviations = mbAbbrevs }
+ let mblang = parseLang <$>
+ ((lookupMeta "lang" meta <|> lookupMeta "locale" meta) >>= metaValueToText)
+ let locale = Citeproc.mergeLocales mblang style
+ let getCiteId (Cite cs _) = Set.fromList $ map B.citationId cs
+ getCiteId _ = mempty
+ let metanocites = lookupMeta "nocite" meta
+ let meta' = deleteMeta "nocite" meta
+ let nocites = maybe mempty (query getCiteId) metanocites
+ let citeIds = query getCiteId (Pandoc meta bs)
+ let idpred = if "*" `Set.member` nocites
+ then const True
+ else (\c -> c `Set.member` citeIds ||
+ c `Set.member` nocites)
+ refs <- map (linkifyVariables . legacyDateRanges) <$>
+ case lookupMeta "references" meta of
+ Just (MetaList rs) -> return $ mapMaybe metaValueToReference rs
+ _ ->
+ case lookupMeta "bibliography" meta of
+ Just (MetaList xs) ->
+ mconcat <$>
+ mapM (getRefsFromBib locale idpred)
+ (mapMaybe metaValueToText xs)
+ Just x ->
+ case metaValueToText x of
+ Just fp -> getRefsFromBib locale idpred fp
+ Nothing -> return []
+ Nothing -> return []
+ let otherIdsMap = foldr (\ref m ->
+ case T.words . extractText <$>
+ M.lookup "other-ids" (referenceVariables ref) of
+ Nothing -> m
+ Just ids -> foldr
+ (\id' ->
+ M.insert id' (referenceId ref)) m ids)
+ M.empty refs
+ -- TODO: issue warning if no refs defined
+ let citations = getCitations locale otherIdsMap $ Pandoc meta' bs
+ let linkCites = maybe False truish $ lookupMeta "link-citations" meta
+ let opts = defaultCiteprocOptions{ linkCitations = linkCites }
+ let result = Citeproc.citeproc opts style (localeLanguage locale)
+ refs citations
+ mapM_ (report . CiteprocWarning) (resultWarnings result)
+ let sopts = styleOptions style
+ let classes = "references" : -- TODO remove this or keep for compatibility?
+ "csl-bib-body" :
+ ["hanging-indent" | styleHangingIndent sopts]
+ let refkvs = (case styleEntrySpacing sopts of
+ Just es | es > 0 -> (("entry-spacing",T.pack $ show es):)
+ _ -> id) .
+ (case styleLineSpacing sopts of
+ Just ls | ls > 1 -> (("line-spacing",T.pack $ show ls):)
+ _ -> id) $ []
+ let bibs = mconcat $ map (\(ident, out) ->
+ B.divWith ("ref-" <> ident,["csl-entry"],[]) . B.para $
+ walk (convertQuotes locale) out)
+ (resultBibliography result)
+ let moveNotes = maybe True truish $
+ lookupMeta "notes-after-punctuation" meta
+ let cits = map (walk (convertQuotes locale)) $
+ resultCitations result
+
+ let fixQuotes = case localePunctuationInQuote locale of
+ Just True ->
+ B.toList . movePunctuationInsideQuotes . B.fromList
+ _ -> id
+
+ let Pandoc meta'' bs' =
+ maybe id (setMeta "nocite") metanocites $
+ walk (fixQuotes . mvPunct moveNotes locale) $ walk deNote $
+ evalState (walkM insertResolvedCitations $ Pandoc meta' bs)
+ $ cits
+ return $ Pandoc meta'' $ insertRefs refkvs classes meta'' (B.toList bibs) bs'
+
+getRefsFromBib :: PandocMonad m
+ => Locale -> (Text -> Bool) -> Text -> m [Reference Inlines]
+getRefsFromBib locale idpred t = do
+ let fp = T.unpack t
+ raw <- readFileStrict fp
+ case formatFromExtension fp of
+ Just f -> getRefs locale f idpred raw
+ Nothing -> throwError $ PandocAppError $
+                 "Could not determine bibliography format for " <> t
+
+getRefs :: PandocMonad m
+ => Locale
+ -> BibFormat
+ -> (Text -> Bool)
+ -> ByteString
+ -> m [Reference Inlines]
+getRefs locale format idpred raw =
+ case format of
+ Format_bibtex ->
+ either (throwError . PandocAppError . T.pack . show) return .
+ readBibtexString Bibtex locale idpred . UTF8.toText $ raw
+ Format_biblatex ->
+ either (throwError . PandocAppError . T.pack . show) return .
+ readBibtexString Biblatex locale idpred . UTF8.toText $ raw
+ Format_json ->
+ either (throwError . PandocAppError . T.pack)
+ (return . filter (idpred . unItemId . referenceId)) .
+ cslJsonToReferences $ raw
+ Format_yaml -> do
+ Pandoc meta _ <-
+ readMarkdown
+ def{ readerExtensions = pandocExtensions }
+ (UTF8.toText raw)
+ case lookupMeta "references" meta of
+ Just (MetaList rs) ->
+ return $ filter (idpred . unItemId . referenceId)
+ $ mapMaybe metaValueToReference rs
+ _ -> throwError $ PandocAppError "No references field"
+
+-- localized quotes
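+-- For example, with a locale whose "open-quote" and "close-quote" terms are
+-- guillemets, Quoted DoubleQuote contents are rewritten to a Span that wraps
+-- the contents in Str "«" and Str "»".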
+convertQuotes :: Locale -> Inline -> Inline
+convertQuotes locale (Quoted qt ils) =
+ case (M.lookup openterm terms, M.lookup closeterm terms) of
+ (Just ((_,oq):_), Just ((_,cq):_)) ->
+ Span ("",[],[]) (Str oq : ils ++ [Str cq])
+ _ -> Quoted qt ils
+ where
+ terms = localeTerms locale
+ openterm = case qt of
+ DoubleQuote -> "open-quote"
+ SingleQuote -> "open-inner-quote"
+ closeterm = case qt of
+ DoubleQuote -> "close-quote"
+ SingleQuote -> "close-inner-quote"
+convertQuotes _ x = x
+
+-- assumes we walk in same order as query
+insertResolvedCitations :: Inline -> State [Inlines] Inline
+insertResolvedCitations (Cite cs ils) = do
+ resolved <- get
+ case resolved of
+ [] -> return (Cite cs ils)
+ (x:xs) -> do
+ put xs
+ return $ Cite cs (B.toList x)
+insertResolvedCitations x = return x
+
+getCitations :: Locale
+ -> M.Map Text ItemId
+ -> Pandoc
+ -> [Citeproc.Citation Inlines]
+getCitations locale otherIdsMap = Foldable.toList . query getCitation
+ where
+ getCitation (Cite cs _fallback) = Seq.singleton $
+ Citeproc.Citation { Citeproc.citationId = Nothing
+ , Citeproc.citationNoteNumber =
+ case cs of
+ [] -> Nothing
+ (Pandoc.Citation{ Pandoc.citationNoteNum = n }:
+ _) | n > 0 -> Just n
+ | otherwise -> Nothing
+ , Citeproc.citationItems =
+ fromPandocCitations locale otherIdsMap cs
+ }
+ getCitation _ = mempty
+
+fromPandocCitations :: Locale
+ -> M.Map Text ItemId
+ -> [Pandoc.Citation]
+ -> [CitationItem Inlines]
+fromPandocCitations locale otherIdsMap = concatMap go
+ where
+ go c =
+ let (loclab, suffix) = parseLocator locale (citationSuffix c)
+ (mblab, mbloc) = case loclab of
+ Just (loc, lab) -> (Just loc, Just lab)
+ Nothing -> (Nothing, Nothing)
+ cit = CitationItem
+ { citationItemId = fromMaybe
+ (ItemId $ Pandoc.citationId c)
+ (M.lookup (Pandoc.citationId c) otherIdsMap)
+ , citationItemLabel = mblab
+ , citationItemLocator = mbloc
+ , citationItemType = NormalCite
+ , citationItemPrefix = case citationPrefix c of
+ [] -> Nothing
+ ils -> Just $ B.fromList ils <>
+ B.space
+ , citationItemSuffix = case suffix of
+ [] -> Nothing
+ ils -> Just $ B.fromList ils
+ }
+ in if Pandoc.citationId c == "*"
+ then []
+ else
+ case citationMode c of
+ AuthorInText -> [ cit{ citationItemType = AuthorOnly
+ , citationItemSuffix = Nothing }
+ , cit{ citationItemType =
+ Citeproc.SuppressAuthor
+ , citationItemPrefix = Nothing } ]
+ NormalCitation -> [ cit ]
+ Pandoc.SuppressAuthor
+ -> [ cit{ citationItemType =
+ Citeproc.SuppressAuthor } ]
+
+
+
+data BibFormat =
+ Format_biblatex
+ | Format_bibtex
+ | Format_json
+ | Format_yaml
+ deriving (Show, Eq, Ord)
+
+formatFromExtension :: FilePath -> Maybe BibFormat
+formatFromExtension fp = case dropWhile (== '.') $ takeExtension fp of
+ "biblatex" -> Just Format_biblatex
+ "bibtex" -> Just Format_bibtex
+ "bib" -> Just Format_biblatex
+ "json" -> Just Format_json
+ "yaml" -> Just Format_yaml
+ _ -> Nothing
+
+
+isNote :: Inline -> Bool
+isNote (Note _) = True
+isNote (Cite _ [Note _]) = True
+ -- the following allows citation styles that are "in-text" but use superscript
+ -- references to be treated as if they are "notes" for the purposes of moving
+ -- the citations after trailing punctuation (see <https://github.com/jgm/pandoc-citeproc/issues/382>):
+isNote (Cite _ [Superscript _]) = True
+isNote _ = False
+
+isSpacy :: Inline -> Bool
+isSpacy Space = True
+isSpacy SoftBreak = True
+isSpacy _ = False
+
+
+mvPunct :: Bool -> Locale -> [Inline] -> [Inline]
+mvPunct moveNotes locale (x : xs)
+ | isSpacy x = x : mvPunct moveNotes locale xs
+-- 'x [^1],' -> 'x,[^1]'
+mvPunct moveNotes locale (q : s : x : ys)
+ | isSpacy s
+ , isNote x
+ = let spunct = T.takeWhile isPunctuation $ stringify ys
+ in if moveNotes
+ then if T.null spunct
+ then q : x : mvPunct moveNotes locale ys
+ else q : Str spunct : x : mvPunct moveNotes locale
+ (B.toList
+ (dropTextWhile isPunctuation (B.fromList ys)))
+ else q : x : mvPunct moveNotes locale ys
+-- 'x[^1],' -> 'x,[^1]'
+mvPunct moveNotes locale (Cite cs ils : ys)
+ | not (null ils)
+ , isNote (last ils)
+ , startWithPunct ys
+ , moveNotes
+ = let s = stringify ys
+ spunct = T.takeWhile isPunctuation s
+ in Cite cs (init ils
+ ++ [Str spunct | not (endWithPunct False (init ils))]
+ ++ [last ils]) :
+ mvPunct moveNotes locale
+ (B.toList (dropTextWhile isPunctuation (B.fromList ys)))
+mvPunct moveNotes locale (s : x : ys) | isSpacy s, isNote x =
+ x : mvPunct moveNotes locale ys
+mvPunct moveNotes locale (s : x@(Cite _ (Superscript _ : _)) : ys)
+ | isSpacy s = x : mvPunct moveNotes locale ys
+mvPunct moveNotes locale (Cite cs ils : Str "." : ys)
+ | "." `T.isSuffixOf` (stringify ils)
+ = Cite cs ils : mvPunct moveNotes locale ys
+mvPunct moveNotes locale (x:xs) = x : mvPunct moveNotes locale xs
+mvPunct _ _ [] = []
+
+endWithPunct :: Bool -> [Inline] -> Bool
+endWithPunct _ [] = False
+endWithPunct onlyFinal xs@(_:_) =
+ case reverse (T.unpack $ stringify xs) of
+ [] -> True
+ -- covers .), .", etc.:
+ (d:c:_) | isPunctuation d
+ && not onlyFinal
+ && isEndPunct c -> True
+ (c:_) | isEndPunct c -> True
+ | otherwise -> False
+ where isEndPunct c = c `elem` (".,;:!?" :: String)
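+-- e.g. inlines ending in ".)" satisfy endWithPunct False (end punctuation
+-- followed by other punctuation) but not endWithPunct True, which only
+-- accepts a final end-punctuation character.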
+
+
+
+startWithPunct :: [Inline] -> Bool
+startWithPunct ils =
+ case T.uncons (stringify ils) of
+ Just (c,_) -> c `elem` (".,;:!?" :: [Char])
+ Nothing -> False
+
+truish :: MetaValue -> Bool
+truish (MetaBool t) = t
+truish (MetaString s) = isYesValue (T.toLower s)
+truish (MetaInlines ils) = isYesValue (T.toLower (stringify ils))
+truish (MetaBlocks [Plain ils]) = isYesValue (T.toLower (stringify ils))
+truish _ = False
+
+isYesValue :: Text -> Bool
+isYesValue "t" = True
+isYesValue "true" = True
+isYesValue "yes" = True
+isYesValue _ = False
+
+-- if document contains a Div with id="refs", insert
+-- references as its contents. Otherwise, insert references
+-- at the end of the document in a Div with id="refs"
+insertRefs :: [(Text,Text)] -> [Text] -> Meta -> [Block] -> [Block] -> [Block]
+insertRefs _ _ _ [] bs = bs
+insertRefs refkvs refclasses meta refs bs =
+ if isRefRemove meta
+ then bs
+ else case runState (walkM go bs) False of
+ (bs', True) -> bs'
+ (_, False)
+ -> case refTitle meta of
+ Nothing ->
+ case reverse bs of
+ Header lev (id',classes,kvs) ys : xs ->
+ reverse xs ++
+ [Header lev (id',addUnNumbered classes,kvs) ys,
+ Div ("refs",refclasses,refkvs) refs]
+ _ -> bs ++ [refDiv]
+ Just ils -> bs ++
+ [Header 1 ("bibliography", ["unnumbered"], []) ils,
+ refDiv]
+ where
+ refDiv = Div ("refs", refclasses, refkvs) refs
+ addUnNumbered cs = "unnumbered" : [c | c <- cs, c /= "unnumbered"]
+ go :: Block -> State Bool Block
+ go (Div ("refs",cs,kvs) xs) = do
+ put True
+ -- refHeader isn't used if you have an explicit references div
+ let cs' = ordNub $ cs ++ refclasses
+ return $ Div ("refs",cs',kvs) (xs ++ refs)
+ go x = return x
+
+refTitle :: Meta -> Maybe [Inline]
+refTitle meta =
+ case lookupMeta "reference-section-title" meta of
+ Just (MetaString s) -> Just [Str s]
+ Just (MetaInlines ils) -> Just ils
+ Just (MetaBlocks [Plain ils]) -> Just ils
+ Just (MetaBlocks [Para ils]) -> Just ils
+ _ -> Nothing
+
+isRefRemove :: Meta -> Bool
+isRefRemove meta =
+ maybe False truish $ lookupMeta "suppress-bibliography" meta
+
+legacyDateRanges :: Reference Inlines -> Reference Inlines
+legacyDateRanges ref =
+ ref{ referenceVariables = M.map go $ referenceVariables ref }
+ where
+ go (DateVal d)
+ | null (dateParts d)
+ , Just lit <- dateLiteral d
+ = case T.splitOn "_" lit of
+ [x,y] -> case Citeproc.rawDateEDTF (x <> "/" <> y) of
+ Just d' -> DateVal d'
+ Nothing -> DateVal d
+ _ -> DateVal d
+ go x = x
+
+linkifyVariables :: Reference Inlines -> Reference Inlines
+linkifyVariables ref =
+ ref{ referenceVariables = M.mapWithKey go $ referenceVariables ref }
+ where
+ go "URL" x = tolink "https://" x
+ go "DOI" x = tolink "https://doi.org/" x
+ go "ISBN" x = tolink "https://worldcat.org/isbn/" x
+ go "PMID" x = tolink "https://www.ncbi.nlm.nih.gov/pubmed/" x
+ go "PMCID" x = tolink "https://www.ncbi.nlm.nih.gov/pmc/articles/" x
+ go _ x = x
+ tolink pref x = let x' = extractText x
+ x'' = if "://" `T.isInfixOf` x'
+ then x'
+ else pref <> x'
+ in FancyVal (B.link x'' "" (B.str x'))
+
+extractText :: Val Inlines -> Text
+extractText (TextVal x) = x
+extractText (FancyVal x) = toText x
+extractText (NumVal n) = T.pack (show n)
+extractText _ = mempty
+
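+-- Flatten notes nested inside notes: the inner note's contents are put in
+-- parentheses (with any final period dropped) so that citations in
+-- footnotes do not generate nested footnotes.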
+deNote :: Inline -> Inline
+deNote (Note bs) = Note $ walk go bs
+ where
+ go (Note bs')
+ = Span ("",[],[]) (Space : Str "(" :
+ (removeFinalPeriod
+ (blocksToInlines bs')) ++ [Str ")"])
+ go x = x
+deNote x = x
+
+-- Note: we can't use dropTextWhileEnd because this would
+-- remove the final period on abbreviations like Ibid.
+-- But removing a final Str "." is safe.
+removeFinalPeriod :: [Inline] -> [Inline]
+removeFinalPeriod ils =
+ case lastMay ils of
+ Just (Str ".") -> initSafe ils
+ _ -> ils
+
+
+
diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs
new file mode 100644
index 000000000..5919fee77
--- /dev/null
+++ b/src/Text/Pandoc/Citeproc/BibTeX.hs
@@ -0,0 +1,1237 @@
+{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# OPTIONS_GHC -fno-warn-unused-do-bind #-}
+-----------------------------------------------------------------------------
+-- |
+-- Module : Text.Pandoc.Citeproc.BibTeX
+-- Copyright : (c) John MacFarlane
+-- License : BSD-style (see LICENSE)
+--
+-- Maintainer : John MacFarlane <fiddlosopher@gmail.com>
+-- Stability : unstable
+-- Portability : unportable
+--
+-----------------------------------------------------------------------------
+
+module Text.Pandoc.Citeproc.BibTeX
+ ( Variant(..)
+ , readBibtexString
+ )
+ where
+
+import Text.Pandoc.Definition
+import Text.Pandoc.Builder as B
+import Text.Pandoc.Readers.LaTeX (readLaTeX)
+import Text.Pandoc.Extensions (Extension(..), extensionsFromList)
+import Text.Pandoc.Options (ReaderOptions(..))
+import Text.Pandoc.Class (runPure)
+import Text.Pandoc.Error (PandocError)
+import Text.Pandoc.Shared (stringify)
+import qualified Text.Pandoc.Walk as Walk
+import Citeproc.Types
+import Citeproc.CaseTransform (withSentenceCase)
+import Citeproc.Pandoc (caseTransform)
+import Text.Pandoc.Citeproc.Util (toIETF)
+import Text.Pandoc.Citeproc.Data (biblatexStringMap)
+import Data.Default
+import Data.Text (Text)
+import qualified Data.Text as T
+import qualified Data.Map as Map
+import Data.Maybe
+import Text.Parsec hiding (State, many, (<|>))
+import Control.Applicative
+import Data.List.Split (splitOn, splitWhen, wordsBy)
+import Control.Monad.RWS hiding ((<>))
+import qualified Data.Sequence as Seq
+import Data.Char (isAlphaNum, isDigit, isLetter,
+ isUpper, toLower, toUpper,
+ isLower, isPunctuation)
+import Data.List (foldl', intercalate)
+import Safe (readMay)
+
+data Variant = Bibtex | Biblatex
+ deriving (Show, Eq, Ord)
+
+-- | Parse BibTeX or BibLaTeX into a list of 'Reference's.
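+--
+-- An illustrative sketch (@loc@, the id filter, and the entry are assumed
+-- values): given a parsed CSL 'Locale' @loc@,
+--
+-- > readBibtexString Biblatex loc (const True) "@book{doe2020, title={T}}"
+--
+-- yields either a 'ParseError' or a list of references.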
+readBibtexString :: Variant -- ^ bibtex or biblatex
+ -> Locale -- ^ Locale
+ -> (Text -> Bool) -- ^ Filter on citation ids
+ -> Text -- ^ bibtex/biblatex text
+ -> Either ParseError [Reference Inlines]
+readBibtexString variant locale idpred contents = do
+ case runParser (((resolveCrossRefs variant <$> bibEntries) <* eof) >>=
+ mapM (itemToReference locale variant) .
+ filter (idpred . identifier))
+ (fromMaybe defaultLang $ localeLanguage locale, Map.empty)
+ "" contents of
+ Left err -> Left err
+ Right xs -> return xs
+
+defaultLang :: Lang
+defaultLang = Lang "en" (Just "US")
+
+-- a map of bibtex "string" macros
+type StringMap = Map.Map Text Text
+
+type BibParser = Parsec Text (Lang, StringMap)
+
+data Item = Item{ identifier :: Text
+ , sourcePos :: SourcePos
+ , entryType :: Text
+ , fields :: Map.Map Text Text
+ }
+ deriving (Show, Ord, Eq)
+
+itemToReference :: Locale -> Variant -> Item -> BibParser (Reference Inlines)
+itemToReference locale variant item = do
+ setPosition (sourcePos item)
+ bib item $ do
+ let lang = fromMaybe defaultLang $ localeLanguage locale
+ modify $ \st -> st{ localeLang = lang,
+ untitlecase = case lang of
+ (Lang "en" _) -> True
+ _ -> False }
+
+ id' <- asks identifier
+ otherIds <- (Just <$> getRawField "ids")
+ <|> return Nothing
+ (reftype, genre) <- getTypeAndGenre
+ -- hyphenation:
+ let getLangId = do
+ langid <- T.strip . T.toLower <$> getRawField "langid"
+ idopts <- T.strip . T.toLower . stringify <$>
+ getField "langidopts" <|> return ""
+ case (langid, idopts) of
+ ("english","variant=british") -> return "british"
+ ("english","variant=american") -> return "american"
+ ("english","variant=us") -> return "american"
+ ("english","variant=usmax") -> return "american"
+ ("english","variant=uk") -> return "british"
+ ("english","variant=australian") -> return "australian"
+ ("english","variant=newzealand") -> return "newzealand"
+ (x,_) -> return x
+ hyphenation <- (Just . toIETF . T.toLower <$>
+ (getLangId <|> getRawField "hyphenation"))
+ <|> return Nothing
+ modify $ \s -> s{ untitlecase = untitlecase s &&
+ case hyphenation of
+ Just x -> "en-" `T.isPrefixOf` x
+ _ -> True }
+
+
+ opts <- (parseOptions <$> getRawField "options") <|> return []
+
+ et <- asks entryType
+
+ -- titles
+ let isArticle = et `elem`
+ ["article", "periodical", "suppperiodical", "review"]
+ let isPeriodical = et == "periodical"
+ let isChapterlike = et `elem`
+ ["inbook","incollection","inproceedings","inreference","bookinbook"]
+
+ let getFieldMaybe f = (Just <$> getField f) <|> return Nothing
+
+ -- names
+ let getNameList' f = Just <$>
+ getNameList (("bibtex", case variant of
+ Bibtex -> "true"
+ Biblatex -> "false") : opts) f
+
+ author' <- getNameList' "author" <|> return Nothing
+ containerAuthor' <- getNameList' "bookauthor" <|> return Nothing
+ translator' <- getNameList' "translator" <|> return Nothing
+ editortype <- getRawField "editortype" <|> return mempty
+ editor'' <- getNameList' "editor" <|> return Nothing
+ director'' <- getNameList' "director" <|> return Nothing
+ let (editor', director') = case editortype of
+ "director" -> (Nothing, editor'')
+ _ -> (editor'', director'')
+ -- FIXME: add same for editora, editorb, editorc
+
+ -- dates
+ issued' <- (Just <$> (getDate "date" <|> getOldDate mempty)) <|>
+ return Nothing
+ eventDate' <- (Just <$> (getDate "eventdate" <|> getOldDate "event")) <|>
+ return Nothing
+ origDate' <- (Just <$> (getDate "origdate" <|> getOldDate "orig")) <|>
+ return Nothing
+ accessed' <- (Just <$> (getDate "urldate" <|> getOldDate "url")) <|>
+ return Nothing
+
+ -- locators
+ pages' <- getFieldMaybe "pages"
+ volume' <- getFieldMaybe "volume"
+ part' <- getFieldMaybe "part"
+ volumes' <- getFieldMaybe "volumes"
+ pagetotal' <- getFieldMaybe "pagetotal"
+ chapter' <- getFieldMaybe "chapter"
+ edition' <- getFieldMaybe "edition"
+ version' <- getFieldMaybe "version"
+ (number', collectionNumber', issue') <-
+ (getField "number" >>= \x ->
+ if et `elem` ["book","collection","proceedings","reference",
+ "mvbook","mvcollection","mvproceedings", "mvreference",
+ "bookinbook","inbook", "incollection","inproceedings",
+ "inreference", "suppbook","suppcollection"]
+ then return (Nothing, Just x, Nothing)
+ else if isArticle
+ then (getField "issue" >>= \y ->
+ return (Nothing, Nothing, Just $ concatWith ',' [x,y]))
+ <|> return (Nothing, Nothing, Just x)
+ else return (Just x, Nothing, Nothing))
+ <|> return (Nothing, Nothing, Nothing)
+
+ -- titles
+ hasMaintitle <- (True <$ getRawField "maintitle") <|> return False
+
+ title' <- Just <$>
+ ((guard isPeriodical >> getTitle "issuetitle")
+ <|> (guard hasMaintitle >>
+ guard (not isChapterlike) >>
+ getTitle "maintitle")
+ <|> getTitle "title")
+ <|> return Nothing
+
+ subtitle' <- (guard isPeriodical >> getTitle "issuesubtitle")
+ <|> (guard hasMaintitle >>
+ guard (not isChapterlike) >>
+ getTitle "mainsubtitle")
+ <|> getTitle "subtitle"
+ <|> return mempty
+ titleaddon' <- (guard hasMaintitle >>
+ guard (not isChapterlike) >>
+ getTitle "maintitleaddon")
+ <|> getTitle "titleaddon"
+ <|> return mempty
+
+ volumeTitle' <- Just <$>
+ ((guard hasMaintitle >>
+ guard (not isChapterlike) >>
+ getTitle "title")
+ <|> (guard hasMaintitle >>
+ guard isChapterlike >>
+ getTitle "booktitle"))
+ <|> return Nothing
+ volumeSubtitle' <- (guard hasMaintitle >>
+ guard (not isChapterlike) >>
+ getTitle "subtitle")
+ <|> (guard hasMaintitle >>
+ guard isChapterlike >>
+ getTitle "booksubtitle")
+ <|> return mempty
+ volumeTitleAddon' <- (guard hasMaintitle >>
+ guard (not isChapterlike) >>
+ getTitle "titleaddon")
+ <|> (guard hasMaintitle >>
+ guard isChapterlike >>
+ getTitle "booktitleaddon")
+ <|> return mempty
+
+ containerTitle' <- Just <$>
+ ((guard isPeriodical >> getPeriodicalTitle "title")
+ <|> (guard isChapterlike >> getTitle "maintitle")
+ <|> (guard isChapterlike >> getTitle "booktitle")
+ <|> getPeriodicalTitle "journaltitle"
+ <|> getPeriodicalTitle "journal")
+ <|> return Nothing
+ containerSubtitle' <- (guard isPeriodical >> getPeriodicalTitle "subtitle")
+ <|> (guard isChapterlike >> getTitle "mainsubtitle")
+ <|> (guard isChapterlike >> getTitle "booksubtitle")
+ <|> getPeriodicalTitle "journalsubtitle"
+ <|> return mempty
+ containerTitleAddon' <- (guard isPeriodical >>
+ getPeriodicalTitle "titleaddon")
+ <|> (guard isChapterlike >>
+ getTitle "maintitleaddon")
+ <|> (guard isChapterlike >>
+ getTitle "booktitleaddon")
+ <|> return mempty
+ containerTitleShort' <- Just <$>
+ ((guard isPeriodical >>
+ guard (not hasMaintitle) >>
+ getField "shorttitle")
+ <|> getPeriodicalTitle "shortjournal")
+ <|> return Nothing
+
+ -- change numerical series title to e.g. 'series 3'
+ let fixSeriesTitle [Str xs] | isNumber xs =
+ [Str (ordinalize locale xs), Space, Str (resolveKey' lang "jourser")]
+ fixSeriesTitle xs = xs
+ seriesTitle' <- (Just . B.fromList . fixSeriesTitle .
+ B.toList . resolveKey lang <$>
+ getTitle "series") <|>
+ return Nothing
+ shortTitle' <- Just <$>
+ ((guard (not hasMaintitle || isChapterlike) >>
+ getTitle "shorttitle")
+ <|> if (subtitle' /= mempty || titleaddon' /= mempty) &&
+ not hasMaintitle
+ then getShortTitle False "title"
+ else getShortTitle True "title")
+ <|> return Nothing
+
+ eventTitle' <- Just <$> getTitle "eventtitle" <|> return Nothing
+ origTitle' <- Just <$> getTitle "origtitle" <|> return Nothing
+
+ -- publisher
+ pubfields <- mapM (\f -> Just `fmap`
+ (if variant == Bibtex || f == "howpublished"
+ then getField f
+ else getLiteralList' f)
+ <|> return Nothing)
+ ["school","institution","organization", "howpublished","publisher"]
+ let publisher' = case catMaybes pubfields of
+ [] -> Nothing
+ xs -> Just $ concatWith ';' xs
+ origpublisher' <- (Just <$> getField "origpublisher") <|> return Nothing
+
+ -- places
+ venue' <- (Just <$> getField "venue") <|> return Nothing
+ address' <- Just <$>
+ (if variant == Bibtex
+ then getField "address"
+ else getLiteralList' "address"
+ <|> (guard (et /= "patent") >>
+ getLiteralList' "location"))
+ <|> return Nothing
+ origLocation' <- Just <$>
+ (if variant == Bibtex
+ then getField "origlocation"
+ else getLiteralList' "origlocation")
+ <|> return Nothing
+ jurisdiction' <- if reftype == "patent"
+ then Just <$>
+ (concatWith ';' . map (resolveKey lang) <$>
+ getLiteralList "location") <|> return Nothing
+ else return Nothing
+
+ -- url, doi, isbn, etc.:
+ -- note that with eprinttype = arxiv, we take eprint to be a partial url
+ -- archivePrefix is an alias for eprinttype
+ url' <- (guard (et == "online" || lookup "url" opts /= Just "false")
+ >> Just <$> getRawField "url")
+ <|> (do etype <- getRawField "eprinttype"
+ eprint <- getRawField "eprint"
+ let baseUrl =
+ case T.toLower etype of
+ "arxiv" -> "http://arxiv.org/abs/"
+ "jstor" -> "http://www.jstor.org/stable/"
+ "pubmed" -> "http://www.ncbi.nlm.nih.gov/pubmed/"
+ "googlebooks" -> "http://books.google.com?id="
+ _ -> ""
+ if T.null baseUrl
+ then mzero
+ else return $ Just $ baseUrl <> eprint)
+ <|> return Nothing
+ doi' <- (guard (lookup "doi" opts /= Just "false") >>
+ Just <$> getRawField "doi")
+ <|> return Nothing
+ isbn' <- Just <$> getRawField "isbn" <|> return Nothing
+ issn' <- Just <$> getRawField "issn" <|> return Nothing
+ pmid' <- Just <$> getRawField "pmid" <|> return Nothing
+ pmcid' <- Just <$> getRawField "pmcid" <|> return Nothing
+ callNumber' <- Just <$> getRawField "library" <|> return Nothing
+
+ -- notes
+ annotation' <- Just <$>
+ (getField "annotation" <|> getField "annote")
+ <|> return Nothing
+ abstract' <- Just <$> getField "abstract" <|> return Nothing
+ keywords' <- Just <$> getField "keywords" <|> return Nothing
+ note' <- if et == "periodical"
+ then return Nothing
+ else Just <$> getField "note" <|> return Nothing
+ addendum' <- if variant == Bibtex
+ then return Nothing
+ else Just <$> getField "addendum"
+ <|> return Nothing
+ pubstate' <- ( (Just . resolveKey lang <$> getField "pubstate")
+ <|> case dateLiteral <$> issued' of
+ Just (Just "forthcoming") ->
+ return $ Just $ B.str "forthcoming"
+ _ -> return Nothing
+ )
+
+
+
+
+ let addField (_, Nothing) = id
+ addField (f, Just x) = Map.insert f x
+ let vars = foldr addField mempty
+ [ ("other-ids", TextVal <$> otherIds)
+ , ("genre", TextVal <$> genre)
+ , ("language", TextVal <$> hyphenation)
+ -- dates
+ , ("accessed", DateVal <$> accessed')
+ , ("event-date", DateVal <$> eventDate')
+ , ("issued", DateVal <$> issued')
+ , ("original-date", DateVal <$> origDate')
+ -- names
+ , ("author", NamesVal <$> author')
+ , ("editor", NamesVal <$> editor')
+ , ("translator", NamesVal <$> translator')
+ , ("director", NamesVal <$> director')
+ , ("container-author", NamesVal <$> containerAuthor')
+ -- locators
+ , ("page", FancyVal . Walk.walk convertEnDash <$> pages')
+ , ("number-of-pages", FancyVal <$> pagetotal')
+ , ("volume", case (volume', part') of
+ (Nothing, Nothing) -> Nothing
+ (Just v, Nothing) -> Just $ FancyVal v
+ (Nothing, Just p) -> Just $ FancyVal p
+ (Just v, Just p) ->
+ Just $ FancyVal $ v <> B.str "." <> p)
+ , ("number-of-volumes", FancyVal <$> volumes')
+ , ("chapter-number", FancyVal <$> chapter')
+ , ("edition", FancyVal <$> edition')
+ , ("version", FancyVal <$> version')
+ , ("number", FancyVal <$> number')
+ , ("collection-number", FancyVal <$> collectionNumber')
+ , ("issue", FancyVal <$> issue')
+ -- title
+ , ("original-title", FancyVal <$> origTitle')
+ , ("event", FancyVal <$> eventTitle')
+ , ("title", case title' of
+ Just t -> Just $ FancyVal $
+ concatWith '.' [
+ concatWith ':' [t, subtitle']
+ , titleaddon' ]
+ Nothing -> Nothing)
+ , ("volume-title",
+ case volumeTitle' of
+ Just t -> Just $ FancyVal $
+ concatWith '.' [
+ concatWith ':' [t, volumeSubtitle']
+ , volumeTitleAddon' ]
+ Nothing -> Nothing)
+ , ("container-title",
+ case containerTitle' of
+ Just t -> Just $ FancyVal $
+ concatWith '.' [
+ concatWith ':' [t,
+ containerSubtitle']
+ , containerTitleAddon' ]
+ Nothing -> Nothing)
+ , ("container-title-short", FancyVal <$> containerTitleShort')
+ , ("collection-title", FancyVal <$> seriesTitle')
+ , ("title-short", FancyVal <$> shortTitle')
+ -- publisher
+ , ("publisher", FancyVal <$> publisher')
+ , ("original-publisher", FancyVal <$> origpublisher')
+ -- places
+ , ("jurisdiction", FancyVal <$> jurisdiction')
+ , ("event-place", FancyVal <$> venue')
+ , ("publisher-place", FancyVal <$> address')
+ , ("original-publisher-place", FancyVal <$> origLocation')
+ -- urls
+ , ("url", TextVal <$> url')
+ , ("doi", TextVal <$> doi')
+ , ("isbn", TextVal <$> isbn')
+ , ("issn", TextVal <$> issn')
+ , ("pmcid", TextVal <$> pmcid')
+ , ("pmid", TextVal <$> pmid')
+ , ("call-number", TextVal <$> callNumber')
+ -- notes
+ , ("note", case catMaybes [note', addendum'] of
+ [] -> Nothing
+ xs -> return $ FancyVal $ concatWith '.' xs)
+ , ("annote", FancyVal <$> annotation')
+ , ("abstract", FancyVal <$> abstract')
+ , ("keyword", FancyVal <$> keywords')
+ , ("status", FancyVal <$> pubstate')
+ ]
+ return $ Reference
+ { referenceId = ItemId id'
+ , referenceType = reftype
+ , referenceDisambiguation = Nothing
+ , referenceVariables = vars }
+
+
+bib :: Item -> Bib a -> BibParser a
+bib entry m = fst <$> evalRWST m entry (BibState True (Lang "en" (Just "US")))
+
+resolveCrossRefs :: Variant -> [Item] -> [Item]
+resolveCrossRefs variant entries =
+ map (resolveCrossRef variant entries) entries
+
+resolveCrossRef :: Variant -> [Item] -> Item -> Item
+resolveCrossRef variant entries entry =
+ Map.foldrWithKey go entry (fields entry)
+ where go key val entry' =
+ if key == "crossref" || key == "xdata"
+ then entry'{ fields = fields entry' <>
+ Map.fromList (getXrefFields variant
+ entry entries val) }
+ else entry'
+
+getXrefFields :: Variant -> Item -> [Item] -> Text -> [(Text, Text)]
+getXrefFields variant baseEntry entries keys = do
+ let keys' = splitKeys keys
+ xrefEntry <- [e | e <- entries, identifier e `elem` keys']
+ (k, v) <- Map.toList $ fields xrefEntry
+ if k == "crossref" || k == "xdata"
+ then do
+ xs <- mapM (getXrefFields variant baseEntry entries)
+ (splitKeys v)
+ (x, y) <- xs
+ guard $ isNothing $ Map.lookup x $ fields xrefEntry
+ return (x, y)
+ else do
+ k' <- case variant of
+ Bibtex -> return k
+ Biblatex -> transformKey
+ (entryType xrefEntry) (entryType baseEntry) k
+ guard $ isNothing $ Map.lookup k' $ fields baseEntry
+ return (k',v)
+
+
+
+data BibState = BibState{
+ untitlecase :: Bool
+ , localeLang :: Lang
+ }
+
+type Bib = RWST Item () BibState BibParser
+
+blocksToInlines :: [Block] -> Inlines
+blocksToInlines bs =
+ case bs of
+ [Plain xs] -> B.fromList xs
+ [Para xs] -> B.fromList xs
+ _ -> B.fromList $ Walk.query (:[]) bs
+
+adjustSpans :: Lang -> Inline -> Inline
+adjustSpans lang (RawInline (Format "latex") s)
+ | s == "\\hyphen" || s == "\\hyphen " = Str "-"
+ | otherwise = parseRawLaTeX lang s
+adjustSpans _ SoftBreak = Space
+adjustSpans _ x = x
+
+parseRawLaTeX :: Lang -> Text -> Inline
+parseRawLaTeX lang t@(T.stripPrefix "\\" -> Just xs) =
+ case parseLaTeX lang contents of
+ Right [Para ys] -> f command ys
+ Right [Plain ys] -> f command ys
+ Right [] -> f command []
+ _ -> RawInline (Format "latex") t
+ where (command', contents') = T.break (\c -> c =='{' || c =='\\') xs
+ command = T.strip command'
+ contents = T.drop 1 $ T.dropEnd 1 contents'
+ f "mkbibquote" ils = Span nullAttr [Quoted DoubleQuote ils]
+ f "mkbibemph" ils = Span nullAttr [Emph ils]
+ f "mkbibitalic" ils = Span nullAttr [Emph ils]
+ f "mkbibbold" ils = Span nullAttr [Strong ils]
+ f "mkbibparens" ils = Span nullAttr $
+ [Str "("] ++ ils ++ [Str ")"]
+ f "mkbibbrackets" ils = Span nullAttr $
+ [Str "["] ++ ils ++ [Str "]"]
+ -- ... both should be nestable & should work in year fields
+ f "autocap" ils = Span nullAttr ils
+ -- TODO: should work in year fields
+ f "textnormal" ils = Span ("",["nodecor"],[]) ils
+ f "bibstring" [Str s] = Str $ resolveKey' lang s
+ f "adddot" [] = Str "."
+ f "adddotspace" [] = Span nullAttr [Str ".", Space]
+ f "addabbrvspace" [] = Space
+ f _ ils = Span nullAttr ils
+parseRawLaTeX _ t = RawInline (Format "latex") t
+
+latex' :: Text -> Bib [Block]
+latex' t = do
+ lang <- gets localeLang
+ case parseLaTeX lang t of
+ Left _ -> mzero
+ Right bs -> return bs
+
+parseLaTeX :: Lang -> Text -> Either PandocError [Block]
+parseLaTeX lang t =
+ case runPure (readLaTeX
+ def{ readerExtensions =
+ extensionsFromList [Ext_raw_tex, Ext_smart] } t) of
+ Left e -> Left e
+ Right (Pandoc _ bs) -> Right $ Walk.walk (adjustSpans lang) bs
+
+latex :: Text -> Bib Inlines
+latex = fmap blocksToInlines . latex' . T.strip
+
+type Options = [(Text, Text)]
+
+parseOptions :: Text -> Options
+parseOptions = map breakOpt . T.splitOn ","
+ where breakOpt x = case T.break (=='=') x of
+ (w,v) -> (T.toLower $ T.strip w,
+ T.toLower $ T.strip $ T.drop 1 v)
+
+bibEntries :: BibParser [Item]
+bibEntries = do
+ skipMany nonEntry
+ many (bibItem <* skipMany nonEntry)
+ where nonEntry = bibSkip <|>
+ try (char '@' >>
+ (bibComment <|> bibPreamble <|> bibString))
+
+bibSkip :: BibParser ()
+bibSkip = skipMany1 (satisfy (/='@'))
+
+bibComment :: BibParser ()
+bibComment = do
+ cistring "comment"
+ spaces
+ void inBraces <|> bibSkip <|> return ()
+
+bibPreamble :: BibParser ()
+bibPreamble = do
+ cistring "preamble"
+ spaces
+ void inBraces
+
+bibString :: BibParser ()
+bibString = do
+ cistring "string"
+ spaces
+ char '{'
+ spaces
+ (k,v) <- entField
+ char '}'
+ updateState (\(l,m) -> (l, Map.insert k v m))
+ return ()
+
+inBraces :: BibParser Text
+inBraces = try $ do
+ char '{'
+ res <- manyTill
+ ( (T.pack <$> many1 (noneOf "{}\\"))
+ <|> (char '\\' >> ( (char '{' >> return "\\{")
+ <|> (char '}' >> return "\\}")
+ <|> return "\\"))
+ <|> (braced <$> inBraces)
+ ) (char '}')
+ return $ T.concat res
+
+braced :: Text -> Text
+braced = T.cons '{' . flip T.snoc '}'
+
+inQuotes :: BibParser Text
+inQuotes = do
+ char '"'
+ T.concat <$> manyTill
+ ( (T.pack <$> many1 (noneOf "\"\\{"))
+ <|> (char '\\' >> T.cons '\\' . T.singleton <$> anyChar)
+ <|> braced <$> inBraces
+ ) (char '"')
+
+fieldName :: BibParser Text
+fieldName = resolveAlias . T.toLower . T.pack
+ <$> many1 (letter <|> digit <|> oneOf "-_:+")
+
+isBibtexKeyChar :: Char -> Bool
+isBibtexKeyChar c =
+ isAlphaNum c || c `elem` (".:;?!`'()/*@_+=-[]*&" :: [Char])
+
+bibItem :: BibParser Item
+bibItem = do
+ char '@'
+ pos <- getPosition
+ enttype <- map toLower <$> many1 letter
+ spaces
+ char '{'
+ spaces
+ entid <- many1 (satisfy isBibtexKeyChar)
+ spaces
+ char ','
+ spaces
+ entfields <- entField `sepEndBy` (char ',' >> spaces)
+ spaces
+ char '}'
+ return $ Item (T.pack entid) pos (T.pack enttype) (Map.fromList entfields)
+
+entField :: BibParser (Text, Text)
+entField = do
+ k <- fieldName
+ spaces
+ char '='
+ spaces
+ vs <- (expandString <|> inQuotes <|> inBraces <|> rawWord) `sepBy`
+ try (spaces >> char '#' >> spaces)
+ spaces
+ return (k, T.concat vs)
+
+resolveAlias :: Text -> Text
+resolveAlias "archiveprefix" = "eprinttype"
+resolveAlias "primaryclass" = "eprintclass"
+resolveAlias s = s
+
+rawWord :: BibParser Text
+rawWord = T.pack <$> many1 alphaNum
+
+expandString :: BibParser Text
+expandString = do
+ k <- fieldName
+ (lang, strs) <- getState
+ case Map.lookup k strs of
+ Just v -> return v
+ Nothing -> return $ resolveKey' lang k
+
+cistring :: Text -> BibParser Text
+cistring s = try (go s)
+ where go t = case T.uncons t of
+ Nothing -> return ""
+ Just (c,cs) -> do
+ x <- char (toLower c) <|> char (toUpper c)
+ xs <- go cs
+ return (T.cons x xs)
+
+splitKeys :: Text -> [Text]
+splitKeys = filter (not . T.null) . T.split (\c -> c == ' ' || c == ',')
+
+-- Biblatex Localization Keys (see Biblatex manual)
+-- Currently we only map a subset likely to be used in Biblatex *databases*
+-- (in fields such as `type`, and via `\bibstring{}` commands).
+
+parseMonth :: Text -> Maybe Int
+parseMonth s =
+ case T.toLower s of
+ "jan" -> Just 1
+ "feb" -> Just 2
+ "mar" -> Just 3
+ "apr" -> Just 4
+ "may" -> Just 5
+ "jun" -> Just 6
+ "jul" -> Just 7
+ "aug" -> Just 8
+ "sep" -> Just 9
+ "oct" -> Just 10
+ "nov" -> Just 11
+ "dec" -> Just 12
+ _ -> readMay (T.unpack s)
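+-- e.g. parseMonth "Sep" == Just 9; a numeric value such as "09" falls
+-- through to readMay and also yields Just 9.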
+
+notFound :: Text -> Bib a
+notFound f = Prelude.fail $ T.unpack f ++ " not found"
+
+getField :: Text -> Bib Inlines
+getField f = do
+ fs <- asks fields
+ case Map.lookup f fs of
+ Just x -> latex x
+ Nothing -> notFound f
+
+
+getPeriodicalTitle :: Text -> Bib Inlines
+getPeriodicalTitle f = do
+ ils <- getField f
+ return ils
+
+protectCase :: (Inlines -> Inlines) -> (Inlines -> Inlines)
+protectCase f = Walk.walk unprotect . f . Walk.walk protect
+ where
+ protect (Span ("",[],[]) xs) = Span ("",["nocase"],[]) xs
+ protect x = x
+ unprotect (Span ("",["nocase"],[]) xs)
+ | hasLowercaseWord xs = Span ("",["nocase"],[]) xs
+ | otherwise = Span ("",[],[]) xs
+ unprotect x = x
+ hasLowercaseWord = any startsWithLowercase . splitStrWhen isPunctuation
+ startsWithLowercase (Str (T.uncons -> Just (x,_))) = isLower x
+ startsWithLowercase _ = False
+
+unTitlecase :: Maybe Lang -> Inlines -> Inlines
+unTitlecase mblang = protectCase (caseTransform (withSentenceCase mblang))
+
+getTitle :: Text -> Bib Inlines
+getTitle f = do
+ ils <- getField f
+ utc <- gets untitlecase
+ lang <- gets localeLang
+ let processTitle = if utc then unTitlecase (Just lang) else id
+ return $ processTitle ils
+
+getShortTitle :: Bool -> Text -> Bib Inlines
+getShortTitle requireColon f = do
+ ils <- splitStrWhen (==':') . B.toList <$> getTitle f
+ if not requireColon || containsColon ils
+ then return $ B.fromList $ upToColon ils
+ else return mempty
+
+containsColon :: [Inline] -> Bool
+containsColon xs = Str ":" `elem` xs
+
+upToColon :: [Inline] -> [Inline]
+upToColon xs = takeWhile (/= Str ":") xs
+
+isNumber :: Text -> Bool
+isNumber t = case T.uncons t of
+ Just ('-', ds) -> T.all isDigit ds
+ Just _ -> T.all isDigit t
+ Nothing -> False
+
+getDate :: Text -> Bib Date
+getDate f = do
+  -- the ~ can be used for approx dates, but the latex reader
+ -- parses this as a nonbreaking space, so we need to convert it back!
+ let nbspToTilde '\160' = '~'
+ nbspToTilde c = c
+ mbd <- rawDateEDTF . T.map nbspToTilde <$> getRawField f
+ case mbd of
+ Nothing -> Prelude.fail "expected date"
+ Just d -> return d
+
+-- A negative (BC) year might be written with -- or --- in bibtex:
+fixLeadingDash :: Text -> Text
+fixLeadingDash t = case T.uncons t of
+  Just (c, ds) | (c == '–' || c == '—') && firstIsDigit ds -> T.cons '-' ds
+ _ -> t
+ where firstIsDigit = maybe False (isDigit . fst) . T.uncons
+
+getOldDate :: Text -> Bib Date
+getOldDate prefix = do
+ year' <- (readMay . T.unpack . fixLeadingDash . stringify
+ <$> getField (prefix <> "year")) <|> return Nothing
+ month' <- (parseMonth <$> getRawField (prefix <> "month"))
+ <|> return Nothing
+ day' <- (readMay . T.unpack <$> getRawField (prefix <> "day"))
+ <|> return Nothing
+ endyear' <- (readMay . T.unpack . fixLeadingDash . stringify
+ <$> getField (prefix <> "endyear")) <|> return Nothing
+ endmonth' <- (parseMonth . stringify
+ <$> getField (prefix <> "endmonth")) <|> return Nothing
+ endday' <- (readMay . T.unpack . stringify <$>
+ getField (prefix <> "endday")) <|> return Nothing
+ let toDateParts (y', m', d') =
+ DateParts $
+ case y' of
+ Nothing -> []
+ Just y ->
+ case m' of
+ Nothing -> [y]
+ Just m ->
+ case d' of
+ Nothing -> [y,m]
+ Just d -> [y,m,d]
+ let dateparts = filter (\x -> x /= DateParts [])
+ $ map toDateParts [(year',month',day'),
+ (endyear',endmonth',endday')]
+ literal <- if null dateparts
+ then Just <$> getRawField (prefix <> "year")
+ else return Nothing
+ return $
+ Date { dateParts = dateparts
+ , dateCirca = False
+ , dateSeason = Nothing
+ , dateLiteral = literal }
+
+getRawField :: Text -> Bib Text
+getRawField f =
+ (stringify <$> getField f)
+ <|> do fs <- asks fields
+ case Map.lookup f fs of
+ Just x -> return x
+ Nothing -> notFound f
+
+getLiteralList :: Text -> Bib [Inlines]
+getLiteralList f = do
+ fs <- asks fields
+ case Map.lookup f fs of
+ Just x -> latex' x >>= toLiteralList
+ Nothing -> notFound f
+
+-- separates items with semicolons
+getLiteralList' :: Text -> Bib Inlines
+getLiteralList' f = do
+ fs <- asks fields
+ case Map.lookup f fs of
+ Just x -> do
+ x' <- latex' x
+ case x' of
+ [Para xs] ->
+ return $ B.fromList
+ $ intercalate [Str ";", Space]
+ $ splitByAnd xs
+ [Plain xs] ->
+ return $ B.fromList
+ $ intercalate [Str ";", Space]
+ $ splitByAnd xs
+ _ -> mzero
+ Nothing -> notFound f
+
+splitByAnd :: [Inline] -> [[Inline]]
+splitByAnd = splitOn [Space, Str "and", Space]
+
+toLiteralList :: [Block] -> Bib [Inlines]
+toLiteralList [Para xs] =
+ return $ map B.fromList $ splitByAnd xs
+toLiteralList [Plain xs] = toLiteralList [Para xs]
+toLiteralList _ = mzero
+
+concatWith :: Char -> [Inlines] -> Inlines
+concatWith sep = foldl' go mempty
+ where go :: Inlines -> Inlines -> Inlines
+ go accum s
+ | s == mempty = accum
+ | otherwise =
+ case Seq.viewr (B.unMany accum) of
+ Seq.EmptyR -> s
+ _ Seq.:> Str x
+ | not (T.null x) &&
+ T.last x `elem` ("!?.,:;" :: String)
+ -> accum <> B.space <> s
+ _ -> accum <> B.str (T.singleton sep) <>
+ B.space <> s
+
+
+getNameList :: Options -> Text -> Bib [Name]
+getNameList opts f = do
+ fs <- asks fields
+ case Map.lookup f fs of
+ Just x -> latexNames opts x
+ Nothing -> notFound f
+
+toNameList :: Options -> [Block] -> Bib [Name]
+toNameList opts [Para xs] =
+ filter (/= emptyName) <$> mapM (toName opts . addSpaceAfterPeriod)
+ (splitByAnd xs)
+toNameList opts [Plain xs] = toNameList opts [Para xs]
+toNameList _ _ = mzero
+
+latexNames :: Options -> Text -> Bib [Name]
+latexNames opts t = latex' (T.strip t) >>= toNameList opts
+
+-- see issue 392 for motivation. We want to treat
+-- "J.G. Smith" and "J. G. Smith" the same.
+addSpaceAfterPeriod :: [Inline] -> [Inline]
+addSpaceAfterPeriod = go . splitStrWhen (=='.')
+ where
+ go [] = []
+ go (Str (T.unpack -> [c]):Str ".":Str (T.unpack -> [d]):xs)
+ | isLetter d
+ , isLetter c
+ , isUpper c
+ , isUpper d
+ = Str (T.singleton c):Str ".":Space:go (Str (T.singleton d):xs)
+ go (x:xs) = x:go xs
+
+emptyName :: Name
+emptyName =
+ Name { nameFamily = Nothing
+ , nameGiven = Nothing
+ , nameDroppingParticle = Nothing
+ , nameNonDroppingParticle = Nothing
+ , nameSuffix = Nothing
+ , nameLiteral = Nothing
+ , nameCommaSuffix = False
+ , nameStaticOrdering = False
+ }
+
+toName :: Options -> [Inline] -> Bib Name
+toName _ [Str "others"] =
+ return emptyName{ nameLiteral = Just "others" }
+toName _ [Span ("",[],[]) ils] = -- corporate author
+ return emptyName{ nameLiteral = Just $ stringify ils }
+ -- extended BibLaTeX name format - see #266
+toName _ ils@(Str ys:_) | T.any (== '=') ys = do
+ let commaParts = splitWhen (== Str ",")
+ . splitStrWhen (\c -> c == ',' || c == '=' || c == '\160')
+ $ ils
+ let addPart ag (Str "given" : Str "=" : xs) =
+ ag{ nameGiven = case nameGiven ag of
+ Nothing -> Just $ stringify xs
+ Just t -> Just $ t <> " " <> stringify xs }
+ addPart ag (Str "family" : Str "=" : xs) =
+ ag{ nameFamily = Just $ stringify xs }
+ addPart ag (Str "prefix" : Str "=" : xs) =
+ ag{ nameDroppingParticle = Just $ stringify xs }
+ addPart ag (Str "useprefix" : Str "=" : Str "true" : _) =
+ ag{ nameNonDroppingParticle = nameDroppingParticle ag
+ , nameDroppingParticle = Nothing }
+ addPart ag (Str "suffix" : Str "=" : xs) =
+ ag{ nameSuffix = Just $ stringify xs }
+ addPart ag (Space : xs) = addPart ag xs
+ addPart ag _ = ag
+ return $ foldl' addPart emptyName commaParts
+-- First von Last
+-- von Last, First
+-- von Last, Jr ,First
+-- NOTE: biblatex and bibtex differ on:
+-- Drummond de Andrade, Carlos
+-- bibtex takes "Drummond de" as the von;
+-- biblatex takes the whole as a last name.
+-- See https://github.com/plk/biblatex/issues/236
+-- Here we implement the more sensible biblatex behavior.
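+-- For illustration: "Drummond de Andrade, Carlos" yields
+-- nameGiven = Just "Carlos" and nameFamily = Just "Drummond de Andrade";
+-- with the "bibtex" option set, "Drummond de" would instead become the
+-- dropping particle and "Andrade" alone the family name.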
+toName opts ils = do
+ let useprefix = optionSet "useprefix" opts
+ let usecomma = optionSet "juniorcomma" opts
+ let bibtex = optionSet "bibtex" opts
+ let words' = wordsBy (\x -> x == Space || x == Str "\160")
+ let commaParts = map words' $ splitWhen (== Str ",")
+ $ splitStrWhen
+ (\c -> c == ',' || c == '\160') ils
+ let (first, vonlast, jr) =
+ case commaParts of
+ --- First is the longest sequence of white-space separated
+ -- words starting with an uppercase and that is not the
+ -- whole string. von is the longest sequence of whitespace
+ -- separated words whose last word starts with lower case
+ -- and that is not the whole string.
+ [fvl] -> let (caps', rest') = span isCapitalized fvl
+ in if null rest' && not (null caps')
+ then (init caps', [last caps'], [])
+ else (caps', rest', [])
+ [vl,f] -> (f, vl, [])
+ (vl:j:f:_) -> (f, vl, j )
+ [] -> ([], [], [])
+
+ let (von, lastname) =
+ if bibtex
+ then case span isCapitalized $ reverse vonlast of
+ ([],w:ws) -> (reverse ws, [w])
+ (vs, ws) -> (reverse ws, reverse vs)
+ else case break isCapitalized vonlast of
+ (vs@(_:_), []) -> (init vs, [last vs])
+ (vs, ws) -> (vs, ws)
+ let prefix = T.unwords $ map stringify von
+ let family = T.unwords $ map stringify lastname
+ let suffix = T.unwords $ map stringify jr
+ let given = T.unwords $ map stringify first
+ return
+ Name { nameFamily = if T.null family
+ then Nothing
+ else Just family
+ , nameGiven = if T.null given
+ then Nothing
+ else Just given
+ , nameDroppingParticle = if useprefix || T.null prefix
+ then Nothing
+ else Just prefix
+ , nameNonDroppingParticle = if useprefix && not (T.null prefix)
+ then Just prefix
+ else Nothing
+ , nameSuffix = if T.null suffix
+ then Nothing
+ else Just suffix
+ , nameLiteral = Nothing
+ , nameCommaSuffix = usecomma
+ , nameStaticOrdering = False
+ }
+
+splitStrWhen :: (Char -> Bool) -> [Inline] -> [Inline]
+splitStrWhen _ [] = []
+splitStrWhen p (Str xs : ys) = map Str (go xs) ++ splitStrWhen p ys
+ where go s =
+ let (w,z) = T.break p s
+ in if T.null z
+ then if T.null w
+ then []
+ else [w]
+ else if T.null w
+ then (T.take 1 z : go (T.drop 1 z))
+ else (w : T.take 1 z : go (T.drop 1 z))
+splitStrWhen p (x : ys) = x : splitStrWhen p ys
+
+ordinalize :: Locale -> Text -> Text
+ordinalize locale n =
+ let terms = localeTerms locale
+ pad0 t = case T.length t of
+ 0 -> "00"
+ 1 -> "0" <> t
+ _ -> t
+ in case Map.lookup ("ordinal-" <> pad0 n) terms <|>
+ Map.lookup "ordinal" terms of
+ Nothing -> n
+ Just [] -> n
+ Just (t:_) -> n <> snd t
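+-- For example, with an English locale that defines the "ordinal-03" (or
+-- generic "ordinal") term as "rd", ordinalize turns "3" into "3rd"; if no
+-- ordinal term is defined, the number is returned unchanged.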
+
+isCapitalized :: [Inline] -> Bool
+isCapitalized (Str (T.uncons -> Just (c,cs)) : rest)
+ | isUpper c = True
+ | isDigit c = isCapitalized (Str cs : rest)
+ | otherwise = False
+isCapitalized (_:rest) = isCapitalized rest
+isCapitalized [] = True
+
+optionSet :: Text -> Options -> Bool
+optionSet key opts = case lookup key opts of
+ Just "true" -> True
+ Just s -> s == mempty
+ _ -> False
+
+getTypeAndGenre :: Bib (Text, Maybe Text)
+getTypeAndGenre = do
+ lang <- gets localeLang
+ et <- asks entryType
+ guard $ et /= "xdata"
+ reftype' <- resolveKey' lang <$> getRawField "type"
+ <|> return mempty
+ st <- getRawField "entrysubtype" <|> return mempty
+ isEvent <- (True <$ (getRawField "eventdate"
+ <|> getRawField "eventtitle"
+ <|> getRawField "venue")) <|> return False
+ let reftype =
+ case et of
+ "article"
+ | st == "magazine" -> "article-magazine"
+ | st == "newspaper" -> "article-newspaper"
+ | otherwise -> "article-journal"
+ "book" -> "book"
+ "booklet" -> "pamphlet"
+ "bookinbook" -> "chapter"
+ "collection" -> "book"
+ "dataset" -> "dataset"
+ "electronic" -> "webpage"
+ "inbook" -> "chapter"
+ "incollection" -> "chapter"
+ "inreference" -> "entry-encyclopedia"
+ "inproceedings" -> "paper-conference"
+ "manual" -> "book"
+ "mastersthesis" -> "thesis"
+ "misc" -> ""
+ "mvbook" -> "book"
+ "mvcollection" -> "book"
+ "mvproceedings" -> "book"
+ "mvreference" -> "book"
+ "online" -> "webpage"
+ "patent" -> "patent"
+ "periodical"
+ | st == "magazine" -> "article-magazine"
+ | st == "newspaper" -> "article-newspaper"
+ | otherwise -> "article-journal"
+ "phdthesis" -> "thesis"
+ "proceedings" -> "book"
+ "reference" -> "book"
+ "report" -> "report"
+ "software" -> "book" -- no "software" type in CSL
+ "suppbook" -> "chapter"
+ "suppcollection" -> "chapter"
+ "suppperiodical"
+ | st == "magazine" -> "article-magazine"
+ | st == "newspaper" -> "article-newspaper"
+ | otherwise -> "article-journal"
+ "techreport" -> "report"
+ "thesis" -> "thesis"
+ "unpublished" -> if isEvent then "speech" else "manuscript"
+ "www" -> "webpage"
+ -- biblatex, "unsupported"
+ "artwork" -> "graphic"
+ "audio" -> "song" -- for audio *recordings*
+ "commentary" -> "book"
+ "image" -> "graphic" -- or "figure" ?
+ "jurisdiction" -> "legal_case"
+ "legislation" -> "legislation" -- or "bill" ?
+ "legal" -> "treaty"
+ "letter" -> "personal_communication"
+ "movie" -> "motion_picture"
+ "music" -> "song" -- for musical *recordings*
+ "performance" -> "speech"
+ "review" -> "review" -- or "review-book" ?
+ "standard" -> "legislation"
+ "video" -> "motion_picture"
+ -- biblatex-apa:
+ "data" -> "dataset"
+ "letters" -> "personal_communication"
+ "newsarticle" -> "article-newspaper"
+ _ -> ""
+
+ let refgenre =
+ case et of
+ "mastersthesis" -> if T.null reftype'
+ then Just $ resolveKey' lang "mathesis"
+ else Just reftype'
+ "phdthesis" -> if T.null reftype'
+ then Just $ resolveKey' lang "phdthesis"
+ else Just reftype'
+ _ -> if T.null reftype'
+ then Nothing
+ else Just reftype'
+ return (reftype, refgenre)
+
+
+-- transformKey source target key
+-- derived from Appendix C of bibtex manual
+transformKey :: Text -> Text -> Text -> [Text]
+transformKey _ _ "ids" = []
+transformKey _ _ "crossref" = []
+transformKey _ _ "xref" = []
+transformKey _ _ "entryset" = []
+transformKey _ _ "entrysubtype" = []
+transformKey _ _ "execute" = []
+transformKey _ _ "label" = []
+transformKey _ _ "options" = []
+transformKey _ _ "presort" = []
+transformKey _ _ "related" = []
+transformKey _ _ "relatedoptions" = []
+transformKey _ _ "relatedstring" = []
+transformKey _ _ "relatedtype" = []
+transformKey _ _ "shorthand" = []
+transformKey _ _ "shorthandintro" = []
+transformKey _ _ "sortkey" = []
+transformKey x y "author"
+ | x `elem` ["mvbook", "book"] &&
+ y `elem` ["inbook", "bookinbook", "suppbook"] = ["bookauthor", "author"]
+-- note: this next clause is not in the biblatex manual, but it makes
+-- sense in the context of CSL conversion:
+transformKey x y "author"
+ | x == "mvbook" && y == "book" = ["bookauthor", "author"]
+transformKey "mvbook" y z
+ | y `elem` ["book", "inbook", "bookinbook", "suppbook"] = standardTrans z
+transformKey x y z
+ | x `elem` ["mvcollection", "mvreference"] &&
+ y `elem` ["collection", "reference", "incollection", "inreference",
+ "suppcollection"] = standardTrans z
+transformKey "mvproceedings" y z
+ | y `elem` ["proceedings", "inproceedings"] = standardTrans z
+transformKey "book" y z
+ | y `elem` ["inbook", "bookinbook", "suppbook"] = bookTrans z
+transformKey x y z
+ | x `elem` ["collection", "reference"] &&
+ y `elem` ["incollection", "inreference", "suppcollection"] = bookTrans z
+transformKey "proceedings" "inproceedings" z = bookTrans z
+transformKey "periodical" y z
+ | y `elem` ["article", "suppperiodical"] =
+ case z of
+ "title" -> ["journaltitle"]
+ "subtitle" -> ["journalsubtitle"]
+ "shorttitle" -> []
+ "sorttitle" -> []
+ "indextitle" -> []
+ "indexsorttitle" -> []
+ _ -> [z]
+transformKey _ _ x = [x]
+
+standardTrans :: Text -> [Text]
+standardTrans z =
+ case z of
+ "title" -> ["maintitle"]
+ "subtitle" -> ["mainsubtitle"]
+ "titleaddon" -> ["maintitleaddon"]
+ "shorttitle" -> []
+ "sorttitle" -> []
+ "indextitle" -> []
+ "indexsorttitle" -> []
+ _ -> [z]
+
+bookTrans :: Text -> [Text]
+bookTrans z =
+ case z of
+ "title" -> ["booktitle"]
+ "subtitle" -> ["booksubtitle"]
+ "titleaddon" -> ["booktitleaddon"]
+ "shorttitle" -> []
+ "sorttitle" -> []
+ "indextitle" -> []
+ "indexsorttitle" -> []
+ _ -> [z]
+
+resolveKey :: Lang -> Inlines -> Inlines
+resolveKey lang ils = Walk.walk go ils
+ where go (Str s) = Str $ resolveKey' lang s
+ go x = x
+
+resolveKey' :: Lang -> Text -> Text
+resolveKey' lang@(Lang l _) k =
+ case Map.lookup l biblatexStringMap >>= Map.lookup (T.toLower k) of
+ Nothing -> k
+ Just (x, _) -> either (const k) stringify $ parseLaTeX lang x
+
+convertEnDash :: Inline -> Inline
+convertEnDash (Str s) = Str (T.map (\c -> if c == '–' then '-' else c) s)
+convertEnDash x = x
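
For orientation, an illustrative sketch (not part of this commit) of how the crossref field-inheritance mapping above resolves keys. `transformKey` and its helpers are internal to this module, so the pairs below simply record the expected results next to the calls, assuming those definitions are in scope:

    -- Illustrative only: (computed, expected) pairs for the mapping above.
    crossrefExamples :: [([Text], [Text])]
    crossrefExamples =
      [ (transformKey "book" "inbook" "title",        ["booktitle"])
      , (transformKey "book" "inbook" "author",       ["bookauthor", "author"])
      , (transformKey "mvbook" "book" "title",        ["maintitle"])
      , (transformKey "periodical" "article" "title", ["journaltitle"])
      , (transformKey "misc" "misc" "shorthand",      [])  -- never inherited
      ]
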
diff --git a/src/Text/Pandoc/Citeproc/CslJson.hs b/src/Text/Pandoc/Citeproc/CslJson.hs
new file mode 100644
index 000000000..862af5188
--- /dev/null
+++ b/src/Text/Pandoc/Citeproc/CslJson.hs
@@ -0,0 +1,37 @@
+{-# LANGUAGE OverloadedStrings #-}
+module Text.Pandoc.Citeproc.CslJson
+ ( cslJsonToReferences )
+where
+
+import Citeproc.CslJson
+import Citeproc.Types
+import Control.Monad.Identity (runIdentity)
+import Data.Aeson (eitherDecodeStrict')
+import Data.ByteString (ByteString)
+import Text.Pandoc.Builder as B
+import Data.Text (Text)
+
+fromCslJson :: CslJson Text -> Inlines
+fromCslJson (CslText t) = B.text t
+fromCslJson CslEmpty = mempty
+fromCslJson (CslConcat x y) = fromCslJson x <> fromCslJson y
+fromCslJson (CslQuoted x) = B.doubleQuoted (fromCslJson x)
+fromCslJson (CslItalic x) = B.emph (fromCslJson x)
+fromCslJson (CslNormal x) = fromCslJson x -- TODO?
+fromCslJson (CslBold x) = B.strong (fromCslJson x)
+fromCslJson (CslUnderline x) = B.underline (fromCslJson x)
+fromCslJson (CslNoDecoration x) =
+ B.spanWith ("",["nodecoration"],[]) (fromCslJson x)
+fromCslJson (CslSmallCaps x) = B.smallcaps (fromCslJson x)
+fromCslJson (CslBaseline x) = fromCslJson x
+fromCslJson (CslSub x) = B.subscript (fromCslJson x)
+fromCslJson (CslSup x) = B.superscript (fromCslJson x)
+fromCslJson (CslNoCase x) = B.spanWith ("",["nocase"],[]) (fromCslJson x)
+fromCslJson (CslDiv t x) = B.spanWith ("",["csl-" <> t],[]) (fromCslJson x)
+
+cslJsonToReferences :: ByteString -> Either String [Reference Inlines]
+cslJsonToReferences raw =
+ case eitherDecodeStrict' raw of
+ Left e -> Left e
+ Right cslrefs -> Right $
+ map (runIdentity . traverse (return . fromCslJson)) cslrefs
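
A minimal usage sketch of the module above (not part of this commit). The item id and title are invented, and it assumes this possibly internal module is importable; rich-text markup such as `<i>` in the JSON comes back as Pandoc inlines:

    {-# LANGUAGE OverloadedStrings #-}
    import qualified Data.ByteString.Char8 as B8
    import Text.Pandoc.Citeproc.CslJson (cslJsonToReferences)

    main :: IO ()
    main = do
      let raw = B8.pack
            "[{\"id\":\"doe2020\",\"type\":\"book\",\"title\":\"A <i>Useful</i> Example\"}]"
      case cslJsonToReferences raw of
        Left err   -> putStrLn ("CSL JSON parse error: " ++ err)
        Right refs -> print (length refs)  -- 1; the <i> span becomes Emph inlines
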
diff --git a/src/Text/Pandoc/Citeproc/Data.hs b/src/Text/Pandoc/Citeproc/Data.hs
new file mode 100644
index 000000000..dfdaf2598
--- /dev/null
+++ b/src/Text/Pandoc/Citeproc/Data.hs
@@ -0,0 +1,31 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE TemplateHaskell #-}
+module Text.Pandoc.Citeproc.Data
+ (biblatexStringMap)
+where
+import Data.FileEmbed
+import Data.ByteString (ByteString)
+import qualified Data.Map as M
+import qualified Data.Text.Encoding as TE
+import qualified Data.Text as T
+import Data.Text (Text)
+import Text.Pandoc.Citeproc.Util (toIETF)
+import Citeproc (Lang(..), parseLang)
+
+biblatexLocalizations :: [(FilePath, ByteString)]
+biblatexLocalizations = $(embedDir "citeproc/biblatex-localization")
+
+-- biblatex localization keys, from files at
+-- http://github.com/plk/biblatex/tree/master/tex/latex/biblatex/lbx
+biblatexStringMap :: M.Map Text (M.Map Text (Text, Text))
+biblatexStringMap = foldr go mempty biblatexLocalizations
+ where
+ go (fp, bs) =
+ let Lang lang _ = parseLang (toIETF $ T.takeWhile (/= '.') $ T.pack fp)
+ ls = T.lines $ TE.decodeUtf8 bs
+ in if length ls > 4
+ then M.insert lang (toStringMap $ map (T.splitOn "|") ls)
+ else id
+ toStringMap = foldr go' mempty
+ go' [term, x, y] = M.insert term (x, y)
+ go' _ = id
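
A small sketch (not part of this commit) of how the map is meant to be consulted. Note that the outer key is the bare language part extracted above (e.g. "de", not "de-DE"); `lookupBiblatexTerm` is a hypothetical helper for illustration:

    import Data.Text (Text)
    import qualified Data.Map as M
    import qualified Data.Text as T
    import Text.Pandoc.Citeproc.Data (biblatexStringMap)

    -- Look up the (long form, short form) pair for a biblatex term.
    lookupBiblatexTerm :: Text -> Text -> Maybe (Text, Text)
    lookupBiblatexTerm lang term =
      M.lookup lang biblatexStringMap >>= M.lookup (T.toLower term)
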
diff --git a/src/Text/Pandoc/Citeproc/Locator.hs b/src/Text/Pandoc/Citeproc/Locator.hs
new file mode 100644
index 000000000..dba762c02
--- /dev/null
+++ b/src/Text/Pandoc/Citeproc/Locator.hs
@@ -0,0 +1,279 @@
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE OverloadedStrings #-}
+module Text.Pandoc.Citeproc.Locator
+ ( parseLocator )
+where
+import Citeproc.Types
+import Data.Text (Text)
+import qualified Data.Text as T
+import Text.Parsec
+import Text.Pandoc.Definition
+import Text.Pandoc.Parsing (romanNumeral)
+import Text.Pandoc.Shared (stringify)
+import Control.Monad (mzero)
+import qualified Data.Map as M
+import Data.Char (isSpace, isPunctuation, isDigit)
+
+parseLocator :: Locale -> [Inline] -> (Maybe (Text, Text), [Inline])
+parseLocator locale inp =
+ case parse (pLocatorWords (toLocatorMap locale)) "suffix" $ splitInp inp of
+ Right r -> r
+ Left _ -> (Nothing, inp)
+
+splitInp :: [Inline] -> [Inline]
+splitInp = splitStrWhen (\c -> isSpace c || (isPunctuation c && c /= ':'))
+
+--
+-- Locator parsing
+--
+
+type LocatorParser = Parsec [Inline] ()
+
+pLocatorWords :: LocatorMap
+ -> LocatorParser (Maybe (Text, Text), [Inline])
+pLocatorWords locMap = do
+ optional $ pMatchChar "," (== ',')
+ optional pSpace
+ (la, lo) <- pLocatorDelimited locMap <|> pLocatorIntegrated locMap
+ s <- getInput -- rest is suffix
+ -- need to trim, otherwise "p. 9" and "9" will have 'different' locators later on
+ -- i.e. the first one will be " 9"
+ return $
+ if T.null la && T.null lo
+ then (Nothing, s)
+ else (Just (la, T.strip lo), s)
+
+pLocatorDelimited :: LocatorMap -> LocatorParser (Text, Text)
+pLocatorDelimited locMap = try $ do
+ _ <- pMatchChar "{" (== '{')
+ skipMany pSpace -- gobble pre-spaces so label doesn't try to include them
+ (la, _) <- pLocatorLabelDelimited locMap
+ -- we only care about balancing {} and [] (because of the outer [] scope);
+ -- the rest can be anything
+ let inner = do { t <- anyToken; return (True, stringify t) }
+ gs <- many (pBalancedBraces [('{','}'), ('[',']')] inner)
+ _ <- pMatchChar "}" (== '}')
+ let lo = T.concat $ map snd gs
+ return (la, lo)
+
+pLocatorLabelDelimited :: LocatorMap -> LocatorParser (Text, Bool)
+pLocatorLabelDelimited locMap
+ = pLocatorLabel' locMap lim <|> return ("page", True)
+ where
+ lim = stringify <$> anyToken
+
+pLocatorIntegrated :: LocatorMap -> LocatorParser (Text, Text)
+pLocatorIntegrated locMap = try $ do
+ (la, wasImplicit) <- pLocatorLabelIntegrated locMap
+ -- if we got the label implicitly, we have presupposed the first one is
+ -- going to have a digit, so guarantee that. You _can_ have p. (a)
+ -- because you specified it.
+ let modifier = if wasImplicit
+ then requireDigits
+ else requireRomansOrDigits
+ g <- try $ pLocatorWordIntegrated (not wasImplicit) >>= modifier
+ gs <- many (try $ pLocatorWordIntegrated False >>= modifier)
+ let lo = T.concat (g:gs)
+ return (la, lo)
+
+pLocatorLabelIntegrated :: LocatorMap -> LocatorParser (Text, Bool)
+pLocatorLabelIntegrated locMap
+ = pLocatorLabel' locMap lim <|> (lookAhead digital >> return ("page", True))
+ where
+ lim = try $ pLocatorWordIntegrated True >>= requireRomansOrDigits
+ digital = try $ pLocatorWordIntegrated True >>= requireDigits
+
+pLocatorLabel' :: LocatorMap -> LocatorParser Text
+ -> LocatorParser (Text, Bool)
+pLocatorLabel' locMap lim = go ""
+ where
+ -- grow the match string until we hit the end
+ -- trying to find the largest match for a label
+ go acc = try $ do
+ -- advance at least one token each time
+ -- the pathological case is "p.3"
+ t <- anyToken
+ ts <- manyTill anyToken (try $ lookAhead lim)
+ let s = acc <> stringify (t:ts)
+ case M.lookup (T.strip s) locMap of
+ -- try to find a longer one, or return this one
+ Just l -> go s <|> return (l, False)
+ Nothing -> go s
+
+-- hard requirement for a locator to have some real digits in it
+requireDigits :: (Bool, Text) -> LocatorParser Text
+requireDigits (_, s) = if not (T.any isDigit s)
+ then Prelude.fail "requireDigits"
+ else return s
+
+-- soft requirement for a sequence with some roman or arabic parts
+-- (a)(iv) -- because iv is roman
+-- 1(a) -- because 1 is an actual digit
+-- NOT: a, (a)-(b), hello, (some text in brackets)
+requireRomansOrDigits :: (Bool, Text) -> LocatorParser Text
+requireRomansOrDigits (d, s) = if not d
+ then Prelude.fail "requireRomansOrDigits"
+ else return s
+
+pLocatorWordIntegrated :: Bool -> LocatorParser (Bool, Text)
+pLocatorWordIntegrated isFirst = try $ do
+ punct <- if isFirst
+ then return ""
+ else (stringify <$> pLocatorSep) <|> return ""
+ sp <- option "" (pSpace >> return " ")
+ (dig, s) <- pBalancedBraces [('(',')'), ('[',']'), ('{','}')] pPageSeq
+ return (dig, punct <> sp <> s)
+
+-- we want to capture: 123, 123A, C22, XVII, 33-44, 22-33; 22-11
+-- 34(1), 34A(A), 34(1)(i)(i), (1)(a)
+-- [17], [17]-[18], '591 [84]'
+-- (because CSL cannot pull out individual pages/sections
+-- to wrap in braces on a per-style basis)
+pBalancedBraces :: [(Char, Char)]
+ -> LocatorParser (Bool, Text)
+ -> LocatorParser (Bool, Text)
+pBalancedBraces braces p = try $ do
+ ss <- many1 surround
+ return $ anyWereDigitLike ss
+ where
+ except = notFollowedBy pBraces >> p
+ -- outer and inner
+ surround = foldl (\a (open, close) -> sur open close except <|> a)
+ except
+ braces
+
+ isc c = stringify <$> pMatchChar [c] (== c)
+
+ sur c c' m = try $ do
+ (d, mid) <- between (isc c) (isc c') (option (False, "") m)
+ return (d, T.cons c . flip T.snoc c' $ mid)
+
+ flattened = concatMap (\(o, c) -> [o, c]) braces
+ pBraces = pMatchChar "braces" (`elem` flattened)
+
+
+-- YES 1, 1.2, 1.2.3
+-- NO 1., 1.2. a.6
+-- can't use sepBy because we want to leave trailing .s
+pPageSeq :: LocatorParser (Bool, Text)
+pPageSeq = oneDotTwo <|> withPeriod
+ where
+ oneDotTwo = do
+ u <- pPageUnit
+ us <- many withPeriod
+ return $ anyWereDigitLike (u:us)
+ withPeriod = try $ do
+ -- .2
+ p <- pMatchChar "." (== '.')
+ u <- try pPageUnit
+ return (fst u, stringify p <> snd u)
+
+anyWereDigitLike :: [(Bool, Text)] -> (Bool, Text)
+anyWereDigitLike as = (any fst as, T.concat $ map snd as)
+
+pPageUnit :: LocatorParser (Bool, Text)
+pPageUnit = roman <|> plainUnit
+ where
+ -- roman is a 'digit'
+ roman = (True,) <$> pRoman
+ plainUnit = do
+ ts <- many1 (notFollowedBy pSpace >>
+ notFollowedBy pLocatorPunct >>
+ anyToken)
+ let s = stringify ts
+ -- otherwise look for actual digits or -s
+ return (T.any isDigit s, s)
+
+pRoman :: LocatorParser Text
+pRoman = try $ do
+ tok <- anyToken
+ case tok of
+ Str t -> case parse (romanNumeral True *> eof)
+ "roman numeral" (T.toUpper t) of
+ Left _ -> mzero
+ Right () -> return t
+ _ -> mzero
+
+pLocatorPunct :: LocatorParser Inline
+pLocatorPunct = pMatchChar "punctuation" isLocatorPunct
+
+pLocatorSep :: LocatorParser Inline
+pLocatorSep = pMatchChar "locator separator" isLocatorSep
+
+pMatchChar :: String -> (Char -> Bool) -> LocatorParser Inline
+pMatchChar msg f = satisfyTok f' <?> msg
+ where
+ f' (Str (T.unpack -> [c])) = f c
+ f' _ = False
+
+pSpace :: LocatorParser Inline
+pSpace = satisfyTok (\t -> isSpacey t || t == Str "\160") <?> "space"
+
+satisfyTok :: (Inline -> Bool) -> LocatorParser Inline
+satisfyTok f = tokenPrim show (\sp _ _ -> sp) (\tok -> if f tok
+ then Just tok
+ else Nothing)
+
+isSpacey :: Inline -> Bool
+isSpacey Space = True
+isSpacey SoftBreak = True
+isSpacey _ = False
+
+isLocatorPunct :: Char -> Bool
+isLocatorPunct '-' = False -- page range
+isLocatorPunct '–' = False -- page range, en dash
+isLocatorPunct ':' = False -- vol:page-range hack
+isLocatorPunct c = isPunctuation c -- includes [{()}]
+
+isLocatorSep :: Char -> Bool
+isLocatorSep ',' = True
+isLocatorSep ';' = True
+isLocatorSep _ = False
+
+splitStrWhen :: (Char -> Bool) -> [Inline] -> [Inline]
+splitStrWhen _ [] = []
+splitStrWhen p (Str xs : ys) = go (T.unpack xs) ++ splitStrWhen p ys
+ where
+ go [] = []
+ go s = case break p s of
+ ([],[]) -> []
+ (zs,[]) -> [Str $ T.pack zs]
+ ([],w:ws) -> Str (T.singleton w) : go ws
+ (zs,w:ws) -> Str (T.pack zs) : Str (T.singleton w) : go ws
+splitStrWhen p (x : ys) = x : splitStrWhen p ys
+
+--
+-- Locator Map
+--
+
+type LocatorMap = M.Map Text Text
+
+toLocatorMap :: Locale -> LocatorMap
+toLocatorMap locale =
+ foldr go mempty locatorTerms
+ where
+ go tname locmap =
+ case M.lookup tname (localeTerms locale) of
+ Nothing -> locmap
+ Just ts -> foldr (\x -> M.insert (snd x) tname) locmap ts
+
+locatorTerms :: [Text]
+locatorTerms =
+ [ "book"
+ , "chapter"
+ , "column"
+ , "figure"
+ , "folio"
+ , "issue"
+ , "line"
+ , "note"
+ , "opus"
+ , "page"
+ , "number-of-pages"
+ , "paragraph"
+ , "part"
+ , "section"
+ , "sub verbo"
+ , "verse"
+ , "volume" ]
diff --git a/src/Text/Pandoc/Citeproc/MetaValue.hs b/src/Text/Pandoc/Citeproc/MetaValue.hs
new file mode 100644
index 000000000..53b14f904
--- /dev/null
+++ b/src/Text/Pandoc/Citeproc/MetaValue.hs
@@ -0,0 +1,252 @@
+{-# LANGUAGE OverloadedStrings #-}
+module Text.Pandoc.Citeproc.MetaValue
+ ( referenceToMetaValue
+ , metaValueToReference
+ , metaValueToText
+ , metaValueToPath
+ )
+where
+
+import Citeproc.Types
+import Text.Pandoc.Definition
+import Text.Pandoc.Builder as B
+import Text.Pandoc.Walk (query)
+import Text.Pandoc.Shared (stringify)
+import Data.Maybe
+import Safe
+import qualified Data.Set as Set
+import qualified Data.Map as M
+import qualified Data.Text as T
+import Data.Text (Text)
+import Text.Printf (printf)
+import Control.Applicative ((<|>))
+
+metaValueToText :: MetaValue -> Maybe Text
+metaValueToText (MetaString t) = Just t
+metaValueToText (MetaInlines ils) = Just $ stringify ils
+metaValueToText (MetaBlocks bls) = Just $ stringify bls
+metaValueToText (MetaList xs) = T.unwords <$> mapM metaValueToText xs
+metaValueToText _ = Nothing
+
+metaValueToPath :: MetaValue -> Maybe FilePath
+metaValueToPath = fmap T.unpack . metaValueToText
+
+metaValueToBool :: MetaValue -> Maybe Bool
+metaValueToBool (MetaBool b) = Just b
+metaValueToBool (MetaString "true") = Just True
+metaValueToBool (MetaString "false") = Just False
+metaValueToBool (MetaInlines ils) =
+ metaValueToBool (MetaString (stringify ils))
+metaValueToBool _ = Nothing
+
+referenceToMetaValue :: Reference Inlines -> MetaValue
+referenceToMetaValue ref =
+ let ItemId id' = referenceId ref
+ type' = referenceType ref
+ in MetaMap $ M.insert "id" (MetaString id')
+ $ M.insert "type" (MetaString type')
+ $ M.map valToMetaValue
+ $ M.mapKeys fromVariable
+ $ referenceVariables ref
+
+
+valToMetaValue :: Val Inlines -> MetaValue
+valToMetaValue (TextVal t) = MetaString t
+valToMetaValue (FancyVal ils) = MetaInlines (B.toList ils)
+valToMetaValue (NumVal n) = MetaString (T.pack $ show n)
+valToMetaValue (NamesVal ns) = MetaList $ map nameToMetaValue ns
+valToMetaValue (DateVal d) = dateToMetaValue d
+
+nameToMetaValue :: Name -> MetaValue
+nameToMetaValue name =
+ MetaMap $
+ (maybe id (M.insert "family" . MetaString) (nameFamily name)) .
+ (maybe id (M.insert "given" . MetaString) (nameGiven name)) .
+ (maybe id (M.insert "dropping-particle" . MetaString)
+ (nameDroppingParticle name)) .
+ (maybe id (M.insert "non-dropping-particle" . MetaString)
+ (nameNonDroppingParticle name)) .
+ (maybe id (M.insert "suffix" . MetaString) (nameSuffix name)) .
+ (maybe id (M.insert "literal" . MetaString) (nameLiteral name)) .
+ (if nameCommaSuffix name
+ then M.insert "comma-suffix" (MetaBool True)
+ else id) .
+ (if nameStaticOrdering name
+ then M.insert "static-ordering" (MetaBool True)
+ else id)
+ $ mempty
+
+dateToMetaValue :: Date -> MetaValue
+dateToMetaValue date =
+ MetaString $
+ (case dateLiteral date of
+ Just l -> l
+ Nothing -> T.intercalate "/" $ map datePartsToEDTF $ dateParts date)
+ <> (if dateCirca date then "~" else "")
+ where
+ datePartsToEDTF (DateParts dps) =
+ T.pack $
+ (case dps of
+ (y:_) | y > 9999 || y < -10000 -> ('y':)
+ _ -> id) $
+ case dps of
+ (y:m:d:_)
+ | y < -1 -> printf "%05d-%02d-%02d" (y+1) m d
+ | otherwise -> printf "%04d-%02d-%02d" y m d
+ (y:m:[])
+ | y < -1 -> printf "%05d-%02d" (y+1) m
+ | otherwise -> printf "%04d-%02d" y m
+ (y:[])
+ | y < -1 -> printf "%05d" (y+1)
+ | otherwise -> printf "%04d" y
+ _ -> mempty
+
+metaValueToReference :: MetaValue -> Maybe (Reference Inlines)
+metaValueToReference (MetaMap m) = do
+ let m' = M.mapKeys normalizeKey m
+ id' <- M.lookup "id" m' >>= metaValueToText
+ type' <- (M.lookup "type" m' >>= metaValueToText) <|> pure ""
+ let m'' = M.delete "id" $ M.delete "type" m'
+ let vars = M.mapKeys toVariable $ M.mapWithKey metaValueToVal m''
+ return $ Reference { referenceId = ItemId id'
+ , referenceType = type'
+ , referenceDisambiguation = Nothing
+ , referenceVariables = vars }
+metaValueToReference _ = Nothing
+
+metaValueToVal :: Text -> MetaValue -> Val Inlines
+metaValueToVal k v
+ | k `Set.member` dateVariables
+ = DateVal $ metaValueToDate v
+ | k `Set.member` nameVariables
+ = NamesVal $ metaValueToNames v
+ | k == "other-ids"
+ = TextVal $ fromMaybe mempty $ metaValueToText v
+ -- will create space-separated list
+ | otherwise =
+ case v of
+ MetaString t -> TextVal t
+ MetaInlines ils -> FancyVal (B.fromList ils)
+ MetaBlocks bs -> FancyVal (B.fromList $ query id bs)
+ MetaBool b -> TextVal (if b then "true" else "false")
+ MetaList _ -> TextVal mempty
+ MetaMap _ -> TextVal mempty
+
+metaValueToDate :: MetaValue -> Date
+metaValueToDate (MetaMap m) =
+ Date
+ { dateParts = dateparts
+ , dateCirca = circa
+ , dateSeason = season
+ , dateLiteral = literal }
+ where
+ dateparts = case M.lookup "date-parts" m of
+ Just (MetaList xs) ->
+ mapMaybe metaValueToDateParts xs
+ Just _ -> []
+ Nothing ->
+ maybe [] (:[]) $ metaValueToDateParts (MetaMap m)
+ circa = fromMaybe False $
+ M.lookup "circa" m >>= metaValueToBool
+ season = M.lookup "season" m >>= metaValueToInt
+ literal = M.lookup "literal" m >>= metaValueToText
+metaValueToDate (MetaList xs) =
+ Date{ dateParts = mapMaybe metaValueToDateParts xs
+ , dateCirca = False
+ , dateSeason = Nothing
+ , dateLiteral = Nothing }
+metaValueToDate x =
+ fromMaybe emptyDate $ metaValueToText x >>= rawDateEDTF
+
+
+metaValueToInt :: MetaValue -> Maybe Int
+metaValueToInt x = metaValueToText x >>= readMay . T.unpack
+
+metaValueToDateParts :: MetaValue -> Maybe DateParts
+metaValueToDateParts (MetaList xs) =
+ Just $ DateParts $ map (fromMaybe 0 . metaValueToInt) xs
+metaValueToDateParts (MetaMap m) =
+ case (M.lookup "year" m >>= metaValueToInt,
+ ((M.lookup "month" m >>= metaValueToInt)
+ <|>
+ ((+ 20) <$> (M.lookup "season" m >>= metaValueToInt))),
+ M.lookup "day" m >>= metaValueToInt) of
+ (Just y, Just mo, Just d) -> Just $ DateParts [y, mo, d]
+ (Just y, Just mo, Nothing) -> Just $ DateParts [y, mo]
+ (Just y, Nothing, _) -> Just $ DateParts [y]
+ _ -> Nothing
+metaValueToDateParts _ = Nothing
+
+emptyDate :: Date
+emptyDate = Date { dateParts = []
+ , dateCirca = False
+ , dateSeason = Nothing
+ , dateLiteral = Nothing }
+
+metaValueToNames :: MetaValue -> [Name]
+metaValueToNames (MetaList xs) = mapMaybe metaValueToName xs
+metaValueToNames x = maybeToList $ metaValueToName x
+
+metaValueToName :: MetaValue -> Maybe Name
+metaValueToName (MetaMap m) = extractParticles <$>
+ Just Name
+ { nameFamily = family
+ , nameGiven = given
+ , nameDroppingParticle = dropping
+ , nameNonDroppingParticle = nondropping
+ , nameSuffix = suffix
+ , nameCommaSuffix = commasuffix
+ , nameStaticOrdering = staticordering
+ , nameLiteral = literal
+ }
+ where
+ family = M.lookup "family" m >>= metaValueToText
+ given = M.lookup "given" m >>= metaValueToText
+ dropping = M.lookup "dropping-particle" m
+ >>= metaValueToText
+ nondropping = M.lookup "non-dropping-particle" m
+ >>= metaValueToText
+ suffix = M.lookup "suffix" m >>= metaValueToText
+ commasuffix = fromMaybe False $
+ M.lookup "comma-suffix" m >>= metaValueToBool
+ staticordering = fromMaybe False $
+ M.lookup "static-ordering" m >>= metaValueToBool
+ literal = M.lookup "literal" m >>= metaValueToText
+metaValueToName x = extractParticles <$>
+ case metaValueToText x of
+ Nothing -> Nothing
+ Just lit -> Just Name
+ { nameFamily = Nothing
+ , nameGiven = Nothing
+ , nameDroppingParticle = Nothing
+ , nameNonDroppingParticle = Nothing
+ , nameSuffix = Nothing
+ , nameCommaSuffix = False
+ , nameStaticOrdering = False
+ , nameLiteral = Just lit }
+
+dateVariables :: Set.Set Text
+dateVariables = Set.fromList
+ [ "accessed", "container", "event-date", "issued",
+ "original-date", "submitted" ]
+
+nameVariables :: Set.Set Text
+nameVariables = Set.fromList
+ [ "author", "collection-editor", "composer",
+ "container-author", "director", "editor",
+ "editorial-director", "illustrator",
+ "interviewer", "original-author",
+ "recipient", "reviewed-author",
+ "translator" ]
+
+normalizeKey :: Text -> Text
+normalizeKey k =
+ case T.toLower k of
+ "doi" -> "DOI"
+ "isbn" -> "ISBN"
+ "issn" -> "ISSN"
+ "pmcid" -> "PMCID"
+ "pmid" -> "PMID"
+ "url" -> "URL"
+ x -> x
+
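
A minimal sketch (not part of this commit): a metadata map of the kind a YAML `references:` entry produces, run through the exported `metaValueToReference`. The id and names are invented:

    {-# LANGUAGE OverloadedStrings #-}
    import qualified Data.Map as M
    import Text.Pandoc.Citeproc.MetaValue (metaValueToReference)
    import Text.Pandoc.Definition

    sampleRef :: MetaValue
    sampleRef = MetaMap $ M.fromList
      [ ("id",     MetaString "doe2020")
      , ("type",   MetaString "book")
      , ("title",  MetaInlines [Str "An", Space, Str "Example"])
      , ("author", MetaList [MetaMap (M.fromList
                     [ ("family", MetaString "Doe")
                     , ("given",  MetaString "Jane") ])])
      , ("issued", MetaMap (M.fromList [("year", MetaString "2020")]))
      ]

    -- metaValueToReference sampleRef should yield Just a Reference with id
    -- "doe2020", type "book", a NamesVal for author, and a DateVal for issued.
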
diff --git a/src/Text/Pandoc/Citeproc/Util.hs b/src/Text/Pandoc/Citeproc/Util.hs
new file mode 100644
index 000000000..6d8e01bc9
--- /dev/null
+++ b/src/Text/Pandoc/Citeproc/Util.hs
@@ -0,0 +1,70 @@
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE OverloadedStrings #-}
+module Text.Pandoc.Citeproc.Util
+ ( toIETF )
+where
+import Data.Text (Text)
+
+toIETF :: Text -> Text
+toIETF "english" = "en-US" -- "en-EN" unavailable in CSL
+toIETF "usenglish" = "en-US"
+toIETF "american" = "en-US"
+toIETF "british" = "en-GB"
+toIETF "ukenglish" = "en-GB"
+toIETF "canadian" = "en-US" -- "en-CA" unavailable in CSL
+toIETF "australian" = "en-GB" -- "en-AU" unavailable in CSL
+toIETF "newzealand" = "en-GB" -- "en-NZ" unavailable in CSL
+toIETF "afrikaans" = "af-ZA"
+toIETF "arabic" = "ar"
+toIETF "basque" = "eu"
+toIETF "bulgarian" = "bg-BG"
+toIETF "catalan" = "ca-AD"
+toIETF "croatian" = "hr-HR"
+toIETF "czech" = "cs-CZ"
+toIETF "danish" = "da-DK"
+toIETF "dutch" = "nl-NL"
+toIETF "estonian" = "et-EE"
+toIETF "finnish" = "fi-FI"
+toIETF "canadien" = "fr-CA"
+toIETF "acadian" = "fr-CA"
+toIETF "french" = "fr-FR"
+toIETF "francais" = "fr-FR"
+toIETF "austrian" = "de-AT"
+toIETF "naustrian" = "de-AT"
+toIETF "german" = "de-DE"
+toIETF "germanb" = "de-DE"
+toIETF "ngerman" = "de-DE"
+toIETF "greek" = "el-GR"
+toIETF "polutonikogreek" = "el-GR"
+toIETF "hebrew" = "he-IL"
+toIETF "hungarian" = "hu-HU"
+toIETF "icelandic" = "is-IS"
+toIETF "italian" = "it-IT"
+toIETF "japanese" = "ja-JP"
+toIETF "latvian" = "lv-LV"
+toIETF "lithuanian" = "lt-LT"
+toIETF "magyar" = "hu-HU"
+toIETF "mongolian" = "mn-MN"
+toIETF "norsk" = "nb-NO"
+toIETF "nynorsk" = "nn-NO"
+toIETF "farsi" = "fa-IR"
+toIETF "polish" = "pl-PL"
+toIETF "brazil" = "pt-BR"
+toIETF "brazilian" = "pt-BR"
+toIETF "portugues" = "pt-PT"
+toIETF "portuguese" = "pt-PT"
+toIETF "romanian" = "ro-RO"
+toIETF "russian" = "ru-RU"
+toIETF "serbian" = "sr-RS"
+toIETF "serbianc" = "sr-RS"
+toIETF "slovak" = "sk-SK"
+toIETF "slovene" = "sl-SL"
+toIETF "spanish" = "es-ES"
+toIETF "swedish" = "sv-SE"
+toIETF "thai" = "th-TH"
+toIETF "turkish" = "tr-TR"
+toIETF "ukrainian" = "uk-UA"
+toIETF "vietnamese" = "vi-VN"
+toIETF "latin" = "la"
+toIETF x = x
+
diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs
index a454de1d0..6042973ab 100644
--- a/src/Text/Pandoc/Class/PandocMonad.hs
+++ b/src/Text/Pandoc/Class/PandocMonad.hs
@@ -553,7 +553,7 @@ getDefaultReferencePptx = do
mapM pathToEntry paths
-- | Read file from user data directory or,
--- if not found there, from Cabal data directory.
+-- if not found there, from the default data files.
readDataFile :: PandocMonad m => FilePath -> m B.ByteString
readDataFile fname = do
datadir <- getUserDataDir
@@ -565,7 +565,7 @@ readDataFile fname = do
then readFileStrict (userDir </> fname)
else readDefaultDataFile fname
--- | Read file from from Cabal data directory.
+-- | Read file from the default data files.
readDefaultDataFile :: PandocMonad m => FilePath -> m B.ByteString
readDefaultDataFile "reference.docx" =
B.concat . BL.toChunks . fromArchive <$> getDefaultReferenceDocx
diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs
index 2e67e5bc1..63973bd05 100644
--- a/src/Text/Pandoc/Error.hs
+++ b/src/Text/Pandoc/Error.hs
@@ -32,6 +32,7 @@ import Text.Printf (printf)
import Text.Parsec.Error
import Text.Parsec.Pos hiding (Line)
import Text.Pandoc.Shared (tshow)
+import Citeproc (CiteprocError, prettyCiteprocError)
type Input = Text
@@ -60,6 +61,7 @@ data PandocError = PandocIOError Text IOError
| PandocUnknownReaderError Text
| PandocUnknownWriterError Text
| PandocUnsupportedExtensionError Text Text
+ | PandocCiteprocError CiteprocError
deriving (Show, Typeable, Generic)
instance Exception PandocError
@@ -139,6 +141,8 @@ handleError (Left e) =
PandocUnsupportedExtensionError ext f -> err 23 $
"The extension " <> ext <> " is not supported " <>
"for " <> f
+ PandocCiteprocError e' -> err 24 $
+ prettyCiteprocError e'
err :: Int -> Text -> IO a
err exitCode msg = do
diff --git a/src/Text/Pandoc/Filter.hs b/src/Text/Pandoc/Filter.hs
index 502aaefae..f5c1a4f76 100644
--- a/src/Text/Pandoc/Filter.hs
+++ b/src/Text/Pandoc/Filter.hs
@@ -26,6 +26,7 @@ import Text.Pandoc.Class.PandocMonad (report, getVerbosity)
import Text.Pandoc.Definition (Pandoc)
import Text.Pandoc.Options (ReaderOptions)
import Text.Pandoc.Logging
+import Text.Pandoc.Citeproc (processCitations)
import qualified Text.Pandoc.Filter.JSON as JSONFilter
import qualified Text.Pandoc.Filter.Lua as LuaFilter
import qualified Text.Pandoc.Filter.Path as Path
@@ -39,6 +40,7 @@ import Control.Monad (foldM, when)
-- | Type of filter and path to filter file.
data Filter = LuaFilter FilePath
| JSONFilter FilePath
+ | CiteprocFilter -- built-in citeproc
deriving (Show, Generic)
instance FromYAML Filter where
@@ -47,15 +49,19 @@ instance FromYAML Filter where
ty <- m .: "type"
fp <- m .: "path"
case ty of
+ "citeproc" -> return CiteprocFilter
"lua" -> return $ LuaFilter $ T.unpack fp
"json" -> return $ JSONFilter $ T.unpack fp
_ -> fail $ "Unknown filter type " ++ show (ty :: T.Text)) node
<|>
(withStr "Filter" $ \t -> do
let fp = T.unpack t
- case takeExtension fp of
- ".lua" -> return $ LuaFilter fp
- _ -> return $ JSONFilter fp) node
+ if fp == "citeproc"
+ then return CiteprocFilter
+ else return $
+ case takeExtension fp of
+ ".lua" -> LuaFilter fp
+ _ -> JSONFilter fp) node
-- | Modify the given document using a filter.
applyFilters :: ReaderOptions
@@ -71,6 +77,8 @@ applyFilters ropts filters args d = do
withMessages f $ JSONFilter.apply ropts args f doc
applyFilter doc (LuaFilter f) =
withMessages f $ LuaFilter.apply ropts args f doc
+ applyFilter doc CiteprocFilter =
+ processCitations doc
withMessages f action = do
verbosity <- getVerbosity
when (verbosity == INFO) $ report $ RunningFilter f
@@ -85,5 +93,6 @@ applyFilters ropts filters args d = do
expandFilterPath :: Filter -> PandocIO Filter
expandFilterPath (LuaFilter fp) = LuaFilter <$> Path.expandFilterPath fp
expandFilterPath (JSONFilter fp) = JSONFilter <$> Path.expandFilterPath fp
+expandFilterPath CiteprocFilter = return CiteprocFilter
$(deriveJSON defaultOptions ''Filter)
diff --git a/src/Text/Pandoc/Logging.hs b/src/Text/Pandoc/Logging.hs
index af59316b5..f6a2a6e1a 100644
--- a/src/Text/Pandoc/Logging.hs
+++ b/src/Text/Pandoc/Logging.hs
@@ -98,6 +98,7 @@ data LogMessage =
| CouldNotDeduceFormat [Text.Text] Text.Text
| RunningFilter FilePath
| FilterCompleted FilePath Integer
+ | CiteprocWarning Text.Text
deriving (Show, Eq, Data, Ord, Typeable, Generic)
instance ToJSON LogMessage where
@@ -227,6 +228,8 @@ instance ToJSON LogMessage where
FilterCompleted fp ms ->
["path" .= Text.pack fp
,"milliseconds" .= Text.pack (show ms) ]
+ CiteprocWarning msg ->
+ ["message" .= msg]
showPos :: SourcePos -> Text.Text
showPos pos = Text.pack $ sn ++ "line " ++
@@ -338,6 +341,7 @@ showLogMessage msg =
RunningFilter fp -> "Running filter " <> Text.pack fp
FilterCompleted fp ms -> "Completed filter " <> Text.pack fp <>
" in " <> Text.pack (show ms) <> " ms"
+ CiteprocWarning ms -> "Citeproc: " <> ms
messageVerbosity :: LogMessage -> Verbosity
messageVerbosity msg =
@@ -383,3 +387,4 @@ messageVerbosity msg =
CouldNotDeduceFormat{} -> WARNING
RunningFilter{} -> INFO
FilterCompleted{} -> INFO
+ CiteprocWarning{} -> WARNING
diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs
index 1337c742c..9a069f7d0 100644
--- a/src/Text/Pandoc/Readers.hs
+++ b/src/Text/Pandoc/Readers.hs
@@ -51,6 +51,9 @@ module Text.Pandoc.Readers
, readFB2
, readIpynb
, readCSV
+ , readCslJson
+ , readBibTeX
+ , readBibLaTeX
-- * Miscellaneous
, getReader
, getDefaultExtensions
@@ -95,6 +98,8 @@ import Text.Pandoc.Readers.Txt2Tags
import Text.Pandoc.Readers.Vimwiki
import Text.Pandoc.Readers.Man
import Text.Pandoc.Readers.CSV
+import Text.Pandoc.Readers.CslJson
+import Text.Pandoc.Readers.BibTeX
import qualified Text.Pandoc.UTF8 as UTF8
import Text.Parsec.Error
@@ -138,6 +143,9 @@ readers = [ ("native" , TextReader readNative)
,("fb2" , TextReader readFB2)
,("ipynb" , TextReader readIpynb)
,("csv" , TextReader readCSV)
+ ,("csljson" , TextReader readCslJson)
+ ,("bibtex" , TextReader readBibTeX)
+ ,("biblatex" , TextReader readBibLaTeX)
]
-- | Retrieve reader, extensions based on formatSpec (format+extensions).
diff --git a/src/Text/Pandoc/Readers/BibTeX.hs b/src/Text/Pandoc/Readers/BibTeX.hs
new file mode 100644
index 000000000..c367e75a1
--- /dev/null
+++ b/src/Text/Pandoc/Readers/BibTeX.hs
@@ -0,0 +1,70 @@
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+ Module : Text.Pandoc.Readers.BibTeX
+ Copyright : Copyright (C) 2020 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Parses BibTeX or BibLaTeX bibliographies into a Pandoc document
+with empty body and `references` and `nocite` fields
+in the metadata. A wildcard `nocite` is used so that
+if the document is rendered in another format, the
+entire bibliography will be printed.
+-}
+module Text.Pandoc.Readers.BibTeX
+ ( readBibTeX
+ , readBibLaTeX
+ )
+where
+
+import Text.Pandoc.Options
+import Text.Pandoc.Definition
+import Text.Pandoc.Builder (setMeta, cite, str)
+import Data.Text (Text)
+import Citeproc (Lang(..), parseLang)
+import Citeproc.Locale (getLocale)
+import Data.Maybe (fromMaybe)
+import Text.Pandoc.Error (PandocError(..))
+import Text.Pandoc.Class (PandocMonad, lookupEnv)
+import Text.Pandoc.Citeproc.BibTeX as BibTeX
+import Text.Pandoc.Citeproc.MetaValue (referenceToMetaValue)
+import Control.Monad.Except (throwError)
+
+-- | Read BibTeX from an input string and return a Pandoc document.
+-- The document will have only metadata, with an empty body.
+-- The metadata will contain a `references` field with the
+-- bibliography entries, and a `nocite` field with the wildcard `[@*]`.
+readBibTeX :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readBibTeX = readBibTeX' BibTeX.Bibtex
+
+-- | Read BibLaTeX from an input string and return a Pandoc document.
+-- The document will have only metadata, with an empty body.
+-- The metadata will contain a `references` field with the
+-- bibliography entries, and a `nocite` field with the wildcard `[@*]`.
+readBibLaTeX :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readBibLaTeX = readBibTeX' BibTeX.Biblatex
+
+readBibTeX' :: PandocMonad m => Variant -> ReaderOptions -> Text -> m Pandoc
+readBibTeX' variant _opts t = do
+ lang <- fromMaybe (Lang "en" (Just "US")) . fmap parseLang
+ <$> lookupEnv "LANG"
+ locale <- case getLocale lang of
+ Left e -> throwError $ PandocCiteprocError e
+ Right l -> return l
+ case BibTeX.readBibtexString variant locale (const True) t of
+ Left e -> throwError $ PandocParsecError t e
+ Right refs -> return $ setMeta "references"
+ (map referenceToMetaValue refs)
+ . setMeta "nocite"
+ (cite [Citation {citationId = "*"
+ , citationPrefix = []
+ , citationSuffix = []
+ , citationMode = NormalCitation
+ , citationNoteNum = 0
+ , citationHash = 0}]
+ (str "[@*]"))
+ $ Pandoc nullMeta []
+
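
A usage sketch of the new reader (not part of this commit); the entry is invented, and `def` is the default ReaderOptions:

    {-# LANGUAGE OverloadedStrings #-}
    import Text.Pandoc.Class (runIOorExplode)
    import Text.Pandoc.Definition (Pandoc (..), lookupMeta)
    import Text.Pandoc.Options (def)
    import Text.Pandoc.Readers.BibTeX (readBibTeX)

    main :: IO ()
    main = do
      Pandoc meta body <- runIOorExplode $ readBibTeX def
        "@book{doe2020, author={Doe, Jane}, title={An Example}, year={2020}}"
      print body                            -- [] : the body is empty
      print (lookupMeta "references" meta)  -- one reference, id "doe2020"
      print (lookupMeta "nocite" meta)      -- the wildcard [@*] citation
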
diff --git a/src/Text/Pandoc/Readers/CslJson.hs b/src/Text/Pandoc/Readers/CslJson.hs
new file mode 100644
index 000000000..377186b1e
--- /dev/null
+++ b/src/Text/Pandoc/Readers/CslJson.hs
@@ -0,0 +1,53 @@
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+ Module : Text.Pandoc.Readers.CslJson
+ Copyright : Copyright (C) 2020 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Parses CSL JSON bibliographies into a Pandoc document
+with empty body and `references` and `nocite` fields
+in the metadata. A wildcard `nocite` is used so that
+if the document is rendered in another format, the
+entire bibliography will be printed.
+
+<https://citeproc-js.readthedocs.io/en/latest/csl-json/markup.html>.
+-}
+module Text.Pandoc.Readers.CslJson
+ ( readCslJson )
+where
+
+import Text.Pandoc.Options
+import Text.Pandoc.Definition
+import Text.Pandoc.Builder (setMeta, cite, str)
+import qualified Text.Pandoc.UTF8 as UTF8
+import Data.Text (Text)
+import qualified Data.Text as T
+import Text.Pandoc.Error (PandocError(..))
+import Text.Pandoc.Class (PandocMonad)
+import Text.Pandoc.Citeproc.CslJson (cslJsonToReferences)
+import Text.Pandoc.Citeproc.MetaValue (referenceToMetaValue)
+import Control.Monad.Except (throwError)
+
+-- | Read CSL JSON from an input string and return a Pandoc document.
+-- The document will have only metadata, with an empty body.
+-- The metadata will contain a `references` field with the
+-- bibliography entries, and a `nocite` field with the wildcard `[@*]`.
+readCslJson :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readCslJson _opts t =
+ case cslJsonToReferences (UTF8.fromText t) of
+ Left e -> throwError $ PandocParseError $ T.pack e
+ Right refs -> return $ setMeta "references"
+ (map referenceToMetaValue refs)
+ . setMeta "nocite"
+ (cite [Citation {citationId = "*"
+ , citationPrefix = []
+ , citationSuffix = []
+ , citationMode = NormalCitation
+ , citationNoteNum = 0
+ , citationHash = 0}]
+ (str "[@*]"))
+ $ Pandoc nullMeta []
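
The same pattern works for the CSL JSON reader (again a sketch with an invented item, not part of this commit):

    {-# LANGUAGE OverloadedStrings #-}
    import Text.Pandoc.Class (runIOorExplode)
    import Text.Pandoc.Definition (Pandoc (..), lookupMeta)
    import Text.Pandoc.Options (def)
    import Text.Pandoc.Readers.CslJson (readCslJson)

    main :: IO ()
    main = do
      Pandoc meta _ <- runIOorExplode $ readCslJson def
        "[{\"id\":\"doe2020\",\"type\":\"book\",\"title\":\"An Example\"}]"
      print (lookupMeta "references" meta)  -- the single reference
      print (lookupMeta "nocite" meta)      -- the wildcard [@*] citation
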
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 77f28b21b..257788081 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1665,6 +1665,7 @@ str = do
abbrevs <- getOption readerAbbreviations
if not (T.null result) && T.last result == '.' && result `Set.member` abbrevs
then try (do ils <- whitespace
+ notFollowedBy (() <$ cite <|> () <$ note)
-- ?? lookAhead alphaNum
-- replace space after with nonbreaking space
-- if softbreak, move before abbrev if possible (#4635)
diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs
index 1f253b465..0c10b258d 100644
--- a/src/Text/Pandoc/Templates.hs
+++ b/src/Text/Pandoc/Templates.hs
@@ -80,6 +80,7 @@ getDefaultTemplate writer = do
let format = T.takeWhile (`notElem` ("+-" :: String)) writer -- strip off extensions
case format of
"native" -> return ""
+ "csljson" -> return ""
"json" -> return ""
"docx" -> return ""
"fb2" -> return ""
diff --git a/src/Text/Pandoc/Writers.hs b/src/Text/Pandoc/Writers.hs
index df8355c32..0654c2d85 100644
--- a/src/Text/Pandoc/Writers.hs
+++ b/src/Text/Pandoc/Writers.hs
@@ -24,6 +24,7 @@ module Text.Pandoc.Writers
, writeCommonMark
, writeConTeXt
, writeCustom
+ , writeCslJson
, writeDZSlides
, writeDocbook4
, writeDocbook5
@@ -86,6 +87,7 @@ import Text.Pandoc.Error
import Text.Pandoc.Writers.AsciiDoc
import Text.Pandoc.Writers.CommonMark
import Text.Pandoc.Writers.ConTeXt
+import Text.Pandoc.Writers.CslJson
import Text.Pandoc.Writers.Custom
import Text.Pandoc.Writers.Docbook
import Text.Pandoc.Writers.Docx
@@ -182,6 +184,7 @@ writers = [
,("gfm" , TextWriter writeCommonMark)
,("tei" , TextWriter writeTEI)
,("muse" , TextWriter writeMuse)
+ ,("csljson" , TextWriter writeCslJson)
]
-- | Retrieve writer, extensions based on formatSpec (format+extensions).
diff --git a/src/Text/Pandoc/Writers/CslJson.hs b/src/Text/Pandoc/Writers/CslJson.hs
new file mode 100644
index 000000000..9f6f2f8ea
--- /dev/null
+++ b/src/Text/Pandoc/Writers/CslJson.hs
@@ -0,0 +1,87 @@
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+ Module : Text.Pandoc.Writers.CslJson
+ Copyright : Copyright (C) 2020 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of references from 'Pandoc' metadata to CSL JSON:
+<https://citeproc-js.readthedocs.io/en/latest/csl-json/markup.html>.
+
+Note that this writer ignores everything in the body of the
+document and everything in the metadata except `references`.
+It assumes that the `references` field is a list with the structure
+of a CSL JSON bibliography.
+-}
+module Text.Pandoc.Writers.CslJson ( writeCslJson )
+where
+import Data.Text (Text)
+import qualified Data.Text as T
+import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Pandoc.Error
+import Text.Pandoc.Class
+import Control.Monad.Except (throwError)
+import Data.ByteString.Lazy (toStrict)
+import Data.ByteString (ByteString)
+import Text.Pandoc.Definition
+import Text.Pandoc.Builder as B
+import Text.Pandoc.Citeproc.MetaValue (metaValueToReference, metaValueToText)
+import Citeproc (parseLang, Locale, Reference(..), Lang(..))
+import Control.Monad.Identity
+import Citeproc.Locale (getLocale)
+import Citeproc.CslJson
+import Text.Pandoc.Options (WriterOptions)
+import Data.Maybe (fromMaybe, mapMaybe)
+import Data.Aeson.Encode.Pretty (Config (..), Indent (Spaces),
+ NumberFormat (Generic),
+ defConfig, encodePretty')
+
+writeCslJson :: PandocMonad m => WriterOptions -> Pandoc -> m Text
+writeCslJson _opts (Pandoc meta _) = do
+ let lang = fromMaybe (Lang "en" (Just "US")) $
+ parseLang <$> (lookupMeta "lang" meta >>= metaValueToText)
+ locale <- case getLocale lang of
+ Left e -> throwError $ PandocCiteprocError e
+ Right l -> return l
+ case lookupMeta "references" meta of
+ Just (MetaList rs) -> return $ (UTF8.toText $
+ toCslJson locale (mapMaybe metaValueToReference rs)) <> "\n"
+ _ -> throwError $ PandocAppError "No references field"
+
+fromInlines :: [Inline] -> CslJson Text
+fromInlines = foldMap fromInline . B.fromList
+
+fromInline :: Inline -> CslJson Text
+fromInline (Str t) = CslText t
+fromInline (Emph ils) = CslItalic (fromInlines ils)
+fromInline (Strong ils) = CslBold (fromInlines ils)
+fromInline (Underline ils) = CslUnderline (fromInlines ils)
+fromInline (Strikeout ils) = fromInlines ils
+fromInline (Superscript ils) = CslSup (fromInlines ils)
+fromInline (Subscript ils) = CslSub (fromInlines ils)
+fromInline (SmallCaps ils) = CslSmallCaps (fromInlines ils)
+fromInline (Quoted _ ils) = CslQuoted (fromInlines ils)
+fromInline (Cite _ ils) = fromInlines ils
+fromInline (Code _ t) = CslText t
+fromInline Space = CslText " "
+fromInline SoftBreak = CslText " "
+fromInline LineBreak = CslText "\n"
+fromInline (Math _ t) = CslText t
+fromInline (RawInline _ _) = CslEmpty
+fromInline (Link _ ils _) = fromInlines ils
+fromInline (Image _ ils _) = fromInlines ils
+fromInline (Note _) = CslEmpty
+fromInline (Span (_,[cl],_) ils)
+ | "csl-" `T.isPrefixOf` cl = CslDiv cl (fromInlines ils)
+fromInline (Span _ ils) = fromInlines ils
+
+toCslJson :: Locale -> [Reference Inlines] -> ByteString
+toCslJson locale = toStrict .
+ encodePretty' defConfig{ confIndent = Spaces 2
+ , confCompare = compare
+ , confNumFormat = Generic }
+ . map (runIdentity . traverse (return . renderCslJson locale . foldMap fromInline))
+
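
A sketch (not part of this commit) chaining the new BibTeX reader with this writer, which is roughly what `pandoc -f bibtex -t csljson` does; the entry is invented:

    {-# LANGUAGE OverloadedStrings #-}
    import qualified Data.Text.IO as T
    import Text.Pandoc.Class (runIOorExplode)
    import Text.Pandoc.Options (def)
    import Text.Pandoc.Readers.BibTeX (readBibTeX)
    import Text.Pandoc.Writers.CslJson (writeCslJson)

    main :: IO ()
    main = do
      json <- runIOorExplode $
        readBibTeX def "@book{doe2020, author={Doe, Jane}, title={An Example}, year={2020}}"
          >>= writeCslJson def
      T.putStr json
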
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index fa7e2ceea..89a50125b 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -1184,6 +1184,18 @@ inlineToOpenXML' _ (Str str) =
formattedString str
inlineToOpenXML' opts Space = inlineToOpenXML opts (Str " ")
inlineToOpenXML' opts SoftBreak = inlineToOpenXML opts (Str " ")
+inlineToOpenXML' opts (Span ("",["csl-block"],[]) ils) =
+ inlinesToOpenXML opts ils
+inlineToOpenXML' opts (Span ("",["csl-left-margin"],[]) ils) =
+ inlinesToOpenXML opts ils
+inlineToOpenXML' opts (Span ("",["csl-right-inline"],[]) ils) =
+ ([mknode "w:r" []
+ (mknode "w:t"
+ [("xml:space","preserve")]
+ ("\t" :: String))] ++)
+ <$> inlinesToOpenXML opts ils
+inlineToOpenXML' opts (Span ("",["csl-indent"],[]) ils) =
+ inlinesToOpenXML opts ils
inlineToOpenXML' _ (Span (ident,["comment-start"],kvs) ils) = do
-- prefer the "id" in kvs, since that is the one produced by the docx
-- reader.
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index b40765145..b6bde7f8f 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -86,6 +86,8 @@ data WriterState = WriterState
, stSlideLevel :: Int -- ^ Slide level
, stInSection :: Bool -- ^ Content is in a section (revealjs)
, stCodeBlockNum :: Int -- ^ Number of code block
+ , stCsl :: Bool -- ^ Has CSL references
+ , stCslEntrySpacing :: Maybe Int -- ^ CSL entry spacing
}
defaultWriterState :: WriterState
@@ -96,7 +98,9 @@ defaultWriterState = WriterState {stNotes= [], stMath = False, stQuotes = False,
stSlideVariant = NoSlides,
stSlideLevel = 1,
stInSection = False,
- stCodeBlockNum = 0}
+ stCodeBlockNum = 0,
+ stCsl = False,
+ stCslEntrySpacing = Nothing}
-- Helpers to render HTML with the appropriate function.
@@ -316,39 +320,48 @@ pandocToHtml opts (Pandoc meta blocks) = do
Just sty -> defField "highlighting-css"
(T.pack $ styleToCss sty)
Nothing -> id
- else id) $
+ else id) .
+ (if stCsl st
+ then defField "csl-css" True .
+ (case stCslEntrySpacing st of
+ Nothing -> id
+ Just 0 -> id
+ Just n ->
+ defField "csl-entry-spacing"
+ (tshow n <> "em"))
+ else id) .
(if stMath st
then defField "math" (renderHtml' math)
- else id) $
+ else id) .
(case writerHTMLMathMethod opts of
MathJax u -> defField "mathjax" True .
defField "mathjaxurl"
(T.takeWhile (/='?') u)
- _ -> defField "mathjax" False) $
+ _ -> defField "mathjax" False) .
(case writerHTMLMathMethod opts of
PlainMath -> defField "displaymath-css" True
WebTeX _ -> defField "displaymath-css" True
- _ -> id) $
- defField "document-css" (isNothing mCss && slideVariant == NoSlides) $
- defField "quotes" (stQuotes st) $
+ _ -> id) .
+ defField "document-css" (isNothing mCss && slideVariant == NoSlides) .
+ defField "quotes" (stQuotes st) .
-- for backwards compatibility we populate toc
-- with the contents of the toc, rather than a
-- boolean:
- maybe id (defField "toc") toc $
- maybe id (defField "table-of-contents") toc $
- defField "author-meta" authsMeta $
+ maybe id (defField "toc") toc .
+ maybe id (defField "table-of-contents") toc .
+ defField "author-meta" authsMeta .
maybe id (defField "date-meta")
- (normalizeDate dateMeta) $
+ (normalizeDate dateMeta) .
defField "pagetitle"
- (stringifyHTML . docTitle $ meta) $
- defField "idprefix" (writerIdentifierPrefix opts) $
+ (stringifyHTML . docTitle $ meta) .
+ defField "idprefix" (writerIdentifierPrefix opts) .
-- these should maybe be set in pandoc.hs
defField "slidy-url"
- ("https://www.w3.org/Talks/Tools/Slidy2" :: Text) $
- defField "slideous-url" ("slideous" :: Text) $
+ ("https://www.w3.org/Talks/Tools/Slidy2" :: Text) .
+ defField "slideous-url" ("slideous" :: Text) .
defField "revealjs-url" ("https://unpkg.com/reveal.js@^4/" :: Text) $
- defField "s5-url" ("s5/default" :: Text) $
- defField "html5" (stHtml5 st)
+ defField "s5-url" ("s5/default" :: Text) .
+ defField "html5" (stHtml5 st) $
metadata
return (thebody, context)
@@ -743,12 +756,17 @@ blockToHtml opts (Div (ident, "section":dclasses, dkvs)
blockToHtml opts (Div attr@(ident, classes, kvs') bs) = do
html5 <- gets stHtml5
slideVariant <- gets stSlideVariant
+ let isCslBibBody = ident == "refs" || "csl-bib-body" `elem` classes
+ when isCslBibBody $ modify $ \st -> st{ stCsl = True
+ , stCslEntrySpacing =
+ lookup "entry-spacing" kvs' >>=
+ safeRead }
+ let isCslBibEntry = "csl-entry" `elem` classes
let kvs = [(k,v) | (k,v) <- kvs', k /= "width"] ++
[("style", "width:" <> w <> ";") | "column" `elem` classes,
("width", w) <- kvs'] ++
- [("role", "doc-bibliography") | ident == "refs" && html5] ++
- [("role", "doc-biblioentry")
- | "ref-" `T.isPrefixOf` ident && html5]
+ [("role", "doc-bibliography") | isCslBibBody && html5] ++
+ [("role", "doc-biblioentry") | isCslBibEntry && html5]
let speakerNotes = "notes" `elem` classes
-- we don't want incremental output inside speaker notes, see #1394
let opts' = if | speakerNotes -> opts{ writerIncremental = False }
@@ -765,7 +783,9 @@ blockToHtml opts (Div attr@(ident, classes, kvs') bs) = do
-- a newline between the column divs, which throws
-- off widths! see #4028
mconcat <$> mapM (blockToHtml opts) bs
- else blockListToHtml opts' bs
+ else if isCslBibEntry
+ then mconcat <$> mapM (cslEntryToHtml opts') bs
+ else blockListToHtml opts' bs
let contents' = nl opts >> contents >> nl opts
let (divtag, classes'') = if html5 && "section" `elem` classes'
then (H5.section, filter (/= "section") classes')
@@ -1439,6 +1459,23 @@ blockListToNote opts ref blocks = do
_ -> noteItem
return $ nl opts >> noteItem'
+cslEntryToHtml :: PandocMonad m
+ => WriterOptions
+ -> Block
+ -> StateT WriterState m Html
+cslEntryToHtml opts (Para xs) = do
+ html5 <- gets stHtml5
+ let inDiv :: Text -> Html -> Html
+ inDiv cls x = ((if html5 then H5.div else H.div)
+ x ! A.class_ (toValue cls))
+ let go (Span ("",[cls],[]) ils)
+ | cls == "csl-block" || cls == "csl-left-margin" ||
+ cls == "csl-right-inline" || cls == "csl-indent"
+ = inDiv cls <$> inlineListToHtml opts ils
+ go il = inlineToHtml opts il
+ mconcat <$> mapM go xs
+cslEntryToHtml opts x = blockToHtml opts x
+
isMathEnvironment :: Text -> Bool
isMathEnvironment s = "\\begin{" `T.isPrefixOf` s &&
envName `elem` mathmlenvs
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 228b34d09..a4003b672 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -71,7 +71,6 @@ data WriterState =
, stBeamer :: Bool -- produce beamer
, stEmptyLine :: Bool -- true if no content on line
, stHasCslRefs :: Bool -- has a Div with class refs
- , stCslHangingIndent :: Bool -- use hanging indent for bib
, stIsFirstInDefinition :: Bool -- first block in a defn list
}
@@ -103,7 +102,6 @@ startingState options = WriterState {
, stBeamer = False
, stEmptyLine = True
, stHasCslRefs = False
- , stCslHangingIndent = False
, stIsFirstInDefinition = False }
-- | Convert Pandoc to LaTeX.
@@ -243,7 +241,6 @@ pandocToLaTeX options (Pandoc meta blocks) = do
else defField "dir" ("ltr" :: Text)) $
defField "section-titles" True $
defField "csl-refs" (stHasCslRefs st) $
- defField "csl-hanging-indent" (stCslHangingIndent st) $
defField "geometry" geometryFromMargins $
(case T.uncons . render Nothing <$>
getField "papersize" metadata of
@@ -541,16 +538,23 @@ blockToLaTeX (Div (identifier,classes,kvs) bs) = do
then modify $ \st -> st{ stIncremental = True }
else when (beamer && "nonincremental" `elem` classes) $
modify $ \st -> st { stIncremental = False }
- result <- if identifier == "refs"
+ result <- if identifier == "refs" || -- <- for backwards compatibility
+ "csl-bib-body" `elem` classes
then do
+ modify $ \st -> st{ stHasCslRefs = True }
inner <- blockListToLaTeX bs
- modify $ \st -> st{ stHasCslRefs = True
- , stCslHangingIndent =
- "hanging-indent" `elem` classes }
- return $ "\\begin{cslreferences}" $$
- inner $$
- "\\end{cslreferences}"
- else blockListToLaTeX bs
+ return $ "\\begin{CSLReferences}" <>
+ (if "hanging-indent" `elem` classes
+ then braces "1"
+ else braces "0") <>
+ (case lookup "entry-spacing" kvs of
+ Nothing -> braces "0"
+ Just s -> braces (literal s))
+ $$ inner
+ $+$ "\\end{CSLReferences}"
+ else if "csl-entry" `elem` classes
+ then vcat <$> mapM cslEntryToLaTeX bs
+ else blockListToLaTeX bs
modify $ \st -> st{ stIncremental = oldIncremental }
linkAnchor' <- hypertarget True identifier empty
-- see #2704 for the motivation for adding \leavevmode:
@@ -1151,6 +1155,23 @@ isQuoted :: Inline -> Bool
isQuoted (Quoted _ _) = True
isQuoted _ = False
+cslEntryToLaTeX :: PandocMonad m
+ => Block
+ -> LW m (Doc Text)
+cslEntryToLaTeX (Para xs) =
+ mconcat <$> mapM go xs
+ where
+ go (Span ("",["csl-block"],[]) ils) =
+ (cr <>) . inCmd "CSLBlock" <$> inlineListToLaTeX ils
+ go (Span ("",["csl-left-margin"],[]) ils) =
+ inCmd "CSLLeftMargin" <$> inlineListToLaTeX ils
+ go (Span ("",["csl-right-inline"],[]) ils) =
+ (cr <>) . inCmd "CSLRightInline" <$> inlineListToLaTeX ils
+ go (Span ("",["csl-indent"],[]) ils) =
+ (cr <>) . inCmd "CSLIndent" <$> inlineListToLaTeX ils
+ go il = inlineToLaTeX il
+cslEntryToLaTeX x = blockToLaTeX x
+
-- | Convert inline element to LaTeX
inlineToLaTeX :: PandocMonad m
=> Inline -- ^ Inline to convert
diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs
index f3aadde59..dbf7a3d79 100644
--- a/src/Text/Pandoc/Writers/Ms.hs
+++ b/src/Text/Pandoc/Writers/Ms.hs
@@ -110,16 +110,37 @@ blockToMs :: PandocMonad m
-> Block -- ^ Block element
-> MS m (Doc Text)
blockToMs _ Null = return empty
-blockToMs opts (Div (ident,_,_) bs) = do
+blockToMs opts (Div (ident,cls,kvs) bs) = do
let anchor = if T.null ident
then empty
else nowrap $
literal ".pdfhref M "
<> doubleQuotes (literal (toAscii ident))
- setFirstPara
- res <- blockListToMs opts bs
- setFirstPara
- return $ anchor $$ res
+ case cls of
+ _ | "csl-entry" `elem` cls ->
+ (".CSLENTRY" $$) . vcat <$> mapM (cslEntryToMs True opts) bs
+ | "csl-bib-body" `elem` cls -> do
+ res <- blockListToMs opts bs
+ return $ anchor $$
+ -- so that XP paragraphs are indented:
+ ".nr PI 3n" $$
+ -- space between entries
+ ".de CSLENTRY" $$
+ (case lookup "entry-spacing" kvs >>= safeRead of
+ Just n | n > (0 :: Int) -> ".sp"
+ _ -> mempty) $$
+ ".." $$
+ ".de CSLP" $$
+ (if "hanging-indent" `elem` cls
+ then ".XP"
+ else ".LP") $$
+ ".." $$
+ res
+ _ -> do
+ setFirstPara
+ res <- blockListToMs opts bs
+ setFirstPara
+ return $ anchor $$ res
blockToMs opts (Plain inlines) =
liftM vcat $ mapM (inlineListToMs' opts) $ splitSentences inlines
blockToMs opts (Para [Image attr alt (src,_tit)])
@@ -440,6 +461,39 @@ inlineToMs _ (Note contents) = do
modify $ \st -> st{ stNotes = contents : stNotes st }
return $ literal "\\**"
+cslEntryToMs :: PandocMonad m
+ => Bool
+ -> WriterOptions
+ -> Block
+ -> MS m (Doc Text)
+cslEntryToMs atStart opts (Para xs) =
+ case xs of
+ (Span ("",["csl-left-margin"],[]) lils :
+ rest@(Span ("",["csl-right-inline"],[]) _ : _))
+ -> do lils' <- inlineListToMs' opts lils
+ ((cr <> literal ".IP " <>
+ doubleQuotes (nowrap lils') <>
+ literal " 5") $$)
+ <$> cslEntryToMs False opts (Para rest)
+ (Span ("",["csl-block"],[]) ils : rest)
+ -> ((cr <> literal ".LP") $$)
+ <$> cslEntryToMs False opts (Para (ils ++ rest))
+ (Span ("",["csl-left-margin"],[]) ils : rest)
+ -> ((cr <> literal ".LP") $$)
+ <$> cslEntryToMs False opts (Para (ils ++ rest))
+ (Span ("",["csl-indented"],[]) ils : rest)
+ -> ((cr <> literal ".LP") $$)
+ <$> cslEntryToMs False opts (Para (ils ++ rest))
+ _ | atStart
+ -> (".CSLP" $$) <$> cslEntryToMs False opts (Para xs)
+ | otherwise
+ -> case xs of
+ [] -> return mempty
+ (x:rest) -> (<>) <$> (inlineToMs opts x)
+ <*> (cslEntryToMs False opts (Para rest))
+cslEntryToMs _ opts x = blockToMs opts x
+
+
handleNotes :: PandocMonad m => WriterOptions -> Doc Text -> MS m (Doc Text)
handleNotes opts fallback = do
notes <- gets stNotes