aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Citeproc/BibTeX.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Citeproc/BibTeX.hs')
-rw-r--r--src/Text/Pandoc/Citeproc/BibTeX.hs1237
1 files changed, 1237 insertions, 0 deletions
diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs
new file mode 100644
index 000000000..5919fee77
--- /dev/null
+++ b/src/Text/Pandoc/Citeproc/BibTeX.hs
@@ -0,0 +1,1237 @@
+{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# OPTIONS_GHC -fno-warn-unused-do-bind #-}
+-----------------------------------------------------------------------------
+-- |
+-- Module : Text.CSL.Input.Bibtex
+-- Copyright : (c) John MacFarlane
+-- License : BSD-style (see LICENSE)
+--
+-- Maintainer : John MacFarlane <fiddlosopher@gmail.com>
+-- Stability : unstable-- Portability : unportable
+--
+-----------------------------------------------------------------------------
+
+module Text.Pandoc.Citeproc.BibTeX
+ ( Variant(..)
+ , readBibtexString
+ )
+ where
+
+import Text.Pandoc.Definition
+import Text.Pandoc.Builder as B
+import Text.Pandoc.Readers.LaTeX (readLaTeX)
+import Text.Pandoc.Extensions (Extension(..), extensionsFromList)
+import Text.Pandoc.Options (ReaderOptions(..))
+import Text.Pandoc.Class (runPure)
+import Text.Pandoc.Error (PandocError)
+import Text.Pandoc.Shared (stringify)
+import qualified Text.Pandoc.Walk as Walk
+import Citeproc.Types
+import Citeproc.CaseTransform (withSentenceCase)
+import Citeproc.Pandoc (caseTransform)
+import Text.Pandoc.Citeproc.Util (toIETF)
+import Text.Pandoc.Citeproc.Data (biblatexStringMap)
+import Data.Default
+import Data.Text (Text)
+import qualified Data.Text as T
+import qualified Data.Map as Map
+import Data.Maybe
+import Text.Parsec hiding (State, many, (<|>))
+import Control.Applicative
+import Data.List.Split (splitOn, splitWhen, wordsBy)
+import Control.Monad.RWS hiding ((<>))
+import qualified Data.Sequence as Seq
+import Data.Char (isAlphaNum, isDigit, isLetter,
+ isUpper, toLower, toUpper,
+ isLower, isPunctuation)
+import Data.List (foldl', intercalate)
+import Safe (readMay)
+
+data Variant = Bibtex | Biblatex
+ deriving (Show, Eq, Ord)
+
+-- | Parse BibTeX or BibLaTeX into a list of 'Reference's.
+readBibtexString :: Variant -- ^ bibtex or biblatex
+ -> Locale -- ^ Locale
+ -> (Text -> Bool) -- ^ Filter on citation ids
+ -> Text -- ^ bibtex/biblatex text
+ -> Either ParseError [Reference Inlines]
+readBibtexString variant locale idpred contents = do
+ case runParser (((resolveCrossRefs variant <$> bibEntries) <* eof) >>=
+ mapM (itemToReference locale variant) .
+ filter (idpred . identifier))
+ (fromMaybe defaultLang $ localeLanguage locale, Map.empty)
+ "" contents of
+ Left err -> Left err
+ Right xs -> return xs
+
+defaultLang :: Lang
+defaultLang = Lang "en" (Just "US")
+
+-- a map of bibtex "string" macros
+type StringMap = Map.Map Text Text
+
+type BibParser = Parsec Text (Lang, StringMap)
+
+data Item = Item{ identifier :: Text
+ , sourcePos :: SourcePos
+ , entryType :: Text
+ , fields :: Map.Map Text Text
+ }
+ deriving (Show, Ord, Eq)
+
+itemToReference :: Locale -> Variant -> Item -> BibParser (Reference Inlines)
+itemToReference locale variant item = do
+ setPosition (sourcePos item)
+ bib item $ do
+ let lang = fromMaybe defaultLang $ localeLanguage locale
+ modify $ \st -> st{ localeLang = lang,
+ untitlecase = case lang of
+ (Lang "en" _) -> True
+ _ -> False }
+
+ id' <- asks identifier
+ otherIds <- (Just <$> getRawField "ids")
+ <|> return Nothing
+ (reftype, genre) <- getTypeAndGenre
+ -- hyphenation:
+ let getLangId = do
+ langid <- T.strip . T.toLower <$> getRawField "langid"
+ idopts <- T.strip . T.toLower . stringify <$>
+ getField "langidopts" <|> return ""
+ case (langid, idopts) of
+ ("english","variant=british") -> return "british"
+ ("english","variant=american") -> return "american"
+ ("english","variant=us") -> return "american"
+ ("english","variant=usmax") -> return "american"
+ ("english","variant=uk") -> return "british"
+ ("english","variant=australian") -> return "australian"
+ ("english","variant=newzealand") -> return "newzealand"
+ (x,_) -> return x
+ hyphenation <- (Just . toIETF . T.toLower <$>
+ (getLangId <|> getRawField "hyphenation"))
+ <|> return Nothing
+ modify $ \s -> s{ untitlecase = untitlecase s &&
+ case hyphenation of
+ Just x -> "en-" `T.isPrefixOf` x
+ _ -> True }
+
+
+ opts <- (parseOptions <$> getRawField "options") <|> return []
+
+ et <- asks entryType
+
+ -- titles
+ let isArticle = et `elem`
+ ["article", "periodical", "suppperiodical", "review"]
+ let isPeriodical = et == "periodical"
+ let isChapterlike = et `elem`
+ ["inbook","incollection","inproceedings","inreference","bookinbook"]
+
+ let getFieldMaybe f = (Just <$> getField f) <|> return Nothing
+
+ -- names
+ let getNameList' f = Just <$>
+ getNameList (("bibtex", case variant of
+ Bibtex -> "true"
+ Biblatex -> "false") : opts) f
+
+ author' <- getNameList' "author" <|> return Nothing
+ containerAuthor' <- getNameList' "bookauthor" <|> return Nothing
+ translator' <- getNameList' "translator" <|> return Nothing
+ editortype <- getRawField "editortype" <|> return mempty
+ editor'' <- getNameList' "editor" <|> return Nothing
+ director'' <- getNameList' "director" <|> return Nothing
+ let (editor', director') = case editortype of
+ "director" -> (Nothing, editor'')
+ _ -> (editor'', director'')
+ -- FIXME: add same for editora, editorb, editorc
+
+ -- dates
+ issued' <- (Just <$> (getDate "date" <|> getOldDate mempty)) <|>
+ return Nothing
+ eventDate' <- (Just <$> (getDate "eventdate" <|> getOldDate "event")) <|>
+ return Nothing
+ origDate' <- (Just <$> (getDate "origdate" <|> getOldDate "orig")) <|>
+ return Nothing
+ accessed' <- (Just <$> (getDate "urldate" <|> getOldDate "url")) <|>
+ return Nothing
+
+ -- locators
+ pages' <- getFieldMaybe "pages"
+ volume' <- getFieldMaybe "volume"
+ part' <- getFieldMaybe "part"
+ volumes' <- getFieldMaybe "volumes"
+ pagetotal' <- getFieldMaybe "pagetotal"
+ chapter' <- getFieldMaybe "chapter"
+ edition' <- getFieldMaybe "edition"
+ version' <- getFieldMaybe "version"
+ (number', collectionNumber', issue') <-
+ (getField "number" >>= \x ->
+ if et `elem` ["book","collection","proceedings","reference",
+ "mvbook","mvcollection","mvproceedings", "mvreference",
+ "bookinbook","inbook", "incollection","inproceedings",
+ "inreference", "suppbook","suppcollection"]
+ then return (Nothing, Just x, Nothing)
+ else if isArticle
+ then (getField "issue" >>= \y ->
+ return (Nothing, Nothing, Just $ concatWith ',' [x,y]))
+ <|> return (Nothing, Nothing, Just x)
+ else return (Just x, Nothing, Nothing))
+ <|> return (Nothing, Nothing, Nothing)
+
+ -- titles
+ hasMaintitle <- (True <$ getRawField "maintitle") <|> return False
+
+ title' <- Just <$>
+ ((guard isPeriodical >> getTitle "issuetitle")
+ <|> (guard hasMaintitle >>
+ guard (not isChapterlike) >>
+ getTitle "maintitle")
+ <|> getTitle "title")
+ <|> return Nothing
+
+ subtitle' <- (guard isPeriodical >> getTitle "issuesubtitle")
+ <|> (guard hasMaintitle >>
+ guard (not isChapterlike) >>
+ getTitle "mainsubtitle")
+ <|> getTitle "subtitle"
+ <|> return mempty
+ titleaddon' <- (guard hasMaintitle >>
+ guard (not isChapterlike) >>
+ getTitle "maintitleaddon")
+ <|> getTitle "titleaddon"
+ <|> return mempty
+
+ volumeTitle' <- Just <$>
+ ((guard hasMaintitle >>
+ guard (not isChapterlike) >>
+ getTitle "title")
+ <|> (guard hasMaintitle >>
+ guard isChapterlike >>
+ getTitle "booktitle"))
+ <|> return Nothing
+ volumeSubtitle' <- (guard hasMaintitle >>
+ guard (not isChapterlike) >>
+ getTitle "subtitle")
+ <|> (guard hasMaintitle >>
+ guard isChapterlike >>
+ getTitle "booksubtitle")
+ <|> return mempty
+ volumeTitleAddon' <- (guard hasMaintitle >>
+ guard (not isChapterlike) >>
+ getTitle "titleaddon")
+ <|> (guard hasMaintitle >>
+ guard isChapterlike >>
+ getTitle "booktitleaddon")
+ <|> return mempty
+
+ containerTitle' <- Just <$>
+ ((guard isPeriodical >> getPeriodicalTitle "title")
+ <|> (guard isChapterlike >> getTitle "maintitle")
+ <|> (guard isChapterlike >> getTitle "booktitle")
+ <|> getPeriodicalTitle "journaltitle"
+ <|> getPeriodicalTitle "journal")
+ <|> return Nothing
+ containerSubtitle' <- (guard isPeriodical >> getPeriodicalTitle "subtitle")
+ <|> (guard isChapterlike >> getTitle "mainsubtitle")
+ <|> (guard isChapterlike >> getTitle "booksubtitle")
+ <|> getPeriodicalTitle "journalsubtitle"
+ <|> return mempty
+ containerTitleAddon' <- (guard isPeriodical >>
+ getPeriodicalTitle "titleaddon")
+ <|> (guard isChapterlike >>
+ getTitle "maintitleaddon")
+ <|> (guard isChapterlike >>
+ getTitle "booktitleaddon")
+ <|> return mempty
+ containerTitleShort' <- Just <$>
+ ((guard isPeriodical >>
+ guard (not hasMaintitle) >>
+ getField "shorttitle")
+ <|> getPeriodicalTitle "shortjournal")
+ <|> return Nothing
+
+ -- change numerical series title to e.g. 'series 3'
+ let fixSeriesTitle [Str xs] | isNumber xs =
+ [Str (ordinalize locale xs), Space, Str (resolveKey' lang "jourser")]
+ fixSeriesTitle xs = xs
+ seriesTitle' <- (Just . B.fromList . fixSeriesTitle .
+ B.toList . resolveKey lang <$>
+ getTitle "series") <|>
+ return Nothing
+ shortTitle' <- Just <$>
+ ((guard (not hasMaintitle || isChapterlike) >>
+ getTitle "shorttitle")
+ <|> if (subtitle' /= mempty || titleaddon' /= mempty) &&
+ not hasMaintitle
+ then getShortTitle False "title"
+ else getShortTitle True "title")
+ <|> return Nothing
+
+ eventTitle' <- Just <$> getTitle "eventtitle" <|> return Nothing
+ origTitle' <- Just <$> getTitle "origtitle" <|> return Nothing
+
+ -- publisher
+ pubfields <- mapM (\f -> Just `fmap`
+ (if variant == Bibtex || f == "howpublished"
+ then getField f
+ else getLiteralList' f)
+ <|> return Nothing)
+ ["school","institution","organization", "howpublished","publisher"]
+ let publisher' = case catMaybes pubfields of
+ [] -> Nothing
+ xs -> Just $ concatWith ';' xs
+ origpublisher' <- (Just <$> getField "origpublisher") <|> return Nothing
+
+ -- places
+ venue' <- (Just <$> getField "venue") <|> return Nothing
+ address' <- Just <$>
+ (if variant == Bibtex
+ then getField "address"
+ else getLiteralList' "address"
+ <|> (guard (et /= "patent") >>
+ getLiteralList' "location"))
+ <|> return Nothing
+ origLocation' <- Just <$>
+ (if variant == Bibtex
+ then getField "origlocation"
+ else getLiteralList' "origlocation")
+ <|> return Nothing
+ jurisdiction' <- if reftype == "patent"
+ then Just <$>
+ (concatWith ';' . map (resolveKey lang) <$>
+ getLiteralList "location") <|> return Nothing
+ else return Nothing
+
+ -- url, doi, isbn, etc.:
+ -- note that with eprinttype = arxiv, we take eprint to be a partial url
+ -- archivePrefix is an alias for eprinttype
+ url' <- (guard (et == "online" || lookup "url" opts /= Just "false")
+ >> Just <$> getRawField "url")
+ <|> (do etype <- getRawField "eprinttype"
+ eprint <- getRawField "eprint"
+ let baseUrl =
+ case T.toLower etype of
+ "arxiv" -> "http://arxiv.org/abs/"
+ "jstor" -> "http://www.jstor.org/stable/"
+ "pubmed" -> "http://www.ncbi.nlm.nih.gov/pubmed/"
+ "googlebooks" -> "http://books.google.com?id="
+ _ -> ""
+ if T.null baseUrl
+ then mzero
+ else return $ Just $ baseUrl <> eprint)
+ <|> return Nothing
+ doi' <- (guard (lookup "doi" opts /= Just "false") >>
+ Just <$> getRawField "doi")
+ <|> return Nothing
+ isbn' <- Just <$> getRawField "isbn" <|> return Nothing
+ issn' <- Just <$> getRawField "issn" <|> return Nothing
+ pmid' <- Just <$> getRawField "pmid" <|> return Nothing
+ pmcid' <- Just <$> getRawField "pmcid" <|> return Nothing
+ callNumber' <- Just <$> getRawField "library" <|> return Nothing
+
+ -- notes
+ annotation' <- Just <$>
+ (getField "annotation" <|> getField "annote")
+ <|> return Nothing
+ abstract' <- Just <$> getField "abstract" <|> return Nothing
+ keywords' <- Just <$> getField "keywords" <|> return Nothing
+ note' <- if et == "periodical"
+ then return Nothing
+ else Just <$> getField "note" <|> return Nothing
+ addendum' <- if variant == Bibtex
+ then return Nothing
+ else Just <$> getField "addendum"
+ <|> return Nothing
+ pubstate' <- ( (Just . resolveKey lang <$> getField "pubstate")
+ <|> case dateLiteral <$> issued' of
+ Just (Just "forthcoming") ->
+ return $ Just $ B.str "forthcoming"
+ _ -> return Nothing
+ )
+
+
+
+
+ let addField (_, Nothing) = id
+ addField (f, Just x) = Map.insert f x
+ let vars = foldr addField mempty
+ [ ("other-ids", TextVal <$> otherIds)
+ , ("genre", TextVal <$> genre)
+ , ("language", TextVal <$> hyphenation)
+ -- dates
+ , ("accessed", DateVal <$> accessed')
+ , ("event-date", DateVal <$> eventDate')
+ , ("issued", DateVal <$> issued')
+ , ("original-date", DateVal <$> origDate')
+ -- names
+ , ("author", NamesVal <$> author')
+ , ("editor", NamesVal <$> editor')
+ , ("translator", NamesVal <$> translator')
+ , ("director", NamesVal <$> director')
+ , ("container-author", NamesVal <$> containerAuthor')
+ -- locators
+ , ("page", FancyVal . Walk.walk convertEnDash <$> pages')
+ , ("number-of-pages", FancyVal <$> pagetotal')
+ , ("volume", case (volume', part') of
+ (Nothing, Nothing) -> Nothing
+ (Just v, Nothing) -> Just $ FancyVal v
+ (Nothing, Just p) -> Just $ FancyVal p
+ (Just v, Just p) ->
+ Just $ FancyVal $ v <> B.str "." <> p)
+ , ("number-of-volumes", FancyVal <$> volumes')
+ , ("chapter-number", FancyVal <$> chapter')
+ , ("edition", FancyVal <$> edition')
+ , ("version", FancyVal <$> version')
+ , ("number", FancyVal <$> number')
+ , ("collection-number", FancyVal <$> collectionNumber')
+ , ("issue", FancyVal <$> issue')
+ -- title
+ , ("original-title", FancyVal <$> origTitle')
+ , ("event", FancyVal <$> eventTitle')
+ , ("title", case title' of
+ Just t -> Just $ FancyVal $
+ concatWith '.' [
+ concatWith ':' [t, subtitle']
+ , titleaddon' ]
+ Nothing -> Nothing)
+ , ("volume-title",
+ case volumeTitle' of
+ Just t -> Just $ FancyVal $
+ concatWith '.' [
+ concatWith ':' [t, volumeSubtitle']
+ , volumeTitleAddon' ]
+ Nothing -> Nothing)
+ , ("container-title",
+ case containerTitle' of
+ Just t -> Just $ FancyVal $
+ concatWith '.' [
+ concatWith ':' [t,
+ containerSubtitle']
+ , containerTitleAddon' ]
+ Nothing -> Nothing)
+ , ("container-title-short", FancyVal <$> containerTitleShort')
+ , ("collection-title", FancyVal <$> seriesTitle')
+ , ("title-short", FancyVal <$> shortTitle')
+ -- publisher
+ , ("publisher", FancyVal <$> publisher')
+ , ("original-publisher", FancyVal <$> origpublisher')
+ -- places
+ , ("jurisdiction", FancyVal <$> jurisdiction')
+ , ("event-place", FancyVal <$> venue')
+ , ("publisher-place", FancyVal <$> address')
+ , ("original-publisher-place", FancyVal <$> origLocation')
+ -- urls
+ , ("url", TextVal <$> url')
+ , ("doi", TextVal <$> doi')
+ , ("isbn", TextVal <$> isbn')
+ , ("issn", TextVal <$> issn')
+ , ("pmcid", TextVal <$> pmcid')
+ , ("pmid", TextVal <$> pmid')
+ , ("call-number", TextVal <$> callNumber')
+ -- notes
+ , ("note", case catMaybes [note', addendum'] of
+ [] -> Nothing
+ xs -> return $ FancyVal $ concatWith '.' xs)
+ , ("annote", FancyVal <$> annotation')
+ , ("abstract", FancyVal <$> abstract')
+ , ("keyword", FancyVal <$> keywords')
+ , ("status", FancyVal <$> pubstate')
+ ]
+ return $ Reference
+ { referenceId = ItemId id'
+ , referenceType = reftype
+ , referenceDisambiguation = Nothing
+ , referenceVariables = vars }
+
+
+bib :: Item -> Bib a -> BibParser a
+bib entry m = fst <$> evalRWST m entry (BibState True (Lang "en" (Just "US")))
+
+resolveCrossRefs :: Variant -> [Item] -> [Item]
+resolveCrossRefs variant entries =
+ map (resolveCrossRef variant entries) entries
+
+resolveCrossRef :: Variant -> [Item] -> Item -> Item
+resolveCrossRef variant entries entry =
+ Map.foldrWithKey go entry (fields entry)
+ where go key val entry' =
+ if key == "crossref" || key == "xdata"
+ then entry'{ fields = fields entry' <>
+ Map.fromList (getXrefFields variant
+ entry entries val) }
+ else entry'
+
+getXrefFields :: Variant -> Item -> [Item] -> Text -> [(Text, Text)]
+getXrefFields variant baseEntry entries keys = do
+ let keys' = splitKeys keys
+ xrefEntry <- [e | e <- entries, identifier e `elem` keys']
+ (k, v) <- Map.toList $ fields xrefEntry
+ if k == "crossref" || k == "xdata"
+ then do
+ xs <- mapM (getXrefFields variant baseEntry entries)
+ (splitKeys v)
+ (x, y) <- xs
+ guard $ isNothing $ Map.lookup x $ fields xrefEntry
+ return (x, y)
+ else do
+ k' <- case variant of
+ Bibtex -> return k
+ Biblatex -> transformKey
+ (entryType xrefEntry) (entryType baseEntry) k
+ guard $ isNothing $ Map.lookup k' $ fields baseEntry
+ return (k',v)
+
+
+
+data BibState = BibState{
+ untitlecase :: Bool
+ , localeLang :: Lang
+ }
+
+type Bib = RWST Item () BibState BibParser
+
+blocksToInlines :: [Block] -> Inlines
+blocksToInlines bs =
+ case bs of
+ [Plain xs] -> B.fromList xs
+ [Para xs] -> B.fromList xs
+ _ -> B.fromList $ Walk.query (:[]) bs
+
+adjustSpans :: Lang -> Inline -> Inline
+adjustSpans lang (RawInline (Format "latex") s)
+ | s == "\\hyphen" || s == "\\hyphen " = Str "-"
+ | otherwise = parseRawLaTeX lang s
+adjustSpans _ SoftBreak = Space
+adjustSpans _ x = x
+
+parseRawLaTeX :: Lang -> Text -> Inline
+parseRawLaTeX lang t@(T.stripPrefix "\\" -> Just xs) =
+ case parseLaTeX lang contents of
+ Right [Para ys] -> f command ys
+ Right [Plain ys] -> f command ys
+ Right [] -> f command []
+ _ -> RawInline (Format "latex") t
+ where (command', contents') = T.break (\c -> c =='{' || c =='\\') xs
+ command = T.strip command'
+ contents = T.drop 1 $ T.dropEnd 1 contents'
+ f "mkbibquote" ils = Span nullAttr [Quoted DoubleQuote ils]
+ f "mkbibemph" ils = Span nullAttr [Emph ils]
+ f "mkbibitalic" ils = Span nullAttr [Emph ils]
+ f "mkbibbold" ils = Span nullAttr [Strong ils]
+ f "mkbibparens" ils = Span nullAttr $
+ [Str "("] ++ ils ++ [Str ")"]
+ f "mkbibbrackets" ils = Span nullAttr $
+ [Str "["] ++ ils ++ [Str "]"]
+ -- ... both should be nestable & should work in year fields
+ f "autocap" ils = Span nullAttr ils
+ -- TODO: should work in year fields
+ f "textnormal" ils = Span ("",["nodecor"],[]) ils
+ f "bibstring" [Str s] = Str $ resolveKey' lang s
+ f "adddot" [] = Str "."
+ f "adddotspace" [] = Span nullAttr [Str ".", Space]
+ f "addabbrvspace" [] = Space
+ f _ ils = Span nullAttr ils
+parseRawLaTeX _ t = RawInline (Format "latex") t
+
+latex' :: Text -> Bib [Block]
+latex' t = do
+ lang <- gets localeLang
+ case parseLaTeX lang t of
+ Left _ -> mzero
+ Right bs -> return bs
+
+parseLaTeX :: Lang -> Text -> Either PandocError [Block]
+parseLaTeX lang t =
+ case runPure (readLaTeX
+ def{ readerExtensions =
+ extensionsFromList [Ext_raw_tex, Ext_smart] } t) of
+ Left e -> Left e
+ Right (Pandoc _ bs) -> Right $ Walk.walk (adjustSpans lang) bs
+
+latex :: Text -> Bib Inlines
+latex = fmap blocksToInlines . latex' . T.strip
+
+type Options = [(Text, Text)]
+
+parseOptions :: Text -> Options
+parseOptions = map breakOpt . T.splitOn ","
+ where breakOpt x = case T.break (=='=') x of
+ (w,v) -> (T.toLower $ T.strip w,
+ T.toLower $ T.strip $ T.drop 1 v)
+
+bibEntries :: BibParser [Item]
+bibEntries = do
+ skipMany nonEntry
+ many (bibItem <* skipMany nonEntry)
+ where nonEntry = bibSkip <|>
+ try (char '@' >>
+ (bibComment <|> bibPreamble <|> bibString))
+
+bibSkip :: BibParser ()
+bibSkip = skipMany1 (satisfy (/='@'))
+
+bibComment :: BibParser ()
+bibComment = do
+ cistring "comment"
+ spaces
+ void inBraces <|> bibSkip <|> return ()
+
+bibPreamble :: BibParser ()
+bibPreamble = do
+ cistring "preamble"
+ spaces
+ void inBraces
+
+bibString :: BibParser ()
+bibString = do
+ cistring "string"
+ spaces
+ char '{'
+ spaces
+ (k,v) <- entField
+ char '}'
+ updateState (\(l,m) -> (l, Map.insert k v m))
+ return ()
+
+inBraces :: BibParser Text
+inBraces = try $ do
+ char '{'
+ res <- manyTill
+ ( (T.pack <$> many1 (noneOf "{}\\"))
+ <|> (char '\\' >> ( (char '{' >> return "\\{")
+ <|> (char '}' >> return "\\}")
+ <|> return "\\"))
+ <|> (braced <$> inBraces)
+ ) (char '}')
+ return $ T.concat res
+
+braced :: Text -> Text
+braced = T.cons '{' . flip T.snoc '}'
+
+inQuotes :: BibParser Text
+inQuotes = do
+ char '"'
+ T.concat <$> manyTill
+ ( (T.pack <$> many1 (noneOf "\"\\{"))
+ <|> (char '\\' >> T.cons '\\' . T.singleton <$> anyChar)
+ <|> braced <$> inBraces
+ ) (char '"')
+
+fieldName :: BibParser Text
+fieldName = resolveAlias . T.toLower . T.pack
+ <$> many1 (letter <|> digit <|> oneOf "-_:+")
+
+isBibtexKeyChar :: Char -> Bool
+isBibtexKeyChar c =
+ isAlphaNum c || c `elem` (".:;?!`'()/*@_+=-[]*&" :: [Char])
+
+bibItem :: BibParser Item
+bibItem = do
+ char '@'
+ pos <- getPosition
+ enttype <- map toLower <$> many1 letter
+ spaces
+ char '{'
+ spaces
+ entid <- many1 (satisfy isBibtexKeyChar)
+ spaces
+ char ','
+ spaces
+ entfields <- entField `sepEndBy` (char ',' >> spaces)
+ spaces
+ char '}'
+ return $ Item (T.pack entid) pos (T.pack enttype) (Map.fromList entfields)
+
+entField :: BibParser (Text, Text)
+entField = do
+ k <- fieldName
+ spaces
+ char '='
+ spaces
+ vs <- (expandString <|> inQuotes <|> inBraces <|> rawWord) `sepBy`
+ try (spaces >> char '#' >> spaces)
+ spaces
+ return (k, T.concat vs)
+
+resolveAlias :: Text -> Text
+resolveAlias "archiveprefix" = "eprinttype"
+resolveAlias "primaryclass" = "eprintclass"
+resolveAlias s = s
+
+rawWord :: BibParser Text
+rawWord = T.pack <$> many1 alphaNum
+
+expandString :: BibParser Text
+expandString = do
+ k <- fieldName
+ (lang, strs) <- getState
+ case Map.lookup k strs of
+ Just v -> return v
+ Nothing -> return $ resolveKey' lang k
+
+cistring :: Text -> BibParser Text
+cistring s = try (go s)
+ where go t = case T.uncons t of
+ Nothing -> return ""
+ Just (c,cs) -> do
+ x <- char (toLower c) <|> char (toUpper c)
+ xs <- go cs
+ return (T.cons x xs)
+
+splitKeys :: Text -> [Text]
+splitKeys = filter (not . T.null) . T.split (\c -> c == ' ' || c == ',')
+
+-- Biblatex Localization Keys (see Biblatex manual)
+-- Currently we only map a subset likely to be used in Biblatex *databases*
+-- (in fields such as `type`, and via `\bibstring{}` commands).
+
+parseMonth :: Text -> Maybe Int
+parseMonth s =
+ case T.toLower s of
+ "jan" -> Just 1
+ "feb" -> Just 2
+ "mar" -> Just 3
+ "apr" -> Just 4
+ "may" -> Just 5
+ "jun" -> Just 6
+ "jul" -> Just 7
+ "aug" -> Just 8
+ "sep" -> Just 9
+ "oct" -> Just 10
+ "nov" -> Just 11
+ "dec" -> Just 12
+ _ -> readMay (T.unpack s)
+
+notFound :: Text -> Bib a
+notFound f = Prelude.fail $ T.unpack f ++ " not found"
+
+getField :: Text -> Bib Inlines
+getField f = do
+ fs <- asks fields
+ case Map.lookup f fs of
+ Just x -> latex x
+ Nothing -> notFound f
+
+
+getPeriodicalTitle :: Text -> Bib Inlines
+getPeriodicalTitle f = do
+ ils <- getField f
+ return ils
+
+protectCase :: (Inlines -> Inlines) -> (Inlines -> Inlines)
+protectCase f = Walk.walk unprotect . f . Walk.walk protect
+ where
+ protect (Span ("",[],[]) xs) = Span ("",["nocase"],[]) xs
+ protect x = x
+ unprotect (Span ("",["nocase"],[]) xs)
+ | hasLowercaseWord xs = Span ("",["nocase"],[]) xs
+ | otherwise = Span ("",[],[]) xs
+ unprotect x = x
+ hasLowercaseWord = any startsWithLowercase . splitStrWhen isPunctuation
+ startsWithLowercase (Str (T.uncons -> Just (x,_))) = isLower x
+ startsWithLowercase _ = False
+
+unTitlecase :: Maybe Lang -> Inlines -> Inlines
+unTitlecase mblang = protectCase (caseTransform (withSentenceCase mblang))
+
+getTitle :: Text -> Bib Inlines
+getTitle f = do
+ ils <- getField f
+ utc <- gets untitlecase
+ lang <- gets localeLang
+ let processTitle = if utc then unTitlecase (Just lang) else id
+ return $ processTitle ils
+
+getShortTitle :: Bool -> Text -> Bib Inlines
+getShortTitle requireColon f = do
+ ils <- splitStrWhen (==':') . B.toList <$> getTitle f
+ if not requireColon || containsColon ils
+ then return $ B.fromList $ upToColon ils
+ else return mempty
+
+containsColon :: [Inline] -> Bool
+containsColon xs = Str ":" `elem` xs
+
+upToColon :: [Inline] -> [Inline]
+upToColon xs = takeWhile (/= Str ":") xs
+
+isNumber :: Text -> Bool
+isNumber t = case T.uncons t of
+ Just ('-', ds) -> T.all isDigit ds
+ Just _ -> T.all isDigit t
+ Nothing -> False
+
+getDate :: Text -> Bib Date
+getDate f = do
+ -- the ~ can used for approx dates, but the latex reader
+ -- parses this as a nonbreaking space, so we need to convert it back!
+ let nbspToTilde '\160' = '~'
+ nbspToTilde c = c
+ mbd <- rawDateEDTF . T.map nbspToTilde <$> getRawField f
+ case mbd of
+ Nothing -> Prelude.fail "expected date"
+ Just d -> return d
+
+-- A negative (BC) year might be written with -- or --- in bibtex:
+fixLeadingDash :: Text -> Text
+fixLeadingDash t = case T.uncons t of
+ Just (c, ds) | (c == '–' || c == '—') && firstIsDigit ds -> T.cons '–' ds
+ _ -> t
+ where firstIsDigit = maybe False (isDigit . fst) . T.uncons
+
+getOldDate :: Text -> Bib Date
+getOldDate prefix = do
+ year' <- (readMay . T.unpack . fixLeadingDash . stringify
+ <$> getField (prefix <> "year")) <|> return Nothing
+ month' <- (parseMonth <$> getRawField (prefix <> "month"))
+ <|> return Nothing
+ day' <- (readMay . T.unpack <$> getRawField (prefix <> "day"))
+ <|> return Nothing
+ endyear' <- (readMay . T.unpack . fixLeadingDash . stringify
+ <$> getField (prefix <> "endyear")) <|> return Nothing
+ endmonth' <- (parseMonth . stringify
+ <$> getField (prefix <> "endmonth")) <|> return Nothing
+ endday' <- (readMay . T.unpack . stringify <$>
+ getField (prefix <> "endday")) <|> return Nothing
+ let toDateParts (y', m', d') =
+ DateParts $
+ case y' of
+ Nothing -> []
+ Just y ->
+ case m' of
+ Nothing -> [y]
+ Just m ->
+ case d' of
+ Nothing -> [y,m]
+ Just d -> [y,m,d]
+ let dateparts = filter (\x -> x /= DateParts [])
+ $ map toDateParts [(year',month',day'),
+ (endyear',endmonth',endday')]
+ literal <- if null dateparts
+ then Just <$> getRawField (prefix <> "year")
+ else return Nothing
+ return $
+ Date { dateParts = dateparts
+ , dateCirca = False
+ , dateSeason = Nothing
+ , dateLiteral = literal }
+
+getRawField :: Text -> Bib Text
+getRawField f =
+ (stringify <$> getField f)
+ <|> do fs <- asks fields
+ case Map.lookup f fs of
+ Just x -> return x
+ Nothing -> notFound f
+
+getLiteralList :: Text -> Bib [Inlines]
+getLiteralList f = do
+ fs <- asks fields
+ case Map.lookup f fs of
+ Just x -> latex' x >>= toLiteralList
+ Nothing -> notFound f
+
+-- separates items with semicolons
+getLiteralList' :: Text -> Bib Inlines
+getLiteralList' f = do
+ fs <- asks fields
+ case Map.lookup f fs of
+ Just x -> do
+ x' <- latex' x
+ case x' of
+ [Para xs] ->
+ return $ B.fromList
+ $ intercalate [Str ";", Space]
+ $ splitByAnd xs
+ [Plain xs] ->
+ return $ B.fromList
+ $ intercalate [Str ";", Space]
+ $ splitByAnd xs
+ _ -> mzero
+ Nothing -> notFound f
+
+splitByAnd :: [Inline] -> [[Inline]]
+splitByAnd = splitOn [Space, Str "and", Space]
+
+toLiteralList :: [Block] -> Bib [Inlines]
+toLiteralList [Para xs] =
+ return $ map B.fromList $ splitByAnd xs
+toLiteralList [Plain xs] = toLiteralList [Para xs]
+toLiteralList _ = mzero
+
+concatWith :: Char -> [Inlines] -> Inlines
+concatWith sep = foldl' go mempty
+ where go :: Inlines -> Inlines -> Inlines
+ go accum s
+ | s == mempty = accum
+ | otherwise =
+ case Seq.viewr (B.unMany accum) of
+ Seq.EmptyR -> s
+ _ Seq.:> Str x
+ | not (T.null x) &&
+ T.last x `elem` ("!?.,:;" :: String)
+ -> accum <> B.space <> s
+ _ -> accum <> B.str (T.singleton sep) <>
+ B.space <> s
+
+
+getNameList :: Options -> Text -> Bib [Name]
+getNameList opts f = do
+ fs <- asks fields
+ case Map.lookup f fs of
+ Just x -> latexNames opts x
+ Nothing -> notFound f
+
+toNameList :: Options -> [Block] -> Bib [Name]
+toNameList opts [Para xs] =
+ filter (/= emptyName) <$> mapM (toName opts . addSpaceAfterPeriod)
+ (splitByAnd xs)
+toNameList opts [Plain xs] = toNameList opts [Para xs]
+toNameList _ _ = mzero
+
+latexNames :: Options -> Text -> Bib [Name]
+latexNames opts t = latex' (T.strip t) >>= toNameList opts
+
+-- see issue 392 for motivation. We want to treat
+-- "J.G. Smith" and "J. G. Smith" the same.
+addSpaceAfterPeriod :: [Inline] -> [Inline]
+addSpaceAfterPeriod = go . splitStrWhen (=='.')
+ where
+ go [] = []
+ go (Str (T.unpack -> [c]):Str ".":Str (T.unpack -> [d]):xs)
+ | isLetter d
+ , isLetter c
+ , isUpper c
+ , isUpper d
+ = Str (T.singleton c):Str ".":Space:go (Str (T.singleton d):xs)
+ go (x:xs) = x:go xs
+
+emptyName :: Name
+emptyName =
+ Name { nameFamily = Nothing
+ , nameGiven = Nothing
+ , nameDroppingParticle = Nothing
+ , nameNonDroppingParticle = Nothing
+ , nameSuffix = Nothing
+ , nameLiteral = Nothing
+ , nameCommaSuffix = False
+ , nameStaticOrdering = False
+ }
+
+toName :: Options -> [Inline] -> Bib Name
+toName _ [Str "others"] =
+ return emptyName{ nameLiteral = Just "others" }
+toName _ [Span ("",[],[]) ils] = -- corporate author
+ return emptyName{ nameLiteral = Just $ stringify ils }
+ -- extended BibLaTeX name format - see #266
+toName _ ils@(Str ys:_) | T.any (== '=') ys = do
+ let commaParts = splitWhen (== Str ",")
+ . splitStrWhen (\c -> c == ',' || c == '=' || c == '\160')
+ $ ils
+ let addPart ag (Str "given" : Str "=" : xs) =
+ ag{ nameGiven = case nameGiven ag of
+ Nothing -> Just $ stringify xs
+ Just t -> Just $ t <> " " <> stringify xs }
+ addPart ag (Str "family" : Str "=" : xs) =
+ ag{ nameFamily = Just $ stringify xs }
+ addPart ag (Str "prefix" : Str "=" : xs) =
+ ag{ nameDroppingParticle = Just $ stringify xs }
+ addPart ag (Str "useprefix" : Str "=" : Str "true" : _) =
+ ag{ nameNonDroppingParticle = nameDroppingParticle ag
+ , nameDroppingParticle = Nothing }
+ addPart ag (Str "suffix" : Str "=" : xs) =
+ ag{ nameSuffix = Just $ stringify xs }
+ addPart ag (Space : xs) = addPart ag xs
+ addPart ag _ = ag
+ return $ foldl' addPart emptyName commaParts
+-- First von Last
+-- von Last, First
+-- von Last, Jr ,First
+-- NOTE: biblatex and bibtex differ on:
+-- Drummond de Andrade, Carlos
+-- bibtex takes "Drummond de" as the von;
+-- biblatex takes the whole as a last name.
+-- See https://github.com/plk/biblatex/issues/236
+-- Here we implement the more sensible biblatex behavior.
+toName opts ils = do
+ let useprefix = optionSet "useprefix" opts
+ let usecomma = optionSet "juniorcomma" opts
+ let bibtex = optionSet "bibtex" opts
+ let words' = wordsBy (\x -> x == Space || x == Str "\160")
+ let commaParts = map words' $ splitWhen (== Str ",")
+ $ splitStrWhen
+ (\c -> c == ',' || c == '\160') ils
+ let (first, vonlast, jr) =
+ case commaParts of
+ --- First is the longest sequence of white-space separated
+ -- words starting with an uppercase and that is not the
+ -- whole string. von is the longest sequence of whitespace
+ -- separated words whose last word starts with lower case
+ -- and that is not the whole string.
+ [fvl] -> let (caps', rest') = span isCapitalized fvl
+ in if null rest' && not (null caps')
+ then (init caps', [last caps'], [])
+ else (caps', rest', [])
+ [vl,f] -> (f, vl, [])
+ (vl:j:f:_) -> (f, vl, j )
+ [] -> ([], [], [])
+
+ let (von, lastname) =
+ if bibtex
+ then case span isCapitalized $ reverse vonlast of
+ ([],w:ws) -> (reverse ws, [w])
+ (vs, ws) -> (reverse ws, reverse vs)
+ else case break isCapitalized vonlast of
+ (vs@(_:_), []) -> (init vs, [last vs])
+ (vs, ws) -> (vs, ws)
+ let prefix = T.unwords $ map stringify von
+ let family = T.unwords $ map stringify lastname
+ let suffix = T.unwords $ map stringify jr
+ let given = T.unwords $ map stringify first
+ return
+ Name { nameFamily = if T.null family
+ then Nothing
+ else Just family
+ , nameGiven = if T.null given
+ then Nothing
+ else Just given
+ , nameDroppingParticle = if useprefix || T.null prefix
+ then Nothing
+ else Just prefix
+ , nameNonDroppingParticle = if useprefix && not (T.null prefix)
+ then Just prefix
+ else Nothing
+ , nameSuffix = if T.null suffix
+ then Nothing
+ else Just suffix
+ , nameLiteral = Nothing
+ , nameCommaSuffix = usecomma
+ , nameStaticOrdering = False
+ }
+
+splitStrWhen :: (Char -> Bool) -> [Inline] -> [Inline]
+splitStrWhen _ [] = []
+splitStrWhen p (Str xs : ys) = map Str (go xs) ++ splitStrWhen p ys
+ where go s =
+ let (w,z) = T.break p s
+ in if T.null z
+ then if T.null w
+ then []
+ else [w]
+ else if T.null w
+ then (T.take 1 z : go (T.drop 1 z))
+ else (w : T.take 1 z : go (T.drop 1 z))
+splitStrWhen p (x : ys) = x : splitStrWhen p ys
+
+ordinalize :: Locale -> Text -> Text
+ordinalize locale n =
+ let terms = localeTerms locale
+ pad0 t = case T.length t of
+ 0 -> "00"
+ 1 -> "0" <> t
+ _ -> t
+ in case Map.lookup ("ordinal-" <> pad0 n) terms <|>
+ Map.lookup "ordinal" terms of
+ Nothing -> n
+ Just [] -> n
+ Just (t:_) -> n <> snd t
+
+isCapitalized :: [Inline] -> Bool
+isCapitalized (Str (T.uncons -> Just (c,cs)) : rest)
+ | isUpper c = True
+ | isDigit c = isCapitalized (Str cs : rest)
+ | otherwise = False
+isCapitalized (_:rest) = isCapitalized rest
+isCapitalized [] = True
+
+optionSet :: Text -> Options -> Bool
+optionSet key opts = case lookup key opts of
+ Just "true" -> True
+ Just s -> s == mempty
+ _ -> False
+
+getTypeAndGenre :: Bib (Text, Maybe Text)
+getTypeAndGenre = do
+ lang <- gets localeLang
+ et <- asks entryType
+ guard $ et /= "xdata"
+ reftype' <- resolveKey' lang <$> getRawField "type"
+ <|> return mempty
+ st <- getRawField "entrysubtype" <|> return mempty
+ isEvent <- (True <$ (getRawField "eventdate"
+ <|> getRawField "eventtitle"
+ <|> getRawField "venue")) <|> return False
+ let reftype =
+ case et of
+ "article"
+ | st == "magazine" -> "article-magazine"
+ | st == "newspaper" -> "article-newspaper"
+ | otherwise -> "article-journal"
+ "book" -> "book"
+ "booklet" -> "pamphlet"
+ "bookinbook" -> "chapter"
+ "collection" -> "book"
+ "dataset" -> "dataset"
+ "electronic" -> "webpage"
+ "inbook" -> "chapter"
+ "incollection" -> "chapter"
+ "inreference" -> "entry-encyclopedia"
+ "inproceedings" -> "paper-conference"
+ "manual" -> "book"
+ "mastersthesis" -> "thesis"
+ "misc" -> ""
+ "mvbook" -> "book"
+ "mvcollection" -> "book"
+ "mvproceedings" -> "book"
+ "mvreference" -> "book"
+ "online" -> "webpage"
+ "patent" -> "patent"
+ "periodical"
+ | st == "magazine" -> "article-magazine"
+ | st == "newspaper" -> "article-newspaper"
+ | otherwise -> "article-journal"
+ "phdthesis" -> "thesis"
+ "proceedings" -> "book"
+ "reference" -> "book"
+ "report" -> "report"
+ "software" -> "book" -- no "software" type in CSL
+ "suppbook" -> "chapter"
+ "suppcollection" -> "chapter"
+ "suppperiodical"
+ | st == "magazine" -> "article-magazine"
+ | st == "newspaper" -> "article-newspaper"
+ | otherwise -> "article-journal"
+ "techreport" -> "report"
+ "thesis" -> "thesis"
+ "unpublished" -> if isEvent then "speech" else "manuscript"
+ "www" -> "webpage"
+ -- biblatex, "unsupported"
+ "artwork" -> "graphic"
+ "audio" -> "song" -- for audio *recordings*
+ "commentary" -> "book"
+ "image" -> "graphic" -- or "figure" ?
+ "jurisdiction" -> "legal_case"
+ "legislation" -> "legislation" -- or "bill" ?
+ "legal" -> "treaty"
+ "letter" -> "personal_communication"
+ "movie" -> "motion_picture"
+ "music" -> "song" -- for musical *recordings*
+ "performance" -> "speech"
+ "review" -> "review" -- or "review-book" ?
+ "standard" -> "legislation"
+ "video" -> "motion_picture"
+ -- biblatex-apa:
+ "data" -> "dataset"
+ "letters" -> "personal_communication"
+ "newsarticle" -> "article-newspaper"
+ _ -> ""
+
+ let refgenre =
+ case et of
+ "mastersthesis" -> if T.null reftype'
+ then Just $ resolveKey' lang "mathesis"
+ else Just reftype'
+ "phdthesis" -> if T.null reftype'
+ then Just $ resolveKey' lang "phdthesis"
+ else Just reftype'
+ _ -> if T.null reftype'
+ then Nothing
+ else Just reftype'
+ return (reftype, refgenre)
+
+
+-- transformKey source target key
+-- derived from Appendix C of bibtex manual
+transformKey :: Text -> Text -> Text -> [Text]
+transformKey _ _ "ids" = []
+transformKey _ _ "crossref" = []
+transformKey _ _ "xref" = []
+transformKey _ _ "entryset" = []
+transformKey _ _ "entrysubtype" = []
+transformKey _ _ "execute" = []
+transformKey _ _ "label" = []
+transformKey _ _ "options" = []
+transformKey _ _ "presort" = []
+transformKey _ _ "related" = []
+transformKey _ _ "relatedoptions" = []
+transformKey _ _ "relatedstring" = []
+transformKey _ _ "relatedtype" = []
+transformKey _ _ "shorthand" = []
+transformKey _ _ "shorthandintro" = []
+transformKey _ _ "sortkey" = []
+transformKey x y "author"
+ | x `elem` ["mvbook", "book"] &&
+ y `elem` ["inbook", "bookinbook", "suppbook"] = ["bookauthor", "author"]
+-- note: this next clause is not in the biblatex manual, but it makes
+-- sense in the context of CSL conversion:
+transformKey x y "author"
+ | x == "mvbook" && y == "book" = ["bookauthor", "author"]
+transformKey "mvbook" y z
+ | y `elem` ["book", "inbook", "bookinbook", "suppbook"] = standardTrans z
+transformKey x y z
+ | x `elem` ["mvcollection", "mvreference"] &&
+ y `elem` ["collection", "reference", "incollection", "inreference",
+ "suppcollection"] = standardTrans z
+transformKey "mvproceedings" y z
+ | y `elem` ["proceedings", "inproceedings"] = standardTrans z
+transformKey "book" y z
+ | y `elem` ["inbook", "bookinbook", "suppbook"] = bookTrans z
+transformKey x y z
+ | x `elem` ["collection", "reference"] &&
+ y `elem` ["incollection", "inreference", "suppcollection"] = bookTrans z
+transformKey "proceedings" "inproceedings" z = bookTrans z
+transformKey "periodical" y z
+ | y `elem` ["article", "suppperiodical"] =
+ case z of
+ "title" -> ["journaltitle"]
+ "subtitle" -> ["journalsubtitle"]
+ "shorttitle" -> []
+ "sorttitle" -> []
+ "indextitle" -> []
+ "indexsorttitle" -> []
+ _ -> [z]
+transformKey _ _ x = [x]
+
+standardTrans :: Text -> [Text]
+standardTrans z =
+ case z of
+ "title" -> ["maintitle"]
+ "subtitle" -> ["mainsubtitle"]
+ "titleaddon" -> ["maintitleaddon"]
+ "shorttitle" -> []
+ "sorttitle" -> []
+ "indextitle" -> []
+ "indexsorttitle" -> []
+ _ -> [z]
+
+bookTrans :: Text -> [Text]
+bookTrans z =
+ case z of
+ "title" -> ["booktitle"]
+ "subtitle" -> ["booksubtitle"]
+ "titleaddon" -> ["booktitleaddon"]
+ "shorttitle" -> []
+ "sorttitle" -> []
+ "indextitle" -> []
+ "indexsorttitle" -> []
+ _ -> [z]
+
+resolveKey :: Lang -> Inlines -> Inlines
+resolveKey lang ils = Walk.walk go ils
+ where go (Str s) = Str $ resolveKey' lang s
+ go x = x
+
+resolveKey' :: Lang -> Text -> Text
+resolveKey' lang@(Lang l _) k =
+ case Map.lookup l biblatexStringMap >>= Map.lookup (T.toLower k) of
+ Nothing -> k
+ Just (x, _) -> either (const k) stringify $ parseLaTeX lang x
+
+convertEnDash :: Inline -> Inline
+convertEnDash (Str s) = Str (T.map (\c -> if c == '–' then '-' else c) s)
+convertEnDash x = x