diff options
Diffstat (limited to 'src/Text/Pandoc/Citeproc/BibTeX.hs')
-rw-r--r-- | src/Text/Pandoc/Citeproc/BibTeX.hs | 1237 |
1 files changed, 1237 insertions, 0 deletions
diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs new file mode 100644 index 000000000..5919fee77 --- /dev/null +++ b/src/Text/Pandoc/Citeproc/BibTeX.hs @@ -0,0 +1,1237 @@ +{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE FlexibleContexts #-} +{-# OPTIONS_GHC -fno-warn-unused-do-bind #-} +----------------------------------------------------------------------------- +-- | +-- Module : Text.CSL.Input.Bibtex +-- Copyright : (c) John MacFarlane +-- License : BSD-style (see LICENSE) +-- +-- Maintainer : John MacFarlane <fiddlosopher@gmail.com> +-- Stability : unstable-- Portability : unportable +-- +----------------------------------------------------------------------------- + +module Text.Pandoc.Citeproc.BibTeX + ( Variant(..) + , readBibtexString + ) + where + +import Text.Pandoc.Definition +import Text.Pandoc.Builder as B +import Text.Pandoc.Readers.LaTeX (readLaTeX) +import Text.Pandoc.Extensions (Extension(..), extensionsFromList) +import Text.Pandoc.Options (ReaderOptions(..)) +import Text.Pandoc.Class (runPure) +import Text.Pandoc.Error (PandocError) +import Text.Pandoc.Shared (stringify) +import qualified Text.Pandoc.Walk as Walk +import Citeproc.Types +import Citeproc.CaseTransform (withSentenceCase) +import Citeproc.Pandoc (caseTransform) +import Text.Pandoc.Citeproc.Util (toIETF) +import Text.Pandoc.Citeproc.Data (biblatexStringMap) +import Data.Default +import Data.Text (Text) +import qualified Data.Text as T +import qualified Data.Map as Map +import Data.Maybe +import Text.Parsec hiding (State, many, (<|>)) +import Control.Applicative +import Data.List.Split (splitOn, splitWhen, wordsBy) +import Control.Monad.RWS hiding ((<>)) +import qualified Data.Sequence as Seq +import Data.Char (isAlphaNum, isDigit, isLetter, + isUpper, toLower, toUpper, + isLower, isPunctuation) +import Data.List (foldl', intercalate) +import Safe (readMay) + +data Variant = Bibtex | Biblatex + deriving (Show, Eq, Ord) + +-- | Parse BibTeX or BibLaTeX into a list of 'Reference's. +readBibtexString :: Variant -- ^ bibtex or biblatex + -> Locale -- ^ Locale + -> (Text -> Bool) -- ^ Filter on citation ids + -> Text -- ^ bibtex/biblatex text + -> Either ParseError [Reference Inlines] +readBibtexString variant locale idpred contents = do + case runParser (((resolveCrossRefs variant <$> bibEntries) <* eof) >>= + mapM (itemToReference locale variant) . + filter (idpred . identifier)) + (fromMaybe defaultLang $ localeLanguage locale, Map.empty) + "" contents of + Left err -> Left err + Right xs -> return xs + +defaultLang :: Lang +defaultLang = Lang "en" (Just "US") + +-- a map of bibtex "string" macros +type StringMap = Map.Map Text Text + +type BibParser = Parsec Text (Lang, StringMap) + +data Item = Item{ identifier :: Text + , sourcePos :: SourcePos + , entryType :: Text + , fields :: Map.Map Text Text + } + deriving (Show, Ord, Eq) + +itemToReference :: Locale -> Variant -> Item -> BibParser (Reference Inlines) +itemToReference locale variant item = do + setPosition (sourcePos item) + bib item $ do + let lang = fromMaybe defaultLang $ localeLanguage locale + modify $ \st -> st{ localeLang = lang, + untitlecase = case lang of + (Lang "en" _) -> True + _ -> False } + + id' <- asks identifier + otherIds <- (Just <$> getRawField "ids") + <|> return Nothing + (reftype, genre) <- getTypeAndGenre + -- hyphenation: + let getLangId = do + langid <- T.strip . T.toLower <$> getRawField "langid" + idopts <- T.strip . T.toLower . stringify <$> + getField "langidopts" <|> return "" + case (langid, idopts) of + ("english","variant=british") -> return "british" + ("english","variant=american") -> return "american" + ("english","variant=us") -> return "american" + ("english","variant=usmax") -> return "american" + ("english","variant=uk") -> return "british" + ("english","variant=australian") -> return "australian" + ("english","variant=newzealand") -> return "newzealand" + (x,_) -> return x + hyphenation <- (Just . toIETF . T.toLower <$> + (getLangId <|> getRawField "hyphenation")) + <|> return Nothing + modify $ \s -> s{ untitlecase = untitlecase s && + case hyphenation of + Just x -> "en-" `T.isPrefixOf` x + _ -> True } + + + opts <- (parseOptions <$> getRawField "options") <|> return [] + + et <- asks entryType + + -- titles + let isArticle = et `elem` + ["article", "periodical", "suppperiodical", "review"] + let isPeriodical = et == "periodical" + let isChapterlike = et `elem` + ["inbook","incollection","inproceedings","inreference","bookinbook"] + + let getFieldMaybe f = (Just <$> getField f) <|> return Nothing + + -- names + let getNameList' f = Just <$> + getNameList (("bibtex", case variant of + Bibtex -> "true" + Biblatex -> "false") : opts) f + + author' <- getNameList' "author" <|> return Nothing + containerAuthor' <- getNameList' "bookauthor" <|> return Nothing + translator' <- getNameList' "translator" <|> return Nothing + editortype <- getRawField "editortype" <|> return mempty + editor'' <- getNameList' "editor" <|> return Nothing + director'' <- getNameList' "director" <|> return Nothing + let (editor', director') = case editortype of + "director" -> (Nothing, editor'') + _ -> (editor'', director'') + -- FIXME: add same for editora, editorb, editorc + + -- dates + issued' <- (Just <$> (getDate "date" <|> getOldDate mempty)) <|> + return Nothing + eventDate' <- (Just <$> (getDate "eventdate" <|> getOldDate "event")) <|> + return Nothing + origDate' <- (Just <$> (getDate "origdate" <|> getOldDate "orig")) <|> + return Nothing + accessed' <- (Just <$> (getDate "urldate" <|> getOldDate "url")) <|> + return Nothing + + -- locators + pages' <- getFieldMaybe "pages" + volume' <- getFieldMaybe "volume" + part' <- getFieldMaybe "part" + volumes' <- getFieldMaybe "volumes" + pagetotal' <- getFieldMaybe "pagetotal" + chapter' <- getFieldMaybe "chapter" + edition' <- getFieldMaybe "edition" + version' <- getFieldMaybe "version" + (number', collectionNumber', issue') <- + (getField "number" >>= \x -> + if et `elem` ["book","collection","proceedings","reference", + "mvbook","mvcollection","mvproceedings", "mvreference", + "bookinbook","inbook", "incollection","inproceedings", + "inreference", "suppbook","suppcollection"] + then return (Nothing, Just x, Nothing) + else if isArticle + then (getField "issue" >>= \y -> + return (Nothing, Nothing, Just $ concatWith ',' [x,y])) + <|> return (Nothing, Nothing, Just x) + else return (Just x, Nothing, Nothing)) + <|> return (Nothing, Nothing, Nothing) + + -- titles + hasMaintitle <- (True <$ getRawField "maintitle") <|> return False + + title' <- Just <$> + ((guard isPeriodical >> getTitle "issuetitle") + <|> (guard hasMaintitle >> + guard (not isChapterlike) >> + getTitle "maintitle") + <|> getTitle "title") + <|> return Nothing + + subtitle' <- (guard isPeriodical >> getTitle "issuesubtitle") + <|> (guard hasMaintitle >> + guard (not isChapterlike) >> + getTitle "mainsubtitle") + <|> getTitle "subtitle" + <|> return mempty + titleaddon' <- (guard hasMaintitle >> + guard (not isChapterlike) >> + getTitle "maintitleaddon") + <|> getTitle "titleaddon" + <|> return mempty + + volumeTitle' <- Just <$> + ((guard hasMaintitle >> + guard (not isChapterlike) >> + getTitle "title") + <|> (guard hasMaintitle >> + guard isChapterlike >> + getTitle "booktitle")) + <|> return Nothing + volumeSubtitle' <- (guard hasMaintitle >> + guard (not isChapterlike) >> + getTitle "subtitle") + <|> (guard hasMaintitle >> + guard isChapterlike >> + getTitle "booksubtitle") + <|> return mempty + volumeTitleAddon' <- (guard hasMaintitle >> + guard (not isChapterlike) >> + getTitle "titleaddon") + <|> (guard hasMaintitle >> + guard isChapterlike >> + getTitle "booktitleaddon") + <|> return mempty + + containerTitle' <- Just <$> + ((guard isPeriodical >> getPeriodicalTitle "title") + <|> (guard isChapterlike >> getTitle "maintitle") + <|> (guard isChapterlike >> getTitle "booktitle") + <|> getPeriodicalTitle "journaltitle" + <|> getPeriodicalTitle "journal") + <|> return Nothing + containerSubtitle' <- (guard isPeriodical >> getPeriodicalTitle "subtitle") + <|> (guard isChapterlike >> getTitle "mainsubtitle") + <|> (guard isChapterlike >> getTitle "booksubtitle") + <|> getPeriodicalTitle "journalsubtitle" + <|> return mempty + containerTitleAddon' <- (guard isPeriodical >> + getPeriodicalTitle "titleaddon") + <|> (guard isChapterlike >> + getTitle "maintitleaddon") + <|> (guard isChapterlike >> + getTitle "booktitleaddon") + <|> return mempty + containerTitleShort' <- Just <$> + ((guard isPeriodical >> + guard (not hasMaintitle) >> + getField "shorttitle") + <|> getPeriodicalTitle "shortjournal") + <|> return Nothing + + -- change numerical series title to e.g. 'series 3' + let fixSeriesTitle [Str xs] | isNumber xs = + [Str (ordinalize locale xs), Space, Str (resolveKey' lang "jourser")] + fixSeriesTitle xs = xs + seriesTitle' <- (Just . B.fromList . fixSeriesTitle . + B.toList . resolveKey lang <$> + getTitle "series") <|> + return Nothing + shortTitle' <- Just <$> + ((guard (not hasMaintitle || isChapterlike) >> + getTitle "shorttitle") + <|> if (subtitle' /= mempty || titleaddon' /= mempty) && + not hasMaintitle + then getShortTitle False "title" + else getShortTitle True "title") + <|> return Nothing + + eventTitle' <- Just <$> getTitle "eventtitle" <|> return Nothing + origTitle' <- Just <$> getTitle "origtitle" <|> return Nothing + + -- publisher + pubfields <- mapM (\f -> Just `fmap` + (if variant == Bibtex || f == "howpublished" + then getField f + else getLiteralList' f) + <|> return Nothing) + ["school","institution","organization", "howpublished","publisher"] + let publisher' = case catMaybes pubfields of + [] -> Nothing + xs -> Just $ concatWith ';' xs + origpublisher' <- (Just <$> getField "origpublisher") <|> return Nothing + + -- places + venue' <- (Just <$> getField "venue") <|> return Nothing + address' <- Just <$> + (if variant == Bibtex + then getField "address" + else getLiteralList' "address" + <|> (guard (et /= "patent") >> + getLiteralList' "location")) + <|> return Nothing + origLocation' <- Just <$> + (if variant == Bibtex + then getField "origlocation" + else getLiteralList' "origlocation") + <|> return Nothing + jurisdiction' <- if reftype == "patent" + then Just <$> + (concatWith ';' . map (resolveKey lang) <$> + getLiteralList "location") <|> return Nothing + else return Nothing + + -- url, doi, isbn, etc.: + -- note that with eprinttype = arxiv, we take eprint to be a partial url + -- archivePrefix is an alias for eprinttype + url' <- (guard (et == "online" || lookup "url" opts /= Just "false") + >> Just <$> getRawField "url") + <|> (do etype <- getRawField "eprinttype" + eprint <- getRawField "eprint" + let baseUrl = + case T.toLower etype of + "arxiv" -> "http://arxiv.org/abs/" + "jstor" -> "http://www.jstor.org/stable/" + "pubmed" -> "http://www.ncbi.nlm.nih.gov/pubmed/" + "googlebooks" -> "http://books.google.com?id=" + _ -> "" + if T.null baseUrl + then mzero + else return $ Just $ baseUrl <> eprint) + <|> return Nothing + doi' <- (guard (lookup "doi" opts /= Just "false") >> + Just <$> getRawField "doi") + <|> return Nothing + isbn' <- Just <$> getRawField "isbn" <|> return Nothing + issn' <- Just <$> getRawField "issn" <|> return Nothing + pmid' <- Just <$> getRawField "pmid" <|> return Nothing + pmcid' <- Just <$> getRawField "pmcid" <|> return Nothing + callNumber' <- Just <$> getRawField "library" <|> return Nothing + + -- notes + annotation' <- Just <$> + (getField "annotation" <|> getField "annote") + <|> return Nothing + abstract' <- Just <$> getField "abstract" <|> return Nothing + keywords' <- Just <$> getField "keywords" <|> return Nothing + note' <- if et == "periodical" + then return Nothing + else Just <$> getField "note" <|> return Nothing + addendum' <- if variant == Bibtex + then return Nothing + else Just <$> getField "addendum" + <|> return Nothing + pubstate' <- ( (Just . resolveKey lang <$> getField "pubstate") + <|> case dateLiteral <$> issued' of + Just (Just "forthcoming") -> + return $ Just $ B.str "forthcoming" + _ -> return Nothing + ) + + + + + let addField (_, Nothing) = id + addField (f, Just x) = Map.insert f x + let vars = foldr addField mempty + [ ("other-ids", TextVal <$> otherIds) + , ("genre", TextVal <$> genre) + , ("language", TextVal <$> hyphenation) + -- dates + , ("accessed", DateVal <$> accessed') + , ("event-date", DateVal <$> eventDate') + , ("issued", DateVal <$> issued') + , ("original-date", DateVal <$> origDate') + -- names + , ("author", NamesVal <$> author') + , ("editor", NamesVal <$> editor') + , ("translator", NamesVal <$> translator') + , ("director", NamesVal <$> director') + , ("container-author", NamesVal <$> containerAuthor') + -- locators + , ("page", FancyVal . Walk.walk convertEnDash <$> pages') + , ("number-of-pages", FancyVal <$> pagetotal') + , ("volume", case (volume', part') of + (Nothing, Nothing) -> Nothing + (Just v, Nothing) -> Just $ FancyVal v + (Nothing, Just p) -> Just $ FancyVal p + (Just v, Just p) -> + Just $ FancyVal $ v <> B.str "." <> p) + , ("number-of-volumes", FancyVal <$> volumes') + , ("chapter-number", FancyVal <$> chapter') + , ("edition", FancyVal <$> edition') + , ("version", FancyVal <$> version') + , ("number", FancyVal <$> number') + , ("collection-number", FancyVal <$> collectionNumber') + , ("issue", FancyVal <$> issue') + -- title + , ("original-title", FancyVal <$> origTitle') + , ("event", FancyVal <$> eventTitle') + , ("title", case title' of + Just t -> Just $ FancyVal $ + concatWith '.' [ + concatWith ':' [t, subtitle'] + , titleaddon' ] + Nothing -> Nothing) + , ("volume-title", + case volumeTitle' of + Just t -> Just $ FancyVal $ + concatWith '.' [ + concatWith ':' [t, volumeSubtitle'] + , volumeTitleAddon' ] + Nothing -> Nothing) + , ("container-title", + case containerTitle' of + Just t -> Just $ FancyVal $ + concatWith '.' [ + concatWith ':' [t, + containerSubtitle'] + , containerTitleAddon' ] + Nothing -> Nothing) + , ("container-title-short", FancyVal <$> containerTitleShort') + , ("collection-title", FancyVal <$> seriesTitle') + , ("title-short", FancyVal <$> shortTitle') + -- publisher + , ("publisher", FancyVal <$> publisher') + , ("original-publisher", FancyVal <$> origpublisher') + -- places + , ("jurisdiction", FancyVal <$> jurisdiction') + , ("event-place", FancyVal <$> venue') + , ("publisher-place", FancyVal <$> address') + , ("original-publisher-place", FancyVal <$> origLocation') + -- urls + , ("url", TextVal <$> url') + , ("doi", TextVal <$> doi') + , ("isbn", TextVal <$> isbn') + , ("issn", TextVal <$> issn') + , ("pmcid", TextVal <$> pmcid') + , ("pmid", TextVal <$> pmid') + , ("call-number", TextVal <$> callNumber') + -- notes + , ("note", case catMaybes [note', addendum'] of + [] -> Nothing + xs -> return $ FancyVal $ concatWith '.' xs) + , ("annote", FancyVal <$> annotation') + , ("abstract", FancyVal <$> abstract') + , ("keyword", FancyVal <$> keywords') + , ("status", FancyVal <$> pubstate') + ] + return $ Reference + { referenceId = ItemId id' + , referenceType = reftype + , referenceDisambiguation = Nothing + , referenceVariables = vars } + + +bib :: Item -> Bib a -> BibParser a +bib entry m = fst <$> evalRWST m entry (BibState True (Lang "en" (Just "US"))) + +resolveCrossRefs :: Variant -> [Item] -> [Item] +resolveCrossRefs variant entries = + map (resolveCrossRef variant entries) entries + +resolveCrossRef :: Variant -> [Item] -> Item -> Item +resolveCrossRef variant entries entry = + Map.foldrWithKey go entry (fields entry) + where go key val entry' = + if key == "crossref" || key == "xdata" + then entry'{ fields = fields entry' <> + Map.fromList (getXrefFields variant + entry entries val) } + else entry' + +getXrefFields :: Variant -> Item -> [Item] -> Text -> [(Text, Text)] +getXrefFields variant baseEntry entries keys = do + let keys' = splitKeys keys + xrefEntry <- [e | e <- entries, identifier e `elem` keys'] + (k, v) <- Map.toList $ fields xrefEntry + if k == "crossref" || k == "xdata" + then do + xs <- mapM (getXrefFields variant baseEntry entries) + (splitKeys v) + (x, y) <- xs + guard $ isNothing $ Map.lookup x $ fields xrefEntry + return (x, y) + else do + k' <- case variant of + Bibtex -> return k + Biblatex -> transformKey + (entryType xrefEntry) (entryType baseEntry) k + guard $ isNothing $ Map.lookup k' $ fields baseEntry + return (k',v) + + + +data BibState = BibState{ + untitlecase :: Bool + , localeLang :: Lang + } + +type Bib = RWST Item () BibState BibParser + +blocksToInlines :: [Block] -> Inlines +blocksToInlines bs = + case bs of + [Plain xs] -> B.fromList xs + [Para xs] -> B.fromList xs + _ -> B.fromList $ Walk.query (:[]) bs + +adjustSpans :: Lang -> Inline -> Inline +adjustSpans lang (RawInline (Format "latex") s) + | s == "\\hyphen" || s == "\\hyphen " = Str "-" + | otherwise = parseRawLaTeX lang s +adjustSpans _ SoftBreak = Space +adjustSpans _ x = x + +parseRawLaTeX :: Lang -> Text -> Inline +parseRawLaTeX lang t@(T.stripPrefix "\\" -> Just xs) = + case parseLaTeX lang contents of + Right [Para ys] -> f command ys + Right [Plain ys] -> f command ys + Right [] -> f command [] + _ -> RawInline (Format "latex") t + where (command', contents') = T.break (\c -> c =='{' || c =='\\') xs + command = T.strip command' + contents = T.drop 1 $ T.dropEnd 1 contents' + f "mkbibquote" ils = Span nullAttr [Quoted DoubleQuote ils] + f "mkbibemph" ils = Span nullAttr [Emph ils] + f "mkbibitalic" ils = Span nullAttr [Emph ils] + f "mkbibbold" ils = Span nullAttr [Strong ils] + f "mkbibparens" ils = Span nullAttr $ + [Str "("] ++ ils ++ [Str ")"] + f "mkbibbrackets" ils = Span nullAttr $ + [Str "["] ++ ils ++ [Str "]"] + -- ... both should be nestable & should work in year fields + f "autocap" ils = Span nullAttr ils + -- TODO: should work in year fields + f "textnormal" ils = Span ("",["nodecor"],[]) ils + f "bibstring" [Str s] = Str $ resolveKey' lang s + f "adddot" [] = Str "." + f "adddotspace" [] = Span nullAttr [Str ".", Space] + f "addabbrvspace" [] = Space + f _ ils = Span nullAttr ils +parseRawLaTeX _ t = RawInline (Format "latex") t + +latex' :: Text -> Bib [Block] +latex' t = do + lang <- gets localeLang + case parseLaTeX lang t of + Left _ -> mzero + Right bs -> return bs + +parseLaTeX :: Lang -> Text -> Either PandocError [Block] +parseLaTeX lang t = + case runPure (readLaTeX + def{ readerExtensions = + extensionsFromList [Ext_raw_tex, Ext_smart] } t) of + Left e -> Left e + Right (Pandoc _ bs) -> Right $ Walk.walk (adjustSpans lang) bs + +latex :: Text -> Bib Inlines +latex = fmap blocksToInlines . latex' . T.strip + +type Options = [(Text, Text)] + +parseOptions :: Text -> Options +parseOptions = map breakOpt . T.splitOn "," + where breakOpt x = case T.break (=='=') x of + (w,v) -> (T.toLower $ T.strip w, + T.toLower $ T.strip $ T.drop 1 v) + +bibEntries :: BibParser [Item] +bibEntries = do + skipMany nonEntry + many (bibItem <* skipMany nonEntry) + where nonEntry = bibSkip <|> + try (char '@' >> + (bibComment <|> bibPreamble <|> bibString)) + +bibSkip :: BibParser () +bibSkip = skipMany1 (satisfy (/='@')) + +bibComment :: BibParser () +bibComment = do + cistring "comment" + spaces + void inBraces <|> bibSkip <|> return () + +bibPreamble :: BibParser () +bibPreamble = do + cistring "preamble" + spaces + void inBraces + +bibString :: BibParser () +bibString = do + cistring "string" + spaces + char '{' + spaces + (k,v) <- entField + char '}' + updateState (\(l,m) -> (l, Map.insert k v m)) + return () + +inBraces :: BibParser Text +inBraces = try $ do + char '{' + res <- manyTill + ( (T.pack <$> many1 (noneOf "{}\\")) + <|> (char '\\' >> ( (char '{' >> return "\\{") + <|> (char '}' >> return "\\}") + <|> return "\\")) + <|> (braced <$> inBraces) + ) (char '}') + return $ T.concat res + +braced :: Text -> Text +braced = T.cons '{' . flip T.snoc '}' + +inQuotes :: BibParser Text +inQuotes = do + char '"' + T.concat <$> manyTill + ( (T.pack <$> many1 (noneOf "\"\\{")) + <|> (char '\\' >> T.cons '\\' . T.singleton <$> anyChar) + <|> braced <$> inBraces + ) (char '"') + +fieldName :: BibParser Text +fieldName = resolveAlias . T.toLower . T.pack + <$> many1 (letter <|> digit <|> oneOf "-_:+") + +isBibtexKeyChar :: Char -> Bool +isBibtexKeyChar c = + isAlphaNum c || c `elem` (".:;?!`'()/*@_+=-[]*&" :: [Char]) + +bibItem :: BibParser Item +bibItem = do + char '@' + pos <- getPosition + enttype <- map toLower <$> many1 letter + spaces + char '{' + spaces + entid <- many1 (satisfy isBibtexKeyChar) + spaces + char ',' + spaces + entfields <- entField `sepEndBy` (char ',' >> spaces) + spaces + char '}' + return $ Item (T.pack entid) pos (T.pack enttype) (Map.fromList entfields) + +entField :: BibParser (Text, Text) +entField = do + k <- fieldName + spaces + char '=' + spaces + vs <- (expandString <|> inQuotes <|> inBraces <|> rawWord) `sepBy` + try (spaces >> char '#' >> spaces) + spaces + return (k, T.concat vs) + +resolveAlias :: Text -> Text +resolveAlias "archiveprefix" = "eprinttype" +resolveAlias "primaryclass" = "eprintclass" +resolveAlias s = s + +rawWord :: BibParser Text +rawWord = T.pack <$> many1 alphaNum + +expandString :: BibParser Text +expandString = do + k <- fieldName + (lang, strs) <- getState + case Map.lookup k strs of + Just v -> return v + Nothing -> return $ resolveKey' lang k + +cistring :: Text -> BibParser Text +cistring s = try (go s) + where go t = case T.uncons t of + Nothing -> return "" + Just (c,cs) -> do + x <- char (toLower c) <|> char (toUpper c) + xs <- go cs + return (T.cons x xs) + +splitKeys :: Text -> [Text] +splitKeys = filter (not . T.null) . T.split (\c -> c == ' ' || c == ',') + +-- Biblatex Localization Keys (see Biblatex manual) +-- Currently we only map a subset likely to be used in Biblatex *databases* +-- (in fields such as `type`, and via `\bibstring{}` commands). + +parseMonth :: Text -> Maybe Int +parseMonth s = + case T.toLower s of + "jan" -> Just 1 + "feb" -> Just 2 + "mar" -> Just 3 + "apr" -> Just 4 + "may" -> Just 5 + "jun" -> Just 6 + "jul" -> Just 7 + "aug" -> Just 8 + "sep" -> Just 9 + "oct" -> Just 10 + "nov" -> Just 11 + "dec" -> Just 12 + _ -> readMay (T.unpack s) + +notFound :: Text -> Bib a +notFound f = Prelude.fail $ T.unpack f ++ " not found" + +getField :: Text -> Bib Inlines +getField f = do + fs <- asks fields + case Map.lookup f fs of + Just x -> latex x + Nothing -> notFound f + + +getPeriodicalTitle :: Text -> Bib Inlines +getPeriodicalTitle f = do + ils <- getField f + return ils + +protectCase :: (Inlines -> Inlines) -> (Inlines -> Inlines) +protectCase f = Walk.walk unprotect . f . Walk.walk protect + where + protect (Span ("",[],[]) xs) = Span ("",["nocase"],[]) xs + protect x = x + unprotect (Span ("",["nocase"],[]) xs) + | hasLowercaseWord xs = Span ("",["nocase"],[]) xs + | otherwise = Span ("",[],[]) xs + unprotect x = x + hasLowercaseWord = any startsWithLowercase . splitStrWhen isPunctuation + startsWithLowercase (Str (T.uncons -> Just (x,_))) = isLower x + startsWithLowercase _ = False + +unTitlecase :: Maybe Lang -> Inlines -> Inlines +unTitlecase mblang = protectCase (caseTransform (withSentenceCase mblang)) + +getTitle :: Text -> Bib Inlines +getTitle f = do + ils <- getField f + utc <- gets untitlecase + lang <- gets localeLang + let processTitle = if utc then unTitlecase (Just lang) else id + return $ processTitle ils + +getShortTitle :: Bool -> Text -> Bib Inlines +getShortTitle requireColon f = do + ils <- splitStrWhen (==':') . B.toList <$> getTitle f + if not requireColon || containsColon ils + then return $ B.fromList $ upToColon ils + else return mempty + +containsColon :: [Inline] -> Bool +containsColon xs = Str ":" `elem` xs + +upToColon :: [Inline] -> [Inline] +upToColon xs = takeWhile (/= Str ":") xs + +isNumber :: Text -> Bool +isNumber t = case T.uncons t of + Just ('-', ds) -> T.all isDigit ds + Just _ -> T.all isDigit t + Nothing -> False + +getDate :: Text -> Bib Date +getDate f = do + -- the ~ can used for approx dates, but the latex reader + -- parses this as a nonbreaking space, so we need to convert it back! + let nbspToTilde '\160' = '~' + nbspToTilde c = c + mbd <- rawDateEDTF . T.map nbspToTilde <$> getRawField f + case mbd of + Nothing -> Prelude.fail "expected date" + Just d -> return d + +-- A negative (BC) year might be written with -- or --- in bibtex: +fixLeadingDash :: Text -> Text +fixLeadingDash t = case T.uncons t of + Just (c, ds) | (c == '–' || c == '—') && firstIsDigit ds -> T.cons '–' ds + _ -> t + where firstIsDigit = maybe False (isDigit . fst) . T.uncons + +getOldDate :: Text -> Bib Date +getOldDate prefix = do + year' <- (readMay . T.unpack . fixLeadingDash . stringify + <$> getField (prefix <> "year")) <|> return Nothing + month' <- (parseMonth <$> getRawField (prefix <> "month")) + <|> return Nothing + day' <- (readMay . T.unpack <$> getRawField (prefix <> "day")) + <|> return Nothing + endyear' <- (readMay . T.unpack . fixLeadingDash . stringify + <$> getField (prefix <> "endyear")) <|> return Nothing + endmonth' <- (parseMonth . stringify + <$> getField (prefix <> "endmonth")) <|> return Nothing + endday' <- (readMay . T.unpack . stringify <$> + getField (prefix <> "endday")) <|> return Nothing + let toDateParts (y', m', d') = + DateParts $ + case y' of + Nothing -> [] + Just y -> + case m' of + Nothing -> [y] + Just m -> + case d' of + Nothing -> [y,m] + Just d -> [y,m,d] + let dateparts = filter (\x -> x /= DateParts []) + $ map toDateParts [(year',month',day'), + (endyear',endmonth',endday')] + literal <- if null dateparts + then Just <$> getRawField (prefix <> "year") + else return Nothing + return $ + Date { dateParts = dateparts + , dateCirca = False + , dateSeason = Nothing + , dateLiteral = literal } + +getRawField :: Text -> Bib Text +getRawField f = + (stringify <$> getField f) + <|> do fs <- asks fields + case Map.lookup f fs of + Just x -> return x + Nothing -> notFound f + +getLiteralList :: Text -> Bib [Inlines] +getLiteralList f = do + fs <- asks fields + case Map.lookup f fs of + Just x -> latex' x >>= toLiteralList + Nothing -> notFound f + +-- separates items with semicolons +getLiteralList' :: Text -> Bib Inlines +getLiteralList' f = do + fs <- asks fields + case Map.lookup f fs of + Just x -> do + x' <- latex' x + case x' of + [Para xs] -> + return $ B.fromList + $ intercalate [Str ";", Space] + $ splitByAnd xs + [Plain xs] -> + return $ B.fromList + $ intercalate [Str ";", Space] + $ splitByAnd xs + _ -> mzero + Nothing -> notFound f + +splitByAnd :: [Inline] -> [[Inline]] +splitByAnd = splitOn [Space, Str "and", Space] + +toLiteralList :: [Block] -> Bib [Inlines] +toLiteralList [Para xs] = + return $ map B.fromList $ splitByAnd xs +toLiteralList [Plain xs] = toLiteralList [Para xs] +toLiteralList _ = mzero + +concatWith :: Char -> [Inlines] -> Inlines +concatWith sep = foldl' go mempty + where go :: Inlines -> Inlines -> Inlines + go accum s + | s == mempty = accum + | otherwise = + case Seq.viewr (B.unMany accum) of + Seq.EmptyR -> s + _ Seq.:> Str x + | not (T.null x) && + T.last x `elem` ("!?.,:;" :: String) + -> accum <> B.space <> s + _ -> accum <> B.str (T.singleton sep) <> + B.space <> s + + +getNameList :: Options -> Text -> Bib [Name] +getNameList opts f = do + fs <- asks fields + case Map.lookup f fs of + Just x -> latexNames opts x + Nothing -> notFound f + +toNameList :: Options -> [Block] -> Bib [Name] +toNameList opts [Para xs] = + filter (/= emptyName) <$> mapM (toName opts . addSpaceAfterPeriod) + (splitByAnd xs) +toNameList opts [Plain xs] = toNameList opts [Para xs] +toNameList _ _ = mzero + +latexNames :: Options -> Text -> Bib [Name] +latexNames opts t = latex' (T.strip t) >>= toNameList opts + +-- see issue 392 for motivation. We want to treat +-- "J.G. Smith" and "J. G. Smith" the same. +addSpaceAfterPeriod :: [Inline] -> [Inline] +addSpaceAfterPeriod = go . splitStrWhen (=='.') + where + go [] = [] + go (Str (T.unpack -> [c]):Str ".":Str (T.unpack -> [d]):xs) + | isLetter d + , isLetter c + , isUpper c + , isUpper d + = Str (T.singleton c):Str ".":Space:go (Str (T.singleton d):xs) + go (x:xs) = x:go xs + +emptyName :: Name +emptyName = + Name { nameFamily = Nothing + , nameGiven = Nothing + , nameDroppingParticle = Nothing + , nameNonDroppingParticle = Nothing + , nameSuffix = Nothing + , nameLiteral = Nothing + , nameCommaSuffix = False + , nameStaticOrdering = False + } + +toName :: Options -> [Inline] -> Bib Name +toName _ [Str "others"] = + return emptyName{ nameLiteral = Just "others" } +toName _ [Span ("",[],[]) ils] = -- corporate author + return emptyName{ nameLiteral = Just $ stringify ils } + -- extended BibLaTeX name format - see #266 +toName _ ils@(Str ys:_) | T.any (== '=') ys = do + let commaParts = splitWhen (== Str ",") + . splitStrWhen (\c -> c == ',' || c == '=' || c == '\160') + $ ils + let addPart ag (Str "given" : Str "=" : xs) = + ag{ nameGiven = case nameGiven ag of + Nothing -> Just $ stringify xs + Just t -> Just $ t <> " " <> stringify xs } + addPart ag (Str "family" : Str "=" : xs) = + ag{ nameFamily = Just $ stringify xs } + addPart ag (Str "prefix" : Str "=" : xs) = + ag{ nameDroppingParticle = Just $ stringify xs } + addPart ag (Str "useprefix" : Str "=" : Str "true" : _) = + ag{ nameNonDroppingParticle = nameDroppingParticle ag + , nameDroppingParticle = Nothing } + addPart ag (Str "suffix" : Str "=" : xs) = + ag{ nameSuffix = Just $ stringify xs } + addPart ag (Space : xs) = addPart ag xs + addPart ag _ = ag + return $ foldl' addPart emptyName commaParts +-- First von Last +-- von Last, First +-- von Last, Jr ,First +-- NOTE: biblatex and bibtex differ on: +-- Drummond de Andrade, Carlos +-- bibtex takes "Drummond de" as the von; +-- biblatex takes the whole as a last name. +-- See https://github.com/plk/biblatex/issues/236 +-- Here we implement the more sensible biblatex behavior. +toName opts ils = do + let useprefix = optionSet "useprefix" opts + let usecomma = optionSet "juniorcomma" opts + let bibtex = optionSet "bibtex" opts + let words' = wordsBy (\x -> x == Space || x == Str "\160") + let commaParts = map words' $ splitWhen (== Str ",") + $ splitStrWhen + (\c -> c == ',' || c == '\160') ils + let (first, vonlast, jr) = + case commaParts of + --- First is the longest sequence of white-space separated + -- words starting with an uppercase and that is not the + -- whole string. von is the longest sequence of whitespace + -- separated words whose last word starts with lower case + -- and that is not the whole string. + [fvl] -> let (caps', rest') = span isCapitalized fvl + in if null rest' && not (null caps') + then (init caps', [last caps'], []) + else (caps', rest', []) + [vl,f] -> (f, vl, []) + (vl:j:f:_) -> (f, vl, j ) + [] -> ([], [], []) + + let (von, lastname) = + if bibtex + then case span isCapitalized $ reverse vonlast of + ([],w:ws) -> (reverse ws, [w]) + (vs, ws) -> (reverse ws, reverse vs) + else case break isCapitalized vonlast of + (vs@(_:_), []) -> (init vs, [last vs]) + (vs, ws) -> (vs, ws) + let prefix = T.unwords $ map stringify von + let family = T.unwords $ map stringify lastname + let suffix = T.unwords $ map stringify jr + let given = T.unwords $ map stringify first + return + Name { nameFamily = if T.null family + then Nothing + else Just family + , nameGiven = if T.null given + then Nothing + else Just given + , nameDroppingParticle = if useprefix || T.null prefix + then Nothing + else Just prefix + , nameNonDroppingParticle = if useprefix && not (T.null prefix) + then Just prefix + else Nothing + , nameSuffix = if T.null suffix + then Nothing + else Just suffix + , nameLiteral = Nothing + , nameCommaSuffix = usecomma + , nameStaticOrdering = False + } + +splitStrWhen :: (Char -> Bool) -> [Inline] -> [Inline] +splitStrWhen _ [] = [] +splitStrWhen p (Str xs : ys) = map Str (go xs) ++ splitStrWhen p ys + where go s = + let (w,z) = T.break p s + in if T.null z + then if T.null w + then [] + else [w] + else if T.null w + then (T.take 1 z : go (T.drop 1 z)) + else (w : T.take 1 z : go (T.drop 1 z)) +splitStrWhen p (x : ys) = x : splitStrWhen p ys + +ordinalize :: Locale -> Text -> Text +ordinalize locale n = + let terms = localeTerms locale + pad0 t = case T.length t of + 0 -> "00" + 1 -> "0" <> t + _ -> t + in case Map.lookup ("ordinal-" <> pad0 n) terms <|> + Map.lookup "ordinal" terms of + Nothing -> n + Just [] -> n + Just (t:_) -> n <> snd t + +isCapitalized :: [Inline] -> Bool +isCapitalized (Str (T.uncons -> Just (c,cs)) : rest) + | isUpper c = True + | isDigit c = isCapitalized (Str cs : rest) + | otherwise = False +isCapitalized (_:rest) = isCapitalized rest +isCapitalized [] = True + +optionSet :: Text -> Options -> Bool +optionSet key opts = case lookup key opts of + Just "true" -> True + Just s -> s == mempty + _ -> False + +getTypeAndGenre :: Bib (Text, Maybe Text) +getTypeAndGenre = do + lang <- gets localeLang + et <- asks entryType + guard $ et /= "xdata" + reftype' <- resolveKey' lang <$> getRawField "type" + <|> return mempty + st <- getRawField "entrysubtype" <|> return mempty + isEvent <- (True <$ (getRawField "eventdate" + <|> getRawField "eventtitle" + <|> getRawField "venue")) <|> return False + let reftype = + case et of + "article" + | st == "magazine" -> "article-magazine" + | st == "newspaper" -> "article-newspaper" + | otherwise -> "article-journal" + "book" -> "book" + "booklet" -> "pamphlet" + "bookinbook" -> "chapter" + "collection" -> "book" + "dataset" -> "dataset" + "electronic" -> "webpage" + "inbook" -> "chapter" + "incollection" -> "chapter" + "inreference" -> "entry-encyclopedia" + "inproceedings" -> "paper-conference" + "manual" -> "book" + "mastersthesis" -> "thesis" + "misc" -> "" + "mvbook" -> "book" + "mvcollection" -> "book" + "mvproceedings" -> "book" + "mvreference" -> "book" + "online" -> "webpage" + "patent" -> "patent" + "periodical" + | st == "magazine" -> "article-magazine" + | st == "newspaper" -> "article-newspaper" + | otherwise -> "article-journal" + "phdthesis" -> "thesis" + "proceedings" -> "book" + "reference" -> "book" + "report" -> "report" + "software" -> "book" -- no "software" type in CSL + "suppbook" -> "chapter" + "suppcollection" -> "chapter" + "suppperiodical" + | st == "magazine" -> "article-magazine" + | st == "newspaper" -> "article-newspaper" + | otherwise -> "article-journal" + "techreport" -> "report" + "thesis" -> "thesis" + "unpublished" -> if isEvent then "speech" else "manuscript" + "www" -> "webpage" + -- biblatex, "unsupported" + "artwork" -> "graphic" + "audio" -> "song" -- for audio *recordings* + "commentary" -> "book" + "image" -> "graphic" -- or "figure" ? + "jurisdiction" -> "legal_case" + "legislation" -> "legislation" -- or "bill" ? + "legal" -> "treaty" + "letter" -> "personal_communication" + "movie" -> "motion_picture" + "music" -> "song" -- for musical *recordings* + "performance" -> "speech" + "review" -> "review" -- or "review-book" ? + "standard" -> "legislation" + "video" -> "motion_picture" + -- biblatex-apa: + "data" -> "dataset" + "letters" -> "personal_communication" + "newsarticle" -> "article-newspaper" + _ -> "" + + let refgenre = + case et of + "mastersthesis" -> if T.null reftype' + then Just $ resolveKey' lang "mathesis" + else Just reftype' + "phdthesis" -> if T.null reftype' + then Just $ resolveKey' lang "phdthesis" + else Just reftype' + _ -> if T.null reftype' + then Nothing + else Just reftype' + return (reftype, refgenre) + + +-- transformKey source target key +-- derived from Appendix C of bibtex manual +transformKey :: Text -> Text -> Text -> [Text] +transformKey _ _ "ids" = [] +transformKey _ _ "crossref" = [] +transformKey _ _ "xref" = [] +transformKey _ _ "entryset" = [] +transformKey _ _ "entrysubtype" = [] +transformKey _ _ "execute" = [] +transformKey _ _ "label" = [] +transformKey _ _ "options" = [] +transformKey _ _ "presort" = [] +transformKey _ _ "related" = [] +transformKey _ _ "relatedoptions" = [] +transformKey _ _ "relatedstring" = [] +transformKey _ _ "relatedtype" = [] +transformKey _ _ "shorthand" = [] +transformKey _ _ "shorthandintro" = [] +transformKey _ _ "sortkey" = [] +transformKey x y "author" + | x `elem` ["mvbook", "book"] && + y `elem` ["inbook", "bookinbook", "suppbook"] = ["bookauthor", "author"] +-- note: this next clause is not in the biblatex manual, but it makes +-- sense in the context of CSL conversion: +transformKey x y "author" + | x == "mvbook" && y == "book" = ["bookauthor", "author"] +transformKey "mvbook" y z + | y `elem` ["book", "inbook", "bookinbook", "suppbook"] = standardTrans z +transformKey x y z + | x `elem` ["mvcollection", "mvreference"] && + y `elem` ["collection", "reference", "incollection", "inreference", + "suppcollection"] = standardTrans z +transformKey "mvproceedings" y z + | y `elem` ["proceedings", "inproceedings"] = standardTrans z +transformKey "book" y z + | y `elem` ["inbook", "bookinbook", "suppbook"] = bookTrans z +transformKey x y z + | x `elem` ["collection", "reference"] && + y `elem` ["incollection", "inreference", "suppcollection"] = bookTrans z +transformKey "proceedings" "inproceedings" z = bookTrans z +transformKey "periodical" y z + | y `elem` ["article", "suppperiodical"] = + case z of + "title" -> ["journaltitle"] + "subtitle" -> ["journalsubtitle"] + "shorttitle" -> [] + "sorttitle" -> [] + "indextitle" -> [] + "indexsorttitle" -> [] + _ -> [z] +transformKey _ _ x = [x] + +standardTrans :: Text -> [Text] +standardTrans z = + case z of + "title" -> ["maintitle"] + "subtitle" -> ["mainsubtitle"] + "titleaddon" -> ["maintitleaddon"] + "shorttitle" -> [] + "sorttitle" -> [] + "indextitle" -> [] + "indexsorttitle" -> [] + _ -> [z] + +bookTrans :: Text -> [Text] +bookTrans z = + case z of + "title" -> ["booktitle"] + "subtitle" -> ["booksubtitle"] + "titleaddon" -> ["booktitleaddon"] + "shorttitle" -> [] + "sorttitle" -> [] + "indextitle" -> [] + "indexsorttitle" -> [] + _ -> [z] + +resolveKey :: Lang -> Inlines -> Inlines +resolveKey lang ils = Walk.walk go ils + where go (Str s) = Str $ resolveKey' lang s + go x = x + +resolveKey' :: Lang -> Text -> Text +resolveKey' lang@(Lang l _) k = + case Map.lookup l biblatexStringMap >>= Map.lookup (T.toLower k) of + Nothing -> k + Just (x, _) -> either (const k) stringify $ parseLaTeX lang x + +convertEnDash :: Inline -> Inline +convertEnDash (Str s) = Str (T.map (\c -> if c == '–' then '-' else c) s) +convertEnDash x = x |