diff options
Diffstat (limited to 'src/Text/Pandoc/Citeproc')
| -rw-r--r-- | src/Text/Pandoc/Citeproc/BibTeX.hs | 357 | ||||
| -rw-r--r-- | src/Text/Pandoc/Citeproc/Data.hs | 13 | ||||
| -rw-r--r-- | src/Text/Pandoc/Citeproc/Locator.hs | 24 | ||||
| -rw-r--r-- | src/Text/Pandoc/Citeproc/MetaValue.hs | 8 |
4 files changed, 331 insertions, 71 deletions
diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs index 552339df0..c178de6e9 100644 --- a/src/Text/Pandoc/Citeproc/BibTeX.hs +++ b/src/Text/Pandoc/Citeproc/BibTeX.hs @@ -1,4 +1,5 @@ {-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE FlexibleContexts #-} @@ -17,6 +18,7 @@ module Text.Pandoc.Citeproc.BibTeX ( Variant(..) , readBibtexString + , writeBibtexString ) where @@ -24,10 +26,11 @@ import Text.Pandoc.Definition import Text.Pandoc.Builder as B import Text.Pandoc.Readers.LaTeX (readLaTeX) import Text.Pandoc.Extensions (Extension(..), extensionsFromList) -import Text.Pandoc.Options (ReaderOptions(..)) -import Text.Pandoc.Class (runPure) +import Text.Pandoc.Options (ReaderOptions(..), WriterOptions) import Text.Pandoc.Error (PandocError) import Text.Pandoc.Shared (stringify) +import Text.Pandoc.Writers.LaTeX (writeLaTeX) +import Text.Pandoc.Class (runPure) import qualified Text.Pandoc.Walk as Walk import Citeproc.Types import Citeproc.Pandoc () @@ -46,17 +49,21 @@ import qualified Data.Sequence as Seq import Data.Char (isAlphaNum, isDigit, isLetter, isUpper, toLower, toUpper, isLower, isPunctuation) -import Data.List (foldl', intercalate) +import Data.List (foldl', intercalate, intersperse) import Safe (readMay) +import Text.Printf (printf) +import Text.DocLayout (literal, hsep, nest, hang, Doc(..), + braces, ($$), cr) data Variant = Bibtex | Biblatex deriving (Show, Eq, Ord) -- | Parse BibTeX or BibLaTeX into a list of 'Reference's. -readBibtexString :: Variant -- ^ bibtex or biblatex +readBibtexString :: ToSources a + => Variant -- ^ bibtex or biblatex -> Locale -- ^ Locale -> (Text -> Bool) -- ^ Filter on citation ids - -> Text -- ^ bibtex/biblatex text + -> a -- ^ bibtex/biblatex text -> Either ParseError [Reference Inlines] readBibtexString variant locale idpred contents = do case runParser (((resolveCrossRefs variant <$> bibEntries) <* eof) >>= @@ -64,17 +71,280 @@ readBibtexString variant locale idpred contents = do filter (\item -> idpred (identifier item) && entryType item /= "xdata")) (fromMaybe defaultLang $ localeLanguage locale, Map.empty) - "" contents of + (initialSourceName sources) sources of Left err -> Left err Right xs -> return xs + where + sources = toSources contents + +-- | Write BibTeX or BibLaTeX given given a 'Reference'. +writeBibtexString :: WriterOptions -- ^ options (for writing LaTex) + -> Variant -- ^ bibtex or biblatex + -> Maybe Lang -- ^ Language + -> Reference Inlines -- ^ Reference to write + -> Doc Text +writeBibtexString opts variant mblang ref = + "@" <> bibtexType <> "{" <> literal (unItemId (referenceId ref)) <> "," + $$ nest 2 (renderFields fs) + $$ "}" <> cr + + where + bibtexType = + case referenceType ref of + "article-magazine" -> "article" + "article-newspaper" -> "article" + "article-journal" -> "article" + "book" -> "book" + "pamphlet" -> "booklet" + "dataset" | variant == Biblatex -> "dataset" + "webpage" | variant == Biblatex -> "online" + "chapter" -> case getVariable "editor" of + Just _ -> "incollection" + Nothing -> "inbook" + "entry-encyclopedia" | variant == Biblatex -> "inreference" + | otherwise -> "inbook" + "paper-conference" -> "inproceedings" + "thesis" -> case getVariableAsText "genre" of + Just "mathesis" -> "mastersthesis" + _ -> "phdthesis" + "patent" | variant == Biblatex -> "patent" + "report" | variant == Biblatex -> "report" + | otherwise -> "techreport" + "speech" -> "unpublished" + "manuscript" -> "unpublished" + "graphic" | variant == Biblatex -> "artwork" + "song" | variant == Biblatex -> "music" + "legal_case" | variant == Biblatex -> "jurisdictionN" + "legislation" | variant == Biblatex -> "legislation" + "treaty" | variant == Biblatex -> "legal" + "personal_communication" | variant == Biblatex -> "letter" + "motion_picture" | variant == Biblatex -> "movie" + "review" | variant == Biblatex -> "review" + _ -> "misc" + + mbSubtype = + case referenceType ref of + "article-magazine" -> Just "magazine" + "article-newspaper" -> Just "newspaper" + _ -> Nothing + + fs = + case variant of + Biblatex -> + [ "author" + , "editor" + , "translator" + , "publisher" + , "title" + , "booktitle" + , "journal" + , "series" + , "edition" + , "volume" + , "volumes" + , "number" + , "pages" + , "date" + , "eventdate" + , "urldate" + , "address" + , "url" + , "doi" + , "isbn" + , "issn" + , "type" + , "entrysubtype" + , "note" + , "langid" + , "abstract" + , "keywords" + , "annote" + ] + Bibtex -> + [ "author" + , "editor" + , "translator" + , "publisher" + , "title" + , "booktitle" + , "journal" + , "series" + , "edition" + , "volume" + , "number" + , "pages" + , "year" + , "month" + , "address" + , "type" + , "note" + , "annote" + ] + + valToInlines (TextVal t) = B.text t + valToInlines (FancyVal ils) = ils + valToInlines (NumVal n) = B.text (T.pack $ show n) + valToInlines (NamesVal names) = + mconcat $ intersperse (B.space <> B.text "and" <> B.space) + $ map renderName names + valToInlines (DateVal date) = B.text $ + case dateLiteral date of + Just t -> t + Nothing -> T.intercalate "/" (map renderDatePart (dateParts date)) <> + (if dateCirca date then "~" else mempty) + + renderDatePart (DateParts xs) = T.intercalate "-" $ + map (T.pack . printf "%02d") xs + + renderName name = + case nameLiteral name of + Just t -> B.text t + Nothing -> spacedMaybes + [ nameNonDroppingParticle name + , nameFamily name + , if nameCommaSuffix name + then (", " <>) <$> nameSuffix name + else nameSuffix name ] + <> + spacedMaybes + [ (", " <>) <$> nameGiven name, + nameDroppingParticle name ] + + mblang' = case getVariableAsText "language" of + Just l -> either (const Nothing) Just $ parseLang l + Nothing -> mblang + + titlecase = case mblang' of + Just lang | langLanguage lang == "en" + -> titlecase' + Nothing -> titlecase' + _ -> + case variant of + Bibtex -> B.spanWith nullAttr + -- BibTex lacks a language field, so we wrap non-English + -- titles in {} to protect case. + Biblatex -> id + + titlecase' = addTextCase mblang' TitleCase . + (\ils -> B.fromList + (case B.toList ils of + Str t : xs -> Str t : Walk.walk spanAroundCapitalizedWords xs + xs -> Walk.walk spanAroundCapitalizedWords xs)) + + -- protect capitalized words when we titlecase + spanAroundCapitalizedWords (Str t) + | not (T.all (\c -> isLower c || not (isLetter c)) t) = + Span ("",["nocase"],[]) [Str t] + spanAroundCapitalizedWords x = x + + spacedMaybes = mconcat . intersperse B.space . mapMaybe (fmap B.text) + + toLaTeX x = + case runPure (writeLaTeX opts $ doc (B.plain x)) of + Left _ -> Nothing + Right t -> Just $ hsep . map literal $ T.words t + + renderField :: Text -> Maybe (Doc Text) + renderField name = + (((literal name) <>) . hang 2 " = " . braces) + <$> getContentsFor name + + getVariable v = lookupVariable (toVariable v) ref + + getVariableAsText v = (stringify . valToInlines) <$> getVariable v + + getYear val = + case val of + DateVal date -> + case dateLiteral date of + Just t -> toLaTeX (B.text t) + Nothing -> + case dateParts date of + [DateParts (y1:_), DateParts (y2:_)] -> + Just $ literal (T.pack (printf "%04d" y1) <> "--" <> + T.pack (printf "%04d" y2)) + [DateParts (y1:_)] -> + Just $ literal (T.pack (printf "%04d" y1)) + _ -> Nothing + _ -> Nothing + + toMonth 1 = "jan" + toMonth 2 = "feb" + toMonth 3 = "mar" + toMonth 4 = "apr" + toMonth 5 = "may" + toMonth 6 = "jun" + toMonth 7 = "jul" + toMonth 8 = "aug" + toMonth 9 = "sep" + toMonth 10 = "oct" + toMonth 11 = "nov" + toMonth 12 = "dec" + toMonth x = T.pack $ show x + + getMonth val = + case val of + DateVal date -> + case dateParts date of + [DateParts (_:m1:_), DateParts (_:m2:_)] -> + Just $ literal (toMonth m1 <> "--" <> toMonth m2) + [DateParts (_:m1:_)] -> Just $ literal (toMonth m1) + _ -> Nothing + _ -> Nothing + + getContentsFor :: Text -> Maybe (Doc Text) + getContentsFor "type" = + getVariableAsText "genre" >>= + \case + "mathesis" -> Just "mastersthesis" + "phdthesis" -> Just "phdthesis" + _ -> Nothing + getContentsFor "entrysubtype" = literal <$> mbSubtype + getContentsFor "journal" + | bibtexType `elem` ["article", "periodical", "suppperiodical", "review"] + = getVariable "container-title" >>= toLaTeX . valToInlines + | otherwise = Nothing + getContentsFor "booktitle" + | bibtexType `elem` + ["inbook","incollection","inproceedings","inreference","bookinbook"] + = (getVariable "volume-title" <|> getVariable "container-title") + >>= toLaTeX . valToInlines + | otherwise = Nothing + getContentsFor "series" = getVariable "collection-title" + >>= toLaTeX . valToInlines + getContentsFor "address" = getVariable "publisher-place" + >>= toLaTeX . valToInlines + getContentsFor "date" = getVariable "issued" >>= toLaTeX . valToInlines + getContentsFor "eventdate" = getVariable "event-date" >>= toLaTeX . valToInlines + getContentsFor "urldate" = getVariable "accessed" >>= toLaTeX . valToInlines + getContentsFor "year" = getVariable "issued" >>= getYear + getContentsFor "month" = getVariable "issued" >>= getMonth + getContentsFor "pages" = getVariable "page" >>= toLaTeX . valToInlines + getContentsFor "langid" = getVariable "language" >>= toLaTeX . valToInlines + getContentsFor "number" = (getVariable "number" + <|> getVariable "collection-number" + <|> getVariable "issue") >>= toLaTeX . valToInlines + + getContentsFor x = getVariable x >>= + if isURL x + then Just . literal . stringify . valToInlines + else toLaTeX . + (if x == "title" + then titlecase + else id) . + valToInlines + + isURL x = x `elem` ["url","doi","issn","isbn"] + + renderFields = mconcat . intersperse ("," <> cr) . mapMaybe renderField defaultLang :: Lang -defaultLang = Lang "en" (Just "US") +defaultLang = Lang "en" Nothing (Just "US") [] [] [] -- a map of bibtex "string" macros type StringMap = Map.Map Text Text -type BibParser = Parser Text (Lang, StringMap) +type BibParser = Parser Sources (Lang, StringMap) data Item = Item{ identifier :: Text , sourcePos :: SourcePos @@ -89,9 +359,7 @@ itemToReference locale variant item = do bib item $ do let lang = fromMaybe defaultLang $ localeLanguage locale modify $ \st -> st{ localeLang = lang, - untitlecase = case lang of - (Lang "en" _) -> True - _ -> False } + untitlecase = langLanguage lang == "en" } id' <- asks identifier otherIds <- (Just <$> getRawField "ids") @@ -315,10 +583,10 @@ itemToReference locale variant item = do eprint <- getRawField "eprint" let baseUrl = case T.toLower etype of - "arxiv" -> "http://arxiv.org/abs/" - "jstor" -> "http://www.jstor.org/stable/" - "pubmed" -> "http://www.ncbi.nlm.nih.gov/pubmed/" - "googlebooks" -> "http://books.google.com?id=" + "arxiv" -> "https://arxiv.org/abs/" + "jstor" -> "https://www.jstor.org/stable/" + "pubmed" -> "https://www.ncbi.nlm.nih.gov/pubmed/" + "googlebooks" -> "https://books.google.com?id=" _ -> "" if T.null baseUrl then mzero @@ -449,7 +717,7 @@ itemToReference locale variant item = do bib :: Item -> Bib a -> BibParser a -bib entry m = fst <$> evalRWST m entry (BibState True (Lang "en" (Just "US"))) +bib entry m = fst <$> evalRWST m entry (BibState True defaultLang) resolveCrossRefs :: Variant -> [Item] -> [Item] resolveCrossRefs variant entries = @@ -502,41 +770,10 @@ blocksToInlines bs = _ -> B.fromList $ Walk.query (:[]) bs adjustSpans :: Lang -> Inline -> Inline -adjustSpans lang (RawInline (Format "latex") s) - | s == "\\hyphen" || s == "\\hyphen " = Str "-" - | otherwise = parseRawLaTeX lang s +adjustSpans lang (Span ("",[],[("bibstring",s)]) _) = Str $ resolveKey' lang s adjustSpans _ SoftBreak = Space adjustSpans _ x = x -parseRawLaTeX :: Lang -> Text -> Inline -parseRawLaTeX lang t@(T.stripPrefix "\\" -> Just xs) = - case parseLaTeX lang contents of - Right [Para ys] -> f command ys - Right [Plain ys] -> f command ys - Right [] -> f command [] - _ -> RawInline (Format "latex") t - where (command', contents') = T.break (\c -> c =='{' || c =='\\') xs - command = T.strip command' - contents = T.drop 1 $ T.dropEnd 1 contents' - f "mkbibquote" ils = Span nullAttr [Quoted DoubleQuote ils] - f "mkbibemph" ils = Span nullAttr [Emph ils] - f "mkbibitalic" ils = Span nullAttr [Emph ils] - f "mkbibbold" ils = Span nullAttr [Strong ils] - f "mkbibparens" ils = Span nullAttr $ - [Str "("] ++ ils ++ [Str ")"] - f "mkbibbrackets" ils = Span nullAttr $ - [Str "["] ++ ils ++ [Str "]"] - -- ... both should be nestable & should work in year fields - f "autocap" ils = Span nullAttr ils - -- TODO: should work in year fields - f "textnormal" ils = Span ("",["nodecor"],[]) ils - f "bibstring" [Str s] = Str $ resolveKey' lang s - f "adddot" [] = Str "." - f "adddotspace" [] = Span nullAttr [Str ".", Space] - f "addabbrvspace" [] = Space - f _ ils = Span nullAttr ils -parseRawLaTeX _ t = RawInline (Format "latex") t - latex' :: Text -> Bib [Block] latex' t = do lang <- gets localeLang @@ -572,7 +809,7 @@ bibEntries = do (bibComment <|> bibPreamble <|> bibString)) bibSkip :: BibParser () -bibSkip = () <$ take1WhileP (/='@') +bibSkip = skipMany1 (satisfy (/='@')) bibComment :: BibParser () bibComment = do @@ -597,11 +834,14 @@ bibString = do updateState (\(l,m) -> (l, Map.insert k v m)) return () +take1WhileP :: Monad m => (Char -> Bool) -> ParserT Sources u m Text +take1WhileP f = T.pack <$> many1 (satisfy f) + inBraces :: BibParser Text inBraces = do char '{' res <- manyTill - ( (T.pack <$> many1 (noneOf "{}\\")) + ( take1WhileP (\c -> c /= '{' && c /= '}' && c /= '\\') <|> (char '\\' >> ( (char '{' >> return "\\{") <|> (char '}' >> return "\\}") <|> return "\\")) @@ -616,7 +856,7 @@ inQuotes :: BibParser Text inQuotes = do char '"' T.concat <$> manyTill - ( (T.pack <$> many1 (noneOf "\"\\{")) + ( take1WhileP (\c -> c /= '{' && c /= '"' && c /= '\\') <|> (char '\\' >> T.cons '\\' . T.singleton <$> anyChar) <|> braced <$> inBraces ) (char '"') @@ -628,7 +868,7 @@ fieldName = resolveAlias . T.toLower isBibtexKeyChar :: Char -> Bool isBibtexKeyChar c = - isAlphaNum c || c `elem` (".:;?!`'()/*@_+=-[]*&" :: [Char]) + isAlphaNum c || c `elem` (".:;?!`'()$/*@_+=-[]*&" :: [Char]) bibItem :: BibParser Item bibItem = do @@ -812,14 +1052,14 @@ getOldDate prefix = do let dateparts = filter (\x -> x /= DateParts []) $ map toDateParts [(year',month',day'), (endyear',endmonth',endday')] - literal <- if null dateparts - then Just <$> getRawField (prefix <> "year") - else return Nothing + literal' <- if null dateparts + then Just <$> getRawField (prefix <> "year") + else return Nothing return $ Date { dateParts = dateparts , dateCirca = False , dateSeason = Nothing - , dateLiteral = literal } + , dateLiteral = literal' } getRawField :: Text -> Bib Text getRawField f = do @@ -1225,8 +1465,9 @@ resolveKey lang ils = Walk.walk go ils go x = x resolveKey' :: Lang -> Text -> Text -resolveKey' lang@(Lang l _) k = - case Map.lookup l biblatexStringMap >>= Map.lookup (T.toLower k) of +resolveKey' lang k = + case Map.lookup (langLanguage lang) biblatexStringMap >>= + Map.lookup (T.toLower k) of Nothing -> k Just (x, _) -> either (const k) stringify $ parseLaTeX lang x diff --git a/src/Text/Pandoc/Citeproc/Data.hs b/src/Text/Pandoc/Citeproc/Data.hs index dfdaf2598..848a83a1e 100644 --- a/src/Text/Pandoc/Citeproc/Data.hs +++ b/src/Text/Pandoc/Citeproc/Data.hs @@ -10,7 +10,7 @@ import qualified Data.Text.Encoding as TE import qualified Data.Text as T import Data.Text (Text) import Text.Pandoc.Citeproc.Util (toIETF) -import Citeproc (Lang(..), parseLang) +import Text.Collate.Lang (Lang(..), parseLang) biblatexLocalizations :: [(FilePath, ByteString)] biblatexLocalizations = $(embedDir "citeproc/biblatex-localization") @@ -21,11 +21,12 @@ biblatexStringMap :: M.Map Text (M.Map Text (Text, Text)) biblatexStringMap = foldr go mempty biblatexLocalizations where go (fp, bs) = - let Lang lang _ = parseLang (toIETF $ T.takeWhile (/= '.') $ T.pack fp) - ls = T.lines $ TE.decodeUtf8 bs - in if length ls > 4 - then M.insert lang (toStringMap $ map (T.splitOn "|") ls) - else id + let ls = T.lines $ TE.decodeUtf8 bs + in case parseLang (toIETF $ T.takeWhile (/= '.') $ T.pack fp) of + Right lang | length ls > 4 + -> M.insert (langLanguage lang) + (toStringMap $ map (T.splitOn "|") ls) + _ -> id toStringMap = foldr go' mempty go' [term, x, y] = M.insert term (x, y) go' _ = id diff --git a/src/Text/Pandoc/Citeproc/Locator.hs b/src/Text/Pandoc/Citeproc/Locator.hs index dba762c02..f8931d7b5 100644 --- a/src/Text/Pandoc/Citeproc/Locator.hs +++ b/src/Text/Pandoc/Citeproc/Locator.hs @@ -7,6 +7,7 @@ where import Citeproc.Types import Data.Text (Text) import qualified Data.Text as T +import Data.List (foldl') import Text.Parsec import Text.Pandoc.Definition import Text.Pandoc.Parsing (romanNumeral) @@ -19,7 +20,7 @@ parseLocator :: Locale -> [Inline] -> (Maybe (Text, Text), [Inline]) parseLocator locale inp = case parse (pLocatorWords (toLocatorMap locale)) "suffix" $ splitInp inp of Right r -> r - Left _ -> (Nothing, inp) + Left _ -> (Nothing, maybeAddComma inp) splitInp :: [Inline] -> [Inline] splitInp = splitStrWhen (\c -> isSpace c || (isPunctuation c && c /= ':')) @@ -41,9 +42,17 @@ pLocatorWords locMap = do -- i.e. the first one will be " 9" return $ if T.null la && T.null lo - then (Nothing, s) + then (Nothing, maybeAddComma s) else (Just (la, T.strip lo), s) +maybeAddComma :: [Inline] -> [Inline] +maybeAddComma [] = [] +maybeAddComma ils@(Space : _) = ils +maybeAddComma ils@(Str t : _) + | Just (c, _) <- T.uncons t + , isPunctuation c = ils +maybeAddComma ils = Str "," : Space : ils + pLocatorDelimited :: LocatorMap -> LocatorParser (Text, Text) pLocatorDelimited locMap = try $ do _ <- pMatchChar "{" (== '{') @@ -96,7 +105,7 @@ pLocatorLabel' locMap lim = go "" t <- anyToken ts <- manyTill anyToken (try $ lookAhead lim) let s = acc <> stringify (t:ts) - case M.lookup (T.strip s) locMap of + case M.lookup (T.toCaseFold $ T.strip s) locMap of -- try to find a longer one, or return this one Just l -> go s <|> return (l, False) Nothing -> go s @@ -139,7 +148,7 @@ pBalancedBraces braces p = try $ do where except = notFollowedBy pBraces >> p -- outer and inner - surround = foldl (\a (open, close) -> sur open close except <|> a) + surround = foldl' (\a (open, close) -> sur open close except <|> a) except braces @@ -180,6 +189,7 @@ pPageUnit = roman <|> plainUnit plainUnit = do ts <- many1 (notFollowedBy pSpace >> notFollowedBy pLocatorPunct >> + notFollowedBy pMath >> anyToken) let s = stringify ts -- otherwise look for actual digits or -s @@ -210,6 +220,12 @@ pMatchChar msg f = satisfyTok f' <?> msg pSpace :: LocatorParser Inline pSpace = satisfyTok (\t -> isSpacey t || t == Str "\160") <?> "space" +pMath :: LocatorParser Inline +pMath = satisfyTok isMath + where + isMath (Math{}) = True + isMath _ = False + satisfyTok :: (Inline -> Bool) -> LocatorParser Inline satisfyTok f = tokenPrim show (\sp _ _ -> sp) (\tok -> if f tok then Just tok diff --git a/src/Text/Pandoc/Citeproc/MetaValue.hs b/src/Text/Pandoc/Citeproc/MetaValue.hs index f5a49f49e..b43ca7314 100644 --- a/src/Text/Pandoc/Citeproc/MetaValue.hs +++ b/src/Text/Pandoc/Citeproc/MetaValue.hs @@ -135,12 +135,13 @@ metaValueToVal k v MetaMap _ -> TextVal mempty metaValueToDate :: MetaValue -> Date -metaValueToDate (MetaMap m) = - Date +metaValueToDate (MetaMap m) = fromMaybe + (Date { dateParts = dateparts , dateCirca = circa , dateSeason = season - , dateLiteral = literal } + , dateLiteral = literal }) + rawdate where dateparts = case M.lookup "date-parts" m of Just (MetaList xs) -> @@ -152,6 +153,7 @@ metaValueToDate (MetaMap m) = M.lookup "circa" m >>= metaValueToBool season = M.lookup "season" m >>= metaValueToInt literal = M.lookup "literal" m >>= metaValueToText + rawdate = M.lookup "raw" m >>= metaValueToText >>= rawDateEDTF metaValueToDate (MetaList xs) = Date{ dateParts = mapMaybe metaValueToDateParts xs , dateCirca = False |
