diff options
| author | Igor Pashev <pashev.igor@gmail.com> | 2021-12-29 15:00:59 +0200 |
|---|---|---|
| committer | Igor Pashev <pashev.igor@gmail.com> | 2021-12-29 15:00:59 +0200 |
| commit | b4361712899fd0183fea5513180cb383979616de (patch) | |
| tree | 688ab7ee2ab3a8cd32b4e37b506099aec95388f7 /src/Text/Pandoc/Citeproc | |
| parent | 726ad97faebe59e024d68d293e663c02bbe423c8 (diff) | |
| parent | d960282b105a6469c760b4308a3b81da723b7256 (diff) | |
| download | pandoc-b4361712899fd0183fea5513180cb383979616de.tar.gz | |
Merge https://github.com/jgm/pandoc
Diffstat (limited to 'src/Text/Pandoc/Citeproc')
| -rw-r--r-- | src/Text/Pandoc/Citeproc/BibTeX.hs | 81 | ||||
| -rw-r--r-- | src/Text/Pandoc/Citeproc/CslJson.hs | 1 | ||||
| -rw-r--r-- | src/Text/Pandoc/Citeproc/Locator.hs | 78 | ||||
| -rw-r--r-- | src/Text/Pandoc/Citeproc/Util.hs | 14 |
4 files changed, 94 insertions, 80 deletions
diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs index c178de6e9..a8e5622ed 100644 --- a/src/Text/Pandoc/Citeproc/BibTeX.hs +++ b/src/Text/Pandoc/Citeproc/BibTeX.hs @@ -34,7 +34,7 @@ import Text.Pandoc.Class (runPure) import qualified Text.Pandoc.Walk as Walk import Citeproc.Types import Citeproc.Pandoc () -import Text.Pandoc.Citeproc.Util (toIETF) +import Text.Pandoc.Citeproc.Util (toIETF, splitStrWhen) import Text.Pandoc.Citeproc.Data (biblatexStringMap) import Data.Default import Data.Text (Text) @@ -48,13 +48,12 @@ import Control.Monad.RWS hiding ((<>)) import qualified Data.Sequence as Seq import Data.Char (isAlphaNum, isDigit, isLetter, isUpper, toLower, toUpper, - isLower, isPunctuation) + isLower, isPunctuation, isSpace) import Data.List (foldl', intercalate, intersperse) import Safe (readMay) import Text.Printf (printf) import Text.DocLayout (literal, hsep, nest, hang, Doc(..), braces, ($$), cr) - data Variant = Bibtex | Biblatex deriving (Show, Eq, Ord) @@ -527,9 +526,9 @@ itemToReference locale variant item = do let fixSeriesTitle [Str xs] | isNumber xs = [Str (ordinalize locale xs), Space, Str (resolveKey' lang "jourser")] fixSeriesTitle xs = xs - seriesTitle' <- (Just . B.fromList . fixSeriesTitle . - B.toList . resolveKey lang <$> - getTitle "series") <|> + + seriesTitle' <- (Just . B.fromList . fixSeriesTitle . B.toList + <$> getTitle "series") <|> return Nothing shortTitle' <- (Just <$> (guard (not hasMaintitle || isChapterlike) >> getTitle "shorttitle")) @@ -805,30 +804,34 @@ bibEntries = do skipMany nonEntry many (bibItem <* skipMany nonEntry) where nonEntry = bibSkip <|> + comment <|> try (char '@' >> (bibComment <|> bibPreamble <|> bibString)) bibSkip :: BibParser () -bibSkip = skipMany1 (satisfy (/='@')) +bibSkip = skipMany1 (satisfy (\c -> c /='@' && c /='%')) + +comment :: BibParser () +comment = char '%' *> void anyLine bibComment :: BibParser () bibComment = do cistring "comment" - spaces + spaces' void inBraces <|> bibSkip <|> return () bibPreamble :: BibParser () bibPreamble = do cistring "preamble" - spaces + spaces' void inBraces bibString :: BibParser () bibString = do cistring "string" - spaces + spaces' char '{' - spaces + spaces' (k,v) <- entField char '}' updateState (\(l,m) -> (l, Map.insert k v m)) @@ -842,9 +845,9 @@ inBraces = do char '{' res <- manyTill ( take1WhileP (\c -> c /= '{' && c /= '}' && c /= '\\') - <|> (char '\\' >> ( (char '{' >> return "\\{") - <|> (char '}' >> return "\\}") - <|> return "\\")) + <|> (char '\\' >> (do c <- oneOf "{}" + return $ T.pack ['\\',c]) + <|> return "\\") <|> (braced <$> inBraces) ) (char '}') return $ T.concat res @@ -856,8 +859,9 @@ inQuotes :: BibParser Text inQuotes = do char '"' T.concat <$> manyTill - ( take1WhileP (\c -> c /= '{' && c /= '"' && c /= '\\') + ( take1WhileP (\c -> c /= '{' && c /= '"' && c /= '\\' && c /= '%') <|> (char '\\' >> T.cons '\\' . T.singleton <$> anyChar) + <|> ("" <$ (char '%' >> anyLine)) <|> braced <$> inBraces ) (char '"') @@ -870,32 +874,35 @@ isBibtexKeyChar :: Char -> Bool isBibtexKeyChar c = isAlphaNum c || c `elem` (".:;?!`'()$/*@_+=-[]*&" :: [Char]) +spaces' :: BibParser () +spaces' = skipMany (void (satisfy isSpace) <|> comment) + bibItem :: BibParser Item bibItem = do char '@' pos <- getPosition enttype <- T.toLower <$> take1WhileP isLetter - spaces + spaces' char '{' - spaces + spaces' entid <- take1WhileP isBibtexKeyChar - spaces + spaces' char ',' - spaces - entfields <- entField `sepEndBy` (char ',' >> spaces) - spaces + spaces' + entfields <- entField `sepEndBy` (char ',' >> spaces') + spaces' char '}' return $ Item entid pos enttype (Map.fromList entfields) entField :: BibParser (Text, Text) entField = do k <- fieldName - spaces + spaces' char '=' - spaces + spaces' vs <- (expandString <|> inQuotes <|> inBraces <|> rawWord) `sepBy` - try (spaces >> char '#' >> spaces) - spaces + try (spaces' >> char '#' >> spaces') + spaces' return (k, T.concat vs) resolveAlias :: Text -> Text @@ -984,8 +991,12 @@ getTitle f = do ils <- getField f utc <- gets untitlecase lang <- gets localeLang + let ils' = + if f == "series" + then resolveKey lang ils + else ils let processTitle = if utc then unTitlecase (Just lang) else id - return $ processTitle ils + return $ processTitle ils' getShortTitle :: Bool -> Text -> Bib (Maybe Inlines) getShortTitle requireColon f = do @@ -1253,20 +1264,6 @@ toName opts ils = do , nameStaticOrdering = False } -splitStrWhen :: (Char -> Bool) -> [Inline] -> [Inline] -splitStrWhen _ [] = [] -splitStrWhen p (Str xs : ys) = map Str (go xs) ++ splitStrWhen p ys - where go s = - let (w,z) = T.break p s - in if T.null z - then if T.null w - then [] - else [w] - else if T.null w - then (T.take 1 z : go (T.drop 1 z)) - else (w : T.take 1 z : go (T.drop 1 z)) -splitStrWhen p (x : ys) = x : splitStrWhen p ys - ordinalize :: Locale -> Text -> Text ordinalize locale n = let terms = localeTerms locale @@ -1460,14 +1457,14 @@ bookTrans z = _ -> [z] resolveKey :: Lang -> Inlines -> Inlines -resolveKey lang ils = Walk.walk go ils +resolveKey lang (Many ils) = Many $ fmap go ils where go (Str s) = Str $ resolveKey' lang s go x = x resolveKey' :: Lang -> Text -> Text resolveKey' lang k = case Map.lookup (langLanguage lang) biblatexStringMap >>= - Map.lookup (T.toLower k) of + Map.lookup k of Nothing -> k Just (x, _) -> either (const k) stringify $ parseLaTeX lang x diff --git a/src/Text/Pandoc/Citeproc/CslJson.hs b/src/Text/Pandoc/Citeproc/CslJson.hs index 862af5188..43c1a87ec 100644 --- a/src/Text/Pandoc/Citeproc/CslJson.hs +++ b/src/Text/Pandoc/Citeproc/CslJson.hs @@ -28,6 +28,7 @@ fromCslJson (CslSub x) = B.subscript (fromCslJson x) fromCslJson (CslSup x) = B.superscript (fromCslJson x) fromCslJson (CslNoCase x) = B.spanWith ("",["nocase"],[]) (fromCslJson x) fromCslJson (CslDiv t x) = B.spanWith ("",["csl-" <> t],[]) (fromCslJson x) +fromCslJson (CslLink u x) = B.link u "" (fromCslJson x) cslJsonToReferences :: ByteString -> Either String [Reference Inlines] cslJsonToReferences raw = diff --git a/src/Text/Pandoc/Citeproc/Locator.hs b/src/Text/Pandoc/Citeproc/Locator.hs index f8931d7b5..0b8f79922 100644 --- a/src/Text/Pandoc/Citeproc/Locator.hs +++ b/src/Text/Pandoc/Citeproc/Locator.hs @@ -2,9 +2,13 @@ {-# LANGUAGE ViewPatterns #-} {-# LANGUAGE OverloadedStrings #-} module Text.Pandoc.Citeproc.Locator - ( parseLocator ) + ( parseLocator + , toLocatorMap + , LocatorInfo(..) + , LocatorMap(..) ) where import Citeproc.Types +import Text.Pandoc.Citeproc.Util (splitStrWhen) import Data.Text (Text) import qualified Data.Text as T import Data.List (foldl') @@ -16,9 +20,17 @@ import Control.Monad (mzero) import qualified Data.Map as M import Data.Char (isSpace, isPunctuation, isDigit) -parseLocator :: Locale -> [Inline] -> (Maybe (Text, Text), [Inline]) -parseLocator locale inp = - case parse (pLocatorWords (toLocatorMap locale)) "suffix" $ splitInp inp of + +data LocatorInfo = + LocatorInfo{ locatorRaw :: Text + , locatorLabel :: Text + , locatorLoc :: Text + } + deriving (Show) + +parseLocator :: LocatorMap -> [Inline] -> (Maybe LocatorInfo, [Inline]) +parseLocator locmap inp = + case parse (pLocatorWords locmap) "suffix" $ splitInp inp of Right r -> r Left _ -> (Nothing, maybeAddComma inp) @@ -32,18 +44,16 @@ splitInp = splitStrWhen (\c -> isSpace c || (isPunctuation c && c /= ':')) type LocatorParser = Parsec [Inline] () pLocatorWords :: LocatorMap - -> LocatorParser (Maybe (Text, Text), [Inline]) + -> LocatorParser (Maybe LocatorInfo, [Inline]) pLocatorWords locMap = do optional $ pMatchChar "," (== ',') optional pSpace - (la, lo) <- pLocatorDelimited locMap <|> pLocatorIntegrated locMap + info <- pLocatorDelimited locMap <|> pLocatorIntegrated locMap s <- getInput -- rest is suffix - -- need to trim, otherwise "p. 9" and "9" will have 'different' locators later on - -- i.e. the first one will be " 9" return $ - if T.null la && T.null lo + if T.null (locatorLabel info) && T.null (locatorLoc info) then (Nothing, maybeAddComma s) - else (Just (la, T.strip lo), s) + else (Just info, s) maybeAddComma :: [Inline] -> [Inline] maybeAddComma [] = [] @@ -53,28 +63,30 @@ maybeAddComma ils@(Str t : _) , isPunctuation c = ils maybeAddComma ils = Str "," : Space : ils -pLocatorDelimited :: LocatorMap -> LocatorParser (Text, Text) +pLocatorDelimited :: LocatorMap -> LocatorParser LocatorInfo pLocatorDelimited locMap = try $ do _ <- pMatchChar "{" (== '{') skipMany pSpace -- gobble pre-spaces so label doesn't try to include them - (la, _) <- pLocatorLabelDelimited locMap + (rawlab, la, _) <- pLocatorLabelDelimited locMap -- we only care about balancing {} and [] (because of the outer [] scope); -- the rest can be anything let inner = do { t <- anyToken; return (True, stringify t) } gs <- many (pBalancedBraces [('{','}'), ('[',']')] inner) _ <- pMatchChar "}" (== '}') let lo = T.concat $ map snd gs - return (la, lo) + return $ LocatorInfo{ locatorLoc = lo, + locatorLabel = la, + locatorRaw = rawlab <> "{" <> lo <> "}" } -pLocatorLabelDelimited :: LocatorMap -> LocatorParser (Text, Bool) +pLocatorLabelDelimited :: LocatorMap -> LocatorParser (Text, Text, Bool) pLocatorLabelDelimited locMap - = pLocatorLabel' locMap lim <|> return ("page", True) + = pLocatorLabel' locMap lim <|> return ("", "page", True) where lim = stringify <$> anyToken -pLocatorIntegrated :: LocatorMap -> LocatorParser (Text, Text) +pLocatorIntegrated :: LocatorMap -> LocatorParser LocatorInfo pLocatorIntegrated locMap = try $ do - (la, wasImplicit) <- pLocatorLabelIntegrated locMap + (rawlab, la, wasImplicit) <- pLocatorLabelIntegrated locMap -- if we got the label implicitly, we have presupposed the first one is -- going to have a digit, so guarantee that. You _can_ have p. (a) -- because you specified it. @@ -84,17 +96,20 @@ pLocatorIntegrated locMap = try $ do g <- try $ pLocatorWordIntegrated (not wasImplicit) >>= modifier gs <- many (try $ pLocatorWordIntegrated False >>= modifier) let lo = T.concat (g:gs) - return (la, lo) + return $ LocatorInfo{ locatorLabel = la, + locatorLoc = lo, + locatorRaw = rawlab <> lo } -pLocatorLabelIntegrated :: LocatorMap -> LocatorParser (Text, Bool) +pLocatorLabelIntegrated :: LocatorMap -> LocatorParser (Text, Text, Bool) pLocatorLabelIntegrated locMap - = pLocatorLabel' locMap lim <|> (lookAhead digital >> return ("page", True)) + = pLocatorLabel' locMap lim <|> + (lookAhead digital >> return ("", "page", True)) where lim = try $ pLocatorWordIntegrated True >>= requireRomansOrDigits digital = try $ pLocatorWordIntegrated True >>= requireDigits pLocatorLabel' :: LocatorMap -> LocatorParser Text - -> LocatorParser (Text, Bool) + -> LocatorParser (Text, Text, Bool) pLocatorLabel' locMap lim = go "" where -- grow the match string until we hit the end @@ -105,9 +120,9 @@ pLocatorLabel' locMap lim = go "" t <- anyToken ts <- manyTill anyToken (try $ lookAhead lim) let s = acc <> stringify (t:ts) - case M.lookup (T.toCaseFold $ T.strip s) locMap of + case M.lookup (T.toCaseFold $ T.strip s) (unLocatorMap locMap) of -- try to find a longer one, or return this one - Just l -> go s <|> return (l, False) + Just l -> go s <|> return (s, l, False) Nothing -> go s -- hard requirement for a locator to have some real digits in it @@ -247,27 +262,16 @@ isLocatorSep ',' = True isLocatorSep ';' = True isLocatorSep _ = False -splitStrWhen :: (Char -> Bool) -> [Inline] -> [Inline] -splitStrWhen _ [] = [] -splitStrWhen p (Str xs : ys) = go (T.unpack xs) ++ splitStrWhen p ys - where - go [] = [] - go s = case break p s of - ([],[]) -> [] - (zs,[]) -> [Str $ T.pack zs] - ([],w:ws) -> Str (T.singleton w) : go ws - (zs,w:ws) -> Str (T.pack zs) : Str (T.singleton w) : go ws -splitStrWhen p (x : ys) = x : splitStrWhen p ys - -- -- Locator Map -- -type LocatorMap = M.Map Text Text +newtype LocatorMap = LocatorMap { unLocatorMap :: M.Map Text Text } + deriving (Show) toLocatorMap :: Locale -> LocatorMap toLocatorMap locale = - foldr go mempty locatorTerms + LocatorMap $ foldr go mempty locatorTerms where go tname locmap = case M.lookup tname (localeTerms locale) of diff --git a/src/Text/Pandoc/Citeproc/Util.hs b/src/Text/Pandoc/Citeproc/Util.hs index 6d8e01bc9..8bffc0f32 100644 --- a/src/Text/Pandoc/Citeproc/Util.hs +++ b/src/Text/Pandoc/Citeproc/Util.hs @@ -1,9 +1,21 @@ {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE OverloadedStrings #-} module Text.Pandoc.Citeproc.Util - ( toIETF ) + ( splitStrWhen + , toIETF ) where +import qualified Data.Text as T import Data.Text (Text) +import Text.Pandoc.Definition + +-- Split Str elements so that characters satisfying the +-- predicate each have their own Str. +splitStrWhen :: (Char -> Bool) -> [Inline] -> [Inline] +splitStrWhen p = foldr go [] + where + go (Str t) = (map Str (T.groupBy goesTogether t) ++) + go x = (x :) + goesTogether c d = not (p c || p d) toIETF :: Text -> Text toIETF "english" = "en-US" -- "en-EN" unavailable in CSL |
