From ea77f2e6f653d5b570109fa208dc427d99f95b51 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 13 Dec 2021 12:07:24 -0800 Subject: Citeproc changes: T.P.Citeproc exports `getCiteprocLang` and `getStyle` [API change]. T.P.Citeproc.Locator now exports `toLocatorMap`, `LocatorInfo`, and `LocatorMap`. The type of `parseLocator` has changed, so it now takes a `LocatorMap` rather than a `Locale` as parameter, and returns a `LocatorInfo` instead of a tuple. --- src/Text/Pandoc/Citeproc.hs | 29 ++++++++--------- src/Text/Pandoc/Citeproc/Locator.hs | 65 +++++++++++++++++++++++-------------- 2 files changed, 53 insertions(+), 41 deletions(-) diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs index 223925522..d769a91c8 100644 --- a/src/Text/Pandoc/Citeproc.hs +++ b/src/Text/Pandoc/Citeproc.hs @@ -7,13 +7,15 @@ module Text.Pandoc.Citeproc ( processCitations, getReferences, - getStyle + getStyle, + getCiteprocLang ) where import Citeproc import Citeproc.Pandoc () -import Text.Pandoc.Citeproc.Locator (parseLocator) +import Text.Pandoc.Citeproc.Locator (parseLocator, toLocatorMap, + LocatorInfo(..)) import Text.Pandoc.Citeproc.CslJson (cslJsonToReferences) import Text.Pandoc.Citeproc.BibTeX (readBibtexString, Variant(..)) import Text.Pandoc.Citeproc.MetaValue (metaValueToReference, metaValueToText) @@ -49,12 +51,10 @@ import qualified Data.Text as T import System.FilePath (takeExtension) import Safe (lastMay, initSafe) - processCitations :: PandocMonad m => Pandoc -> m Pandoc processCitations (Pandoc meta bs) = do style <- getStyle (Pandoc meta bs) - - mblang <- getLang meta + mblang <- getCiteprocLang meta let locale = Citeproc.mergeLocales mblang style refs <- getReferences (Just locale) (Pandoc meta bs) @@ -166,10 +166,9 @@ getStyle (Pandoc meta _) = do -- Retrieve citeproc lang based on metadata. -getLang :: PandocMonad m => Meta -> m (Maybe Lang) -getLang meta = maybe (return Nothing) bcp47LangToIETF - ((lookupMeta "lang" meta <|> lookupMeta "locale" meta) >>= - metaValueToText) +getCiteprocLang :: PandocMonad m => Meta -> m (Maybe Lang) +getCiteprocLang meta = maybe (return Nothing) bcp47LangToIETF + ((lookupMeta "lang" meta <|> lookupMeta "locale" meta) >>= metaValueToText) -- | Get references defined inline in the metadata and via an external -- bibliography. Only references that are actually cited in the document @@ -181,7 +180,7 @@ getReferences mblocale (Pandoc meta bs) = do locale <- case mblocale of Just l -> return l Nothing -> do - mblang <- getLang meta + mblang <- getCiteprocLang meta case mblang of Just lang -> return $ either mempty id $ getLocale lang Nothing -> return mempty @@ -307,17 +306,15 @@ fromPandocCitations :: Locale -> [CitationItem Inlines] fromPandocCitations locale otherIdsMap = concatMap go where + locmap = toLocatorMap locale go c = - let (loclab, suffix) = parseLocator locale (citationSuffix c) - (mblab, mbloc) = case loclab of - Just (loc, lab) -> (Just loc, Just lab) - Nothing -> (Nothing, Nothing) + let (mblocinfo, suffix) = parseLocator locmap (citationSuffix c) cit = CitationItem { citationItemId = fromMaybe (ItemId $ Pandoc.citationId c) (M.lookup (Pandoc.citationId c) otherIdsMap) - , citationItemLabel = mblab - , citationItemLocator = mbloc + , citationItemLabel = locatorLabel <$> mblocinfo + , citationItemLocator = locatorLoc <$> mblocinfo , citationItemType = NormalCite , citationItemPrefix = case citationPrefix c of [] -> Nothing diff --git a/src/Text/Pandoc/Citeproc/Locator.hs b/src/Text/Pandoc/Citeproc/Locator.hs index ce606197b..0b8f79922 100644 --- a/src/Text/Pandoc/Citeproc/Locator.hs +++ b/src/Text/Pandoc/Citeproc/Locator.hs @@ -2,7 +2,10 @@ {-# LANGUAGE ViewPatterns #-} {-# LANGUAGE OverloadedStrings #-} module Text.Pandoc.Citeproc.Locator - ( parseLocator ) + ( parseLocator + , toLocatorMap + , LocatorInfo(..) + , LocatorMap(..) ) where import Citeproc.Types import Text.Pandoc.Citeproc.Util (splitStrWhen) @@ -17,9 +20,17 @@ import Control.Monad (mzero) import qualified Data.Map as M import Data.Char (isSpace, isPunctuation, isDigit) -parseLocator :: Locale -> [Inline] -> (Maybe (Text, Text), [Inline]) -parseLocator locale inp = - case parse (pLocatorWords (toLocatorMap locale)) "suffix" $ splitInp inp of + +data LocatorInfo = + LocatorInfo{ locatorRaw :: Text + , locatorLabel :: Text + , locatorLoc :: Text + } + deriving (Show) + +parseLocator :: LocatorMap -> [Inline] -> (Maybe LocatorInfo, [Inline]) +parseLocator locmap inp = + case parse (pLocatorWords locmap) "suffix" $ splitInp inp of Right r -> r Left _ -> (Nothing, maybeAddComma inp) @@ -33,18 +44,16 @@ splitInp = splitStrWhen (\c -> isSpace c || (isPunctuation c && c /= ':')) type LocatorParser = Parsec [Inline] () pLocatorWords :: LocatorMap - -> LocatorParser (Maybe (Text, Text), [Inline]) + -> LocatorParser (Maybe LocatorInfo, [Inline]) pLocatorWords locMap = do optional $ pMatchChar "," (== ',') optional pSpace - (la, lo) <- pLocatorDelimited locMap <|> pLocatorIntegrated locMap + info <- pLocatorDelimited locMap <|> pLocatorIntegrated locMap s <- getInput -- rest is suffix - -- need to trim, otherwise "p. 9" and "9" will have 'different' locators later on - -- i.e. the first one will be " 9" return $ - if T.null la && T.null lo + if T.null (locatorLabel info) && T.null (locatorLoc info) then (Nothing, maybeAddComma s) - else (Just (la, T.strip lo), s) + else (Just info, s) maybeAddComma :: [Inline] -> [Inline] maybeAddComma [] = [] @@ -54,28 +63,30 @@ maybeAddComma ils@(Str t : _) , isPunctuation c = ils maybeAddComma ils = Str "," : Space : ils -pLocatorDelimited :: LocatorMap -> LocatorParser (Text, Text) +pLocatorDelimited :: LocatorMap -> LocatorParser LocatorInfo pLocatorDelimited locMap = try $ do _ <- pMatchChar "{" (== '{') skipMany pSpace -- gobble pre-spaces so label doesn't try to include them - (la, _) <- pLocatorLabelDelimited locMap + (rawlab, la, _) <- pLocatorLabelDelimited locMap -- we only care about balancing {} and [] (because of the outer [] scope); -- the rest can be anything let inner = do { t <- anyToken; return (True, stringify t) } gs <- many (pBalancedBraces [('{','}'), ('[',']')] inner) _ <- pMatchChar "}" (== '}') let lo = T.concat $ map snd gs - return (la, lo) + return $ LocatorInfo{ locatorLoc = lo, + locatorLabel = la, + locatorRaw = rawlab <> "{" <> lo <> "}" } -pLocatorLabelDelimited :: LocatorMap -> LocatorParser (Text, Bool) +pLocatorLabelDelimited :: LocatorMap -> LocatorParser (Text, Text, Bool) pLocatorLabelDelimited locMap - = pLocatorLabel' locMap lim <|> return ("page", True) + = pLocatorLabel' locMap lim <|> return ("", "page", True) where lim = stringify <$> anyToken -pLocatorIntegrated :: LocatorMap -> LocatorParser (Text, Text) +pLocatorIntegrated :: LocatorMap -> LocatorParser LocatorInfo pLocatorIntegrated locMap = try $ do - (la, wasImplicit) <- pLocatorLabelIntegrated locMap + (rawlab, la, wasImplicit) <- pLocatorLabelIntegrated locMap -- if we got the label implicitly, we have presupposed the first one is -- going to have a digit, so guarantee that. You _can_ have p. (a) -- because you specified it. @@ -85,17 +96,20 @@ pLocatorIntegrated locMap = try $ do g <- try $ pLocatorWordIntegrated (not wasImplicit) >>= modifier gs <- many (try $ pLocatorWordIntegrated False >>= modifier) let lo = T.concat (g:gs) - return (la, lo) + return $ LocatorInfo{ locatorLabel = la, + locatorLoc = lo, + locatorRaw = rawlab <> lo } -pLocatorLabelIntegrated :: LocatorMap -> LocatorParser (Text, Bool) +pLocatorLabelIntegrated :: LocatorMap -> LocatorParser (Text, Text, Bool) pLocatorLabelIntegrated locMap - = pLocatorLabel' locMap lim <|> (lookAhead digital >> return ("page", True)) + = pLocatorLabel' locMap lim <|> + (lookAhead digital >> return ("", "page", True)) where lim = try $ pLocatorWordIntegrated True >>= requireRomansOrDigits digital = try $ pLocatorWordIntegrated True >>= requireDigits pLocatorLabel' :: LocatorMap -> LocatorParser Text - -> LocatorParser (Text, Bool) + -> LocatorParser (Text, Text, Bool) pLocatorLabel' locMap lim = go "" where -- grow the match string until we hit the end @@ -106,9 +120,9 @@ pLocatorLabel' locMap lim = go "" t <- anyToken ts <- manyTill anyToken (try $ lookAhead lim) let s = acc <> stringify (t:ts) - case M.lookup (T.toCaseFold $ T.strip s) locMap of + case M.lookup (T.toCaseFold $ T.strip s) (unLocatorMap locMap) of -- try to find a longer one, or return this one - Just l -> go s <|> return (l, False) + Just l -> go s <|> return (s, l, False) Nothing -> go s -- hard requirement for a locator to have some real digits in it @@ -252,11 +266,12 @@ isLocatorSep _ = False -- Locator Map -- -type LocatorMap = M.Map Text Text +newtype LocatorMap = LocatorMap { unLocatorMap :: M.Map Text Text } + deriving (Show) toLocatorMap :: Locale -> LocatorMap toLocatorMap locale = - foldr go mempty locatorTerms + LocatorMap $ foldr go mempty locatorTerms where go tname locmap = case M.lookup tname (localeTerms locale) of -- cgit v1.2.3