From ea77f2e6f653d5b570109fa208dc427d99f95b51 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 13 Dec 2021 12:07:24 -0800
Subject: Citeproc changes:

T.P.Citeproc exports `getCiteprocLang` and `getStyle` [API change].

T.P.Citeproc.Locator now exports `toLocatorMap`, `LocatorInfo`,
and `LocatorMap`.  The type of `parseLocator` has changed, so
it now takes a `LocatorMap` rather than a `Locale` as parameter,
and returns a `LocatorInfo` instead of a tuple.
---
 src/Text/Pandoc/Citeproc.hs         | 29 ++++++++---------
 src/Text/Pandoc/Citeproc/Locator.hs | 65 +++++++++++++++++++++++--------------
 2 files changed, 53 insertions(+), 41 deletions(-)

diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs
index 223925522..d769a91c8 100644
--- a/src/Text/Pandoc/Citeproc.hs
+++ b/src/Text/Pandoc/Citeproc.hs
@@ -7,13 +7,15 @@
 module Text.Pandoc.Citeproc
   ( processCitations,
     getReferences,
-    getStyle
+    getStyle,
+    getCiteprocLang
   )
 where
 
 import Citeproc
 import Citeproc.Pandoc ()
-import Text.Pandoc.Citeproc.Locator (parseLocator)
+import Text.Pandoc.Citeproc.Locator (parseLocator, toLocatorMap,
+                                     LocatorInfo(..))
 import Text.Pandoc.Citeproc.CslJson (cslJsonToReferences)
 import Text.Pandoc.Citeproc.BibTeX (readBibtexString, Variant(..))
 import Text.Pandoc.Citeproc.MetaValue (metaValueToReference, metaValueToText)
@@ -49,12 +51,10 @@ import qualified Data.Text as T
 import System.FilePath (takeExtension)
 import Safe (lastMay, initSafe)
 
-
 processCitations  :: PandocMonad m => Pandoc -> m Pandoc
 processCitations (Pandoc meta bs) = do
   style <- getStyle (Pandoc meta bs)
-
-  mblang <- getLang meta
+  mblang <- getCiteprocLang meta
   let locale = Citeproc.mergeLocales mblang style
 
   refs <- getReferences (Just locale) (Pandoc meta bs)
@@ -166,10 +166,9 @@ getStyle (Pandoc meta _) = do
 
 
 -- Retrieve citeproc lang based on metadata.
-getLang :: PandocMonad m => Meta -> m (Maybe Lang)
-getLang meta = maybe (return Nothing) bcp47LangToIETF
-                 ((lookupMeta "lang" meta <|> lookupMeta "locale" meta) >>=
-                   metaValueToText)
+getCiteprocLang :: PandocMonad m => Meta -> m (Maybe Lang)
+getCiteprocLang meta = maybe (return Nothing) bcp47LangToIETF
+  ((lookupMeta "lang" meta <|> lookupMeta "locale" meta) >>= metaValueToText)
 
 -- | Get references defined inline in the metadata and via an external
 -- bibliography.  Only references that are actually cited in the document
@@ -181,7 +180,7 @@ getReferences mblocale (Pandoc meta bs) = do
   locale <- case mblocale of
                 Just l  -> return l
                 Nothing -> do
-                  mblang <- getLang meta
+                  mblang <- getCiteprocLang meta
                   case mblang of
                     Just lang -> return $ either mempty id $ getLocale lang
                     Nothing   -> return mempty
@@ -307,17 +306,15 @@ fromPandocCitations :: Locale
                     -> [CitationItem Inlines]
 fromPandocCitations locale otherIdsMap = concatMap go
  where
+  locmap = toLocatorMap locale
   go c =
-    let (loclab, suffix) = parseLocator locale (citationSuffix c)
-        (mblab, mbloc) = case loclab of
-                           Just (loc, lab) -> (Just loc, Just lab)
-                           Nothing         -> (Nothing, Nothing)
+    let (mblocinfo, suffix) = parseLocator locmap (citationSuffix c)
         cit = CitationItem
                { citationItemId = fromMaybe
                    (ItemId $ Pandoc.citationId c)
                    (M.lookup (Pandoc.citationId c) otherIdsMap)
-               , citationItemLabel = mblab
-               , citationItemLocator = mbloc
+               , citationItemLabel = locatorLabel <$> mblocinfo
+               , citationItemLocator = locatorLoc <$> mblocinfo
                , citationItemType = NormalCite
                , citationItemPrefix = case citationPrefix c of
                                         [] -> Nothing
diff --git a/src/Text/Pandoc/Citeproc/Locator.hs b/src/Text/Pandoc/Citeproc/Locator.hs
index ce606197b..0b8f79922 100644
--- a/src/Text/Pandoc/Citeproc/Locator.hs
+++ b/src/Text/Pandoc/Citeproc/Locator.hs
@@ -2,7 +2,10 @@
 {-# LANGUAGE ViewPatterns #-}
 {-# LANGUAGE OverloadedStrings #-}
 module Text.Pandoc.Citeproc.Locator
-  ( parseLocator )
+  ( parseLocator
+  , toLocatorMap
+  , LocatorInfo(..)
+  , LocatorMap(..) )
 where
 import Citeproc.Types
 import Text.Pandoc.Citeproc.Util (splitStrWhen)
@@ -17,9 +20,17 @@ import Control.Monad (mzero)
 import qualified Data.Map as M
 import Data.Char (isSpace, isPunctuation, isDigit)
 
-parseLocator :: Locale -> [Inline] -> (Maybe (Text, Text), [Inline])
-parseLocator locale inp =
-  case parse (pLocatorWords (toLocatorMap locale)) "suffix" $ splitInp inp of
+
+data LocatorInfo =
+  LocatorInfo{ locatorRaw :: Text
+             , locatorLabel :: Text
+             , locatorLoc :: Text
+             }
+  deriving (Show)
+
+parseLocator :: LocatorMap -> [Inline] -> (Maybe LocatorInfo, [Inline])
+parseLocator locmap inp =
+  case parse (pLocatorWords locmap) "suffix" $ splitInp inp of
        Right r -> r
        Left _  -> (Nothing, maybeAddComma inp)
 
@@ -33,18 +44,16 @@ splitInp = splitStrWhen (\c -> isSpace c || (isPunctuation c && c /= ':'))
 type LocatorParser = Parsec [Inline] ()
 
 pLocatorWords :: LocatorMap
-              -> LocatorParser (Maybe (Text, Text), [Inline])
+              -> LocatorParser (Maybe LocatorInfo, [Inline])
 pLocatorWords locMap = do
   optional $ pMatchChar "," (== ',')
   optional pSpace
-  (la, lo) <- pLocatorDelimited locMap <|> pLocatorIntegrated locMap
+  info <- pLocatorDelimited locMap <|> pLocatorIntegrated locMap
   s <- getInput -- rest is suffix
-  -- need to trim, otherwise "p. 9" and "9" will have 'different' locators later on
-  -- i.e. the first one will be " 9"
   return $
-    if T.null la && T.null lo
+    if T.null (locatorLabel info) && T.null (locatorLoc info)
        then (Nothing, maybeAddComma s)
-       else (Just (la, T.strip lo), s)
+       else (Just info, s)
 
 maybeAddComma :: [Inline] -> [Inline]
 maybeAddComma [] = []
@@ -54,28 +63,30 @@ maybeAddComma ils@(Str t : _)
   , isPunctuation c = ils
 maybeAddComma ils = Str "," : Space : ils
 
-pLocatorDelimited :: LocatorMap -> LocatorParser (Text, Text)
+pLocatorDelimited :: LocatorMap -> LocatorParser LocatorInfo
 pLocatorDelimited locMap = try $ do
   _ <- pMatchChar "{" (== '{')
   skipMany pSpace -- gobble pre-spaces so label doesn't try to include them
-  (la, _) <- pLocatorLabelDelimited locMap
+  (rawlab, la, _) <- pLocatorLabelDelimited locMap
   -- we only care about balancing {} and [] (because of the outer [] scope);
   -- the rest can be anything
   let inner = do { t <- anyToken; return (True, stringify t) }
   gs <- many (pBalancedBraces [('{','}'), ('[',']')] inner)
   _ <- pMatchChar "}" (== '}')
   let lo = T.concat $ map snd gs
-  return (la, lo)
+  return $ LocatorInfo{ locatorLoc = lo,
+                        locatorLabel = la,
+                        locatorRaw = rawlab <> "{" <> lo <> "}" }
 
-pLocatorLabelDelimited :: LocatorMap -> LocatorParser (Text, Bool)
+pLocatorLabelDelimited :: LocatorMap -> LocatorParser (Text, Text, Bool)
 pLocatorLabelDelimited locMap
-  = pLocatorLabel' locMap lim <|> return ("page", True)
+  = pLocatorLabel' locMap lim <|> return ("", "page", True)
     where
         lim = stringify <$> anyToken
 
-pLocatorIntegrated :: LocatorMap -> LocatorParser (Text, Text)
+pLocatorIntegrated :: LocatorMap -> LocatorParser LocatorInfo
 pLocatorIntegrated locMap = try $ do
-  (la, wasImplicit) <- pLocatorLabelIntegrated locMap
+  (rawlab, la, wasImplicit) <- pLocatorLabelIntegrated locMap
   -- if we got the label implicitly, we have presupposed the first one is
   -- going to have a digit, so guarantee that. You _can_ have p. (a)
   -- because you specified it.
@@ -85,17 +96,20 @@ pLocatorIntegrated locMap = try $ do
   g <- try $ pLocatorWordIntegrated (not wasImplicit) >>= modifier
   gs <- many (try $ pLocatorWordIntegrated False >>= modifier)
   let lo = T.concat (g:gs)
-  return (la, lo)
+  return $ LocatorInfo{ locatorLabel = la,
+                        locatorLoc = lo,
+                        locatorRaw = rawlab <> lo }
 
-pLocatorLabelIntegrated :: LocatorMap -> LocatorParser (Text, Bool)
+pLocatorLabelIntegrated :: LocatorMap -> LocatorParser (Text, Text, Bool)
 pLocatorLabelIntegrated locMap
-  = pLocatorLabel' locMap lim <|> (lookAhead digital >> return ("page", True))
+  = pLocatorLabel' locMap lim <|>
+     (lookAhead digital >> return ("", "page", True))
     where
       lim = try $ pLocatorWordIntegrated True >>= requireRomansOrDigits
       digital = try $ pLocatorWordIntegrated True >>= requireDigits
 
 pLocatorLabel' :: LocatorMap -> LocatorParser Text
-               -> LocatorParser (Text, Bool)
+               -> LocatorParser (Text, Text, Bool)
 pLocatorLabel' locMap lim = go ""
     where
       -- grow the match string until we hit the end
@@ -106,9 +120,9 @@ pLocatorLabel' locMap lim = go ""
           t <- anyToken
           ts <- manyTill anyToken (try $ lookAhead lim)
           let s = acc <> stringify (t:ts)
-          case M.lookup (T.toCaseFold $ T.strip s) locMap of
+          case M.lookup (T.toCaseFold $ T.strip s) (unLocatorMap locMap) of
             -- try to find a longer one, or return this one
-            Just l -> go s <|> return (l, False)
+            Just l -> go s <|> return (s, l, False)
             Nothing -> go s
 
 -- hard requirement for a locator to have some real digits in it
@@ -252,11 +266,12 @@ isLocatorSep _   = False
 -- Locator Map
 --
 
-type LocatorMap = M.Map Text Text
+newtype LocatorMap = LocatorMap { unLocatorMap :: M.Map Text Text }
+  deriving (Show)
 
 toLocatorMap :: Locale -> LocatorMap
 toLocatorMap locale =
-  foldr go mempty locatorTerms
+  LocatorMap $ foldr go mempty locatorTerms
  where
   go tname locmap =
     case M.lookup tname (localeTerms locale) of
-- 
cgit v1.2.3