Merge https://github.com/jgm/pandoc

author: Igor Pashev <pashev.igor@gmail.com> 2021-12-29 15:00:59 +0200
committer: Igor Pashev <pashev.igor@gmail.com> 2021-12-29 15:00:59 +0200
commit: b4361712899fd0183fea5513180cb383979616de (patch)
tree: 688ab7ee2ab3a8cd32b4e37b506099aec95388f7 /src/Text/Pandoc/Citeproc
parent: 726ad97faebe59e024d68d293e663c02bbe423c8 (diff)
parent: d960282b105a6469c760b4308a3b81da723b7256 (diff)
download: pandoc-b4361712899fd0183fea5513180cb383979616de.tar.gz
4 files changed, 94 insertions, 80 deletions
diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs
index c178de6e9..a8e5622ed 100644
--- a/src/Text/Pandoc/Citeproc/BibTeX.hs
+++ b/src/Text/Pandoc/Citeproc/BibTeX.hs
@@ -34,7 +34,7 @@ import Text.Pandoc.Class (runPure)
 import qualified Text.Pandoc.Walk       as Walk
 import Citeproc.Types
 import Citeproc.Pandoc ()
-import Text.Pandoc.Citeproc.Util (toIETF)
+import Text.Pandoc.Citeproc.Util (toIETF, splitStrWhen)
 import Text.Pandoc.Citeproc.Data (biblatexStringMap)
 import Data.Default
 import           Data.Text              (Text)
@@ -48,13 +48,12 @@ import           Control.Monad.RWS      hiding ((<>))
 import qualified Data.Sequence          as Seq
 import           Data.Char              (isAlphaNum, isDigit, isLetter,
                                          isUpper, toLower, toUpper,
-                                         isLower, isPunctuation)
+                                         isLower, isPunctuation, isSpace)
 import           Data.List              (foldl', intercalate, intersperse)
 import           Safe                   (readMay)
 import           Text.Printf            (printf)
 import           Text.DocLayout         (literal, hsep, nest, hang, Doc(..),
                                          braces, ($$), cr)
-
 data Variant = Bibtex | Biblatex
   deriving (Show, Eq, Ord)
 
@@ -527,9 +526,9 @@ itemToReference locale variant item = do
     let fixSeriesTitle [Str xs] | isNumber xs =
           [Str (ordinalize locale xs), Space, Str (resolveKey' lang "jourser")]
         fixSeriesTitle xs = xs
-    seriesTitle' <- (Just . B.fromList . fixSeriesTitle .
-                     B.toList . resolveKey lang <$>
-                        getTitle "series") <|>
+
+    seriesTitle' <- (Just . B.fromList . fixSeriesTitle . B.toList
+                       <$> getTitle "series") <|>
                     return Nothing
     shortTitle' <- (Just <$> (guard (not hasMaintitle || isChapterlike) >>
                               getTitle "shorttitle"))
@@ -805,30 +804,34 @@ bibEntries = do
   skipMany nonEntry
   many (bibItem <* skipMany nonEntry)
  where nonEntry = bibSkip <|>
+                  comment <|>
                   try (char '@' >>
                        (bibComment <|> bibPreamble <|> bibString))
 
 bibSkip :: BibParser ()
-bibSkip = skipMany1 (satisfy (/='@'))
+bibSkip = skipMany1 (satisfy (\c -> c /='@' && c /='%'))
+
+comment :: BibParser ()
+comment = char '%' *> void anyLine
 
 bibComment :: BibParser ()
 bibComment = do
   cistring "comment"
-  spaces
+  spaces'
   void inBraces <|> bibSkip <|> return ()
 
 bibPreamble :: BibParser ()
 bibPreamble = do
   cistring "preamble"
-  spaces
+  spaces'
   void inBraces
 
 bibString :: BibParser ()
 bibString = do
   cistring "string"
-  spaces
+  spaces'
   char '{'
-  spaces
+  spaces'
   (k,v) <- entField
   char '}'
   updateState (\(l,m) -> (l, Map.insert k v m))
@@ -842,9 +845,9 @@ inBraces = do
   char '{'
   res <- manyTill
          (  take1WhileP (\c -> c /= '{' && c /= '}' && c /= '\\')
-        <|> (char '\\' >> (  (char '{' >> return "\\{")
-                         <|> (char '}' >> return "\\}")
-                         <|> return "\\"))
+        <|> (char '\\' >> (do c <- oneOf "{}"
+                              return $ T.pack ['\\',c])
+                         <|> return "\\")
         <|> (braced <$> inBraces)
          ) (char '}')
   return $ T.concat res
@@ -856,8 +859,9 @@ inQuotes :: BibParser Text
 inQuotes = do
   char '"'
   T.concat <$> manyTill
-             ( take1WhileP (\c -> c /= '{' && c /= '"' && c /= '\\')
+             ( take1WhileP (\c -> c /= '{' && c /= '"' && c /= '\\' && c /= '%')
                <|> (char '\\' >> T.cons '\\' . T.singleton <$> anyChar)
+               <|> ("" <$ (char '%' >> anyLine))
                <|> braced <$> inBraces
             ) (char '"')
 
@@ -870,32 +874,35 @@ isBibtexKeyChar :: Char -> Bool
 isBibtexKeyChar c =
   isAlphaNum c || c `elem` (".:;?!`'()$/*@_+=-[]*&" :: [Char])
 
+spaces' :: BibParser ()
+spaces' = skipMany (void (satisfy isSpace) <|> comment)
+
 bibItem :: BibParser Item
 bibItem = do
   char '@'
   pos <- getPosition
   enttype <- T.toLower <$> take1WhileP isLetter
-  spaces
+  spaces'
   char '{'
-  spaces
+  spaces'
   entid <- take1WhileP isBibtexKeyChar
-  spaces
+  spaces'
   char ','
-  spaces
-  entfields <- entField `sepEndBy` (char ',' >> spaces)
-  spaces
+  spaces'
+  entfields <- entField `sepEndBy` (char ',' >> spaces')
+  spaces'
   char '}'
   return $ Item entid pos enttype (Map.fromList entfields)
 
 entField :: BibParser (Text, Text)
 entField = do
   k <- fieldName
-  spaces
+  spaces'
   char '='
-  spaces
+  spaces'
   vs <- (expandString <|> inQuotes <|> inBraces <|> rawWord) `sepBy`
-            try (spaces >> char '#' >> spaces)
-  spaces
+            try (spaces' >> char '#' >> spaces')
+  spaces'
   return (k, T.concat vs)
 
 resolveAlias :: Text -> Text
@@ -984,8 +991,12 @@ getTitle f = do
   ils <- getField f
   utc <- gets untitlecase
   lang <- gets localeLang
+  let ils' =
+        if f == "series"
+           then resolveKey lang ils
+           else ils
   let processTitle = if utc then unTitlecase (Just lang) else id
-  return $ processTitle ils
+  return $ processTitle ils'
 
 getShortTitle :: Bool -> Text -> Bib (Maybe Inlines)
 getShortTitle requireColon f = do
@@ -1253,20 +1264,6 @@ toName opts ils = do
           , nameStaticOrdering      = False
           }
 
-splitStrWhen :: (Char -> Bool) -> [Inline] -> [Inline]
-splitStrWhen _ [] = []
-splitStrWhen p (Str xs : ys) = map Str (go xs) ++ splitStrWhen p ys
-  where go s =
-          let (w,z) = T.break p s
-           in if T.null z
-                 then if T.null w
-                         then []
-                         else [w]
-                 else if T.null w
-                         then (T.take 1 z : go (T.drop 1 z))
-                         else (w : T.take 1 z : go (T.drop 1 z))
-splitStrWhen p (x : ys) = x : splitStrWhen p ys
-
 ordinalize :: Locale -> Text -> Text
 ordinalize locale n =
   let terms = localeTerms locale
@@ -1460,14 +1457,14 @@ bookTrans z =
        _                -> [z]
 
 resolveKey :: Lang -> Inlines -> Inlines
-resolveKey lang ils = Walk.walk go ils
+resolveKey lang (Many ils) = Many $ fmap go ils
   where go (Str s) = Str $ resolveKey' lang s
         go x       = x
 
 resolveKey' :: Lang -> Text -> Text
 resolveKey' lang k =
   case Map.lookup (langLanguage lang) biblatexStringMap >>=
-        Map.lookup (T.toLower k) of
+        Map.lookup k of
     Nothing     -> k
     Just (x, _) -> either (const k) stringify $ parseLaTeX lang x
 
diff --git a/src/Text/Pandoc/Citeproc/CslJson.hs b/src/Text/Pandoc/Citeproc/CslJson.hs
index 862af5188..43c1a87ec 100644
--- a/src/Text/Pandoc/Citeproc/CslJson.hs
+++ b/src/Text/Pandoc/Citeproc/CslJson.hs
@@ -28,6 +28,7 @@ fromCslJson (CslSub x) = B.subscript (fromCslJson x)
 fromCslJson (CslSup x) = B.superscript (fromCslJson x)
 fromCslJson (CslNoCase x) = B.spanWith ("",["nocase"],[]) (fromCslJson x)
 fromCslJson (CslDiv t x) = B.spanWith ("",["csl-" <> t],[]) (fromCslJson x)
+fromCslJson (CslLink u x) = B.link u "" (fromCslJson x)
 
 cslJsonToReferences :: ByteString -> Either String [Reference Inlines]
 cslJsonToReferences raw =
diff --git a/src/Text/Pandoc/Citeproc/Locator.hs b/src/Text/Pandoc/Citeproc/Locator.hs
index f8931d7b5..0b8f79922 100644
--- a/src/Text/Pandoc/Citeproc/Locator.hs
+++ b/src/Text/Pandoc/Citeproc/Locator.hs
@@ -2,9 +2,13 @@
 {-# LANGUAGE ViewPatterns #-}
 {-# LANGUAGE OverloadedStrings #-}
 module Text.Pandoc.Citeproc.Locator
-  ( parseLocator )
+  ( parseLocator
+  , toLocatorMap
+  , LocatorInfo(..)
+  , LocatorMap(..) )
 where
 import Citeproc.Types
+import Text.Pandoc.Citeproc.Util (splitStrWhen)
 import Data.Text (Text)
 import qualified Data.Text as T
 import Data.List (foldl')
@@ -16,9 +20,17 @@ import Control.Monad (mzero)
 import qualified Data.Map as M
 import Data.Char (isSpace, isPunctuation, isDigit)
 
-parseLocator :: Locale -> [Inline] -> (Maybe (Text, Text), [Inline])
-parseLocator locale inp =
-  case parse (pLocatorWords (toLocatorMap locale)) "suffix" $ splitInp inp of
+
+data LocatorInfo =
+  LocatorInfo{ locatorRaw :: Text
+             , locatorLabel :: Text
+             , locatorLoc :: Text
+             }
+  deriving (Show)
+
+parseLocator :: LocatorMap -> [Inline] -> (Maybe LocatorInfo, [Inline])
+parseLocator locmap inp =
+  case parse (pLocatorWords locmap) "suffix" $ splitInp inp of
        Right r -> r
        Left _  -> (Nothing, maybeAddComma inp)
 
@@ -32,18 +44,16 @@ splitInp = splitStrWhen (\c -> isSpace c || (isPunctuation c && c /= ':'))
 type LocatorParser = Parsec [Inline] ()
 
 pLocatorWords :: LocatorMap
-              -> LocatorParser (Maybe (Text, Text), [Inline])
+              -> LocatorParser (Maybe LocatorInfo, [Inline])
 pLocatorWords locMap = do
   optional $ pMatchChar "," (== ',')
   optional pSpace
-  (la, lo) <- pLocatorDelimited locMap <|> pLocatorIntegrated locMap
+  info <- pLocatorDelimited locMap <|> pLocatorIntegrated locMap
   s <- getInput -- rest is suffix
-  -- need to trim, otherwise "p. 9" and "9" will have 'different' locators later on
-  -- i.e. the first one will be " 9"
   return $
-    if T.null la && T.null lo
+    if T.null (locatorLabel info) && T.null (locatorLoc info)
        then (Nothing, maybeAddComma s)
-       else (Just (la, T.strip lo), s)
+       else (Just info, s)
 
 maybeAddComma :: [Inline] -> [Inline]
 maybeAddComma [] = []
@@ -53,28 +63,30 @@ maybeAddComma ils@(Str t : _)
   , isPunctuation c = ils
 maybeAddComma ils = Str "," : Space : ils
 
-pLocatorDelimited :: LocatorMap -> LocatorParser (Text, Text)
+pLocatorDelimited :: LocatorMap -> LocatorParser LocatorInfo
 pLocatorDelimited locMap = try $ do
   _ <- pMatchChar "{" (== '{')
   skipMany pSpace -- gobble pre-spaces so label doesn't try to include them
-  (la, _) <- pLocatorLabelDelimited locMap
+  (rawlab, la, _) <- pLocatorLabelDelimited locMap
   -- we only care about balancing {} and [] (because of the outer [] scope);
   -- the rest can be anything
   let inner = do { t <- anyToken; return (True, stringify t) }
   gs <- many (pBalancedBraces [('{','}'), ('[',']')] inner)
   _ <- pMatchChar "}" (== '}')
   let lo = T.concat $ map snd gs
-  return (la, lo)
+  return $ LocatorInfo{ locatorLoc = lo,
+                        locatorLabel = la,
+                        locatorRaw = rawlab <> "{" <> lo <> "}" }
 
-pLocatorLabelDelimited :: LocatorMap -> LocatorParser (Text, Bool)
+pLocatorLabelDelimited :: LocatorMap -> LocatorParser (Text, Text, Bool)
 pLocatorLabelDelimited locMap
-  = pLocatorLabel' locMap lim <|> return ("page", True)
+  = pLocatorLabel' locMap lim <|> return ("", "page", True)
     where
         lim = stringify <$> anyToken
 
-pLocatorIntegrated :: LocatorMap -> LocatorParser (Text, Text)
+pLocatorIntegrated :: LocatorMap -> LocatorParser LocatorInfo
 pLocatorIntegrated locMap = try $ do
-  (la, wasImplicit) <- pLocatorLabelIntegrated locMap
+  (rawlab, la, wasImplicit) <- pLocatorLabelIntegrated locMap
   -- if we got the label implicitly, we have presupposed the first one is
   -- going to have a digit, so guarantee that. You _can_ have p. (a)
   -- because you specified it.
@@ -84,17 +96,20 @@ pLocatorIntegrated locMap = try $ do
   g <- try $ pLocatorWordIntegrated (not wasImplicit) >>= modifier
   gs <- many (try $ pLocatorWordIntegrated False >>= modifier)
   let lo = T.concat (g:gs)
-  return (la, lo)
+  return $ LocatorInfo{ locatorLabel = la,
+                        locatorLoc = lo,
+                        locatorRaw = rawlab <> lo }
 
-pLocatorLabelIntegrated :: LocatorMap -> LocatorParser (Text, Bool)
+pLocatorLabelIntegrated :: LocatorMap -> LocatorParser (Text, Text, Bool)
 pLocatorLabelIntegrated locMap
-  = pLocatorLabel' locMap lim <|> (lookAhead digital >> return ("page", True))
+  = pLocatorLabel' locMap lim <|>
+     (lookAhead digital >> return ("", "page", True))
     where
       lim = try $ pLocatorWordIntegrated True >>= requireRomansOrDigits
       digital = try $ pLocatorWordIntegrated True >>= requireDigits
 
 pLocatorLabel' :: LocatorMap -> LocatorParser Text
-               -> LocatorParser (Text, Bool)
+               -> LocatorParser (Text, Text, Bool)
 pLocatorLabel' locMap lim = go ""
     where
       -- grow the match string until we hit the end
@@ -105,9 +120,9 @@ pLocatorLabel' locMap lim = go ""
           t <- anyToken
           ts <- manyTill anyToken (try $ lookAhead lim)
           let s = acc <> stringify (t:ts)
-          case M.lookup (T.toCaseFold $ T.strip s) locMap of
+          case M.lookup (T.toCaseFold $ T.strip s) (unLocatorMap locMap) of
             -- try to find a longer one, or return this one
-            Just l -> go s <|> return (l, False)
+            Just l -> go s <|> return (s, l, False)
             Nothing -> go s
 
 -- hard requirement for a locator to have some real digits in it
@@ -247,27 +262,16 @@ isLocatorSep ',' = True
 isLocatorSep ';' = True
 isLocatorSep _   = False
 
-splitStrWhen :: (Char -> Bool) -> [Inline] -> [Inline]
-splitStrWhen _ [] = []
-splitStrWhen p (Str xs : ys) = go (T.unpack xs) ++ splitStrWhen p ys
-  where
-   go [] = []
-   go s = case break p s of
-             ([],[])     -> []
-             (zs,[])     -> [Str $ T.pack zs]
-             ([],w:ws) -> Str (T.singleton w) : go ws
-             (zs,w:ws) -> Str (T.pack zs) : Str (T.singleton w) : go ws
-splitStrWhen p (x : ys) = x : splitStrWhen p ys
-
 --
 -- Locator Map
 --
 
-type LocatorMap = M.Map Text Text
+newtype LocatorMap = LocatorMap { unLocatorMap :: M.Map Text Text }
+  deriving (Show)
 
 toLocatorMap :: Locale -> LocatorMap
 toLocatorMap locale =
-  foldr go mempty locatorTerms
+  LocatorMap $ foldr go mempty locatorTerms
  where
   go tname locmap =
     case M.lookup tname (localeTerms locale) of
diff --git a/src/Text/Pandoc/Citeproc/Util.hs b/src/Text/Pandoc/Citeproc/Util.hs
index 6d8e01bc9..8bffc0f32 100644
--- a/src/Text/Pandoc/Citeproc/Util.hs
+++ b/src/Text/Pandoc/Citeproc/Util.hs
@@ -1,9 +1,21 @@
 {-# LANGUAGE FlexibleContexts #-}
 {-# LANGUAGE OverloadedStrings #-}
 module Text.Pandoc.Citeproc.Util
- ( toIETF )
+ ( splitStrWhen
+ , toIETF )
 where
+import qualified Data.Text as T
 import Data.Text (Text)
+import Text.Pandoc.Definition
+
+-- Split Str elements so that characters satisfying the
+-- predicate each have their own Str.
+splitStrWhen :: (Char -> Bool) -> [Inline] -> [Inline]
+splitStrWhen p = foldr go []
+ where
+  go (Str t) = (map Str (T.groupBy goesTogether t) ++)
+  go x = (x :)
+  goesTogether c d   = not (p c || p d)
 
 toIETF :: Text -> Text
 toIETF "english"         = "en-US" -- "en-EN" unavailable in CSL
author	Igor Pashev <pashev.igor@gmail.com>	2021-12-29 15:00:59 +0200
committer	Igor Pashev <pashev.igor@gmail.com>	2021-12-29 15:00:59 +0200
commit	b4361712899fd0183fea5513180cb383979616de (patch)
tree	688ab7ee2ab3a8cd32b4e37b506099aec95388f7 /src/Text/Pandoc/Citeproc
parent	726ad97faebe59e024d68d293e663c02bbe423c8 (diff)
parent	d960282b105a6469c760b4308a3b81da723b7256 (diff)
download	pandoc-b4361712899fd0183fea5513180cb383979616de.tar.gz