From bb11f5fb86993559f9999d4795355b02ae78cc3d Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 20 May 2021 12:06:15 -0700
Subject: LaTeX reader: More siunitx improvements.  Closes #6658.

There's still one slight divergence from the siunitx behavior:
we get 'kg m/A/s' instead of 'kg m/(A s)'. At the moment I'm
not going to worry about that.
---
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs |   3 +-
 src/Text/Pandoc/Readers/LaTeX/SIunitx.hs | 138 +++++++++++++++++++++----------
 2 files changed, 95 insertions(+), 46 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index b6804a825..1c77eb299 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -806,7 +806,8 @@ withRaw parser = do
 
 keyval :: PandocMonad m => LP m (Text, Text)
 keyval = try $ do
-  Tok _ Word key <- satisfyTok isWordTok
+  key <- untokenize <$> many1 (notFollowedBy (symbol '=') >>
+                         (symbol '-' <|> symbol '_' <|> satisfyTok isWordTok))
   sp
   val <- option mempty $ do
            symbol '='
diff --git a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
index 63ab7267d..b8bf0ce7f 100644
--- a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
@@ -10,27 +10,32 @@ import Text.Pandoc.Class
 import Text.Pandoc.Parsing hiding (blankline, mathDisplay, mathInline,
                             optional, space, spaces, withRaw, (<|>))
 import Control.Applicative ((<|>))
+import Control.Monad (void)
 import qualified Data.Map as M
 import Data.Char (isDigit)
 import Data.Text (Text)
 import qualified Data.Text as T
 import Data.List (intersperse)
-
+import qualified Data.Sequence as Seq
+import Text.Pandoc.Walk (walk)
 
 siunitxCommands :: PandocMonad m
                  => LP m Inlines -> M.Map Text (LP m Inlines)
 siunitxCommands tok = M.fromList
-  [ ("si", skipopts *> dosi tok)
+  [ ("si", dosi tok)
   , ("SI", doSI tok)
   , ("SIrange", doSIrange True tok)
   , ("numrange", doSIrange False tok)
   , ("numlist", doSInumlist)
+  , ("SIlist", doSIlist tok)
   , ("num", doSInum)
   , ("ang", doSIang)
   ]
 
 dosi :: PandocMonad m => LP m Inlines -> LP m Inlines
-dosi tok = grouped (siUnit tok) <|> siUnit tok
+dosi tok = do
+  options <- option [] keyvals
+  grouped (siUnit options tok) <|> siUnit options tok
 
 -- converts e.g. \SI{1}[\$]{} to "$ 1" or \SI{1}{\euro} to "1 €"
 doSI :: PandocMonad m => LP m Inlines -> LP m Inlines
@@ -65,9 +70,29 @@ doSInumlist = do
              mconcat (intersperse (str "," <> space) (init xs)) <>
              text ", & " <> last xs
 
+doSIlist :: PandocMonad m => LP m Inlines -> LP m Inlines
+doSIlist tok = do
+  options <- option [] keyvals
+  nums <- map tonum . T.splitOn ";" . untokenize <$> braced
+  unit <- grouped (siUnit options tok) <|> siUnit options tok
+  let xs = map (<> (str "\xa0" <> unit)) nums
+  case xs of
+    []  -> return mempty
+    [x] -> return x
+    _   -> return $
+             mconcat (intersperse (str "," <> space) (init xs)) <>
+             text ", & " <> last xs
+
 parseNum :: Parser Text () Inlines
 parseNum = (mconcat <$> many parseNumPart) <* eof
 
+minus :: Text
+minus = "\x2212"
+
+hyphenToMinus :: Inline -> Inline
+hyphenToMinus (Str t) = Str (T.replace "-" minus t)
+hyphenToMinus x = x
+
 parseNumPart :: Parser Text () Inlines
 parseNumPart =
   parseDecimalNum <|>
@@ -83,7 +108,7 @@ parseNumPart =
     parseComma, parseI, parseX,
     parseExp, parseSpace :: Parser Text () Inlines
   parseDecimalNum = try $ do
-    pref <- option mempty $ (mempty <$ char '+') <|> ("\x2212" <$ char '-')
+    pref <- option mempty $ (mempty <$ char '+') <|> (minus <$ char '-')
     basenum' <- many1 (satisfy (\c -> isDigit c || c == '.'))
     let basenum = pref <> T.pack
                     (case basenum' of
@@ -155,20 +180,30 @@ doSIrange includeUnits tok = do
 emptyOr160 :: Inlines -> Inlines
 emptyOr160 x = if x == mempty then x else str "\160"
 
-siUnit :: forall m. PandocMonad m => LP m Inlines -> LP m Inlines
-siUnit tok = mconcat <$> many1 siUnitPart
+siUnit :: forall m. PandocMonad m => [(Text,Text)] -> LP m Inlines -> LP m Inlines
+siUnit options tok = mconcat . intersperse (str "\xa0") <$> many1 siUnitPart
  where
   siUnitPart :: LP m Inlines
-  siUnitPart =
-        (siPrefix <*> siUnitPart)
-    <|> (do u <- siBase <|> tok
-            option u $ siSuffix <*> pure u)
+  siUnitPart = try $ do
+    skipMany (void (symbol '.') <|> void (symbol '~') <|> spaces1)
+    x <- ((siPrefix <*> siBase)
+            <|> (do u <- siBase <|> tok
+                    option u $ siSuffix <*> pure u))
+    option x (siInfix x)
+  siInfix :: Inlines -> LP m Inlines
+  siInfix u1 = try $
+       (do _ <- controlSeq "per"
+           u2 <- siUnitPart
+           let useSlash = lookup "per-mode" options == Just "symbol"
+           if useSlash
+              then return (u1 <> str "/" <> u2)
+              else return (u1 <> str "\xa0" <> negateExponent u2))
+   <|> (do _ <- symbol '/'
+           u2 <- siUnitPart
+           return (u1 <> str "/" <> u2))
   siPrefix :: LP m (Inlines -> Inlines)
   siPrefix =
-       (do _ <- controlSeq "per"
-           skipopts -- TODO handle option
-           return (str "/" <>))
-   <|> (do _ <- controlSeq "square"
+       (do _ <- controlSeq "square"
            skipopts
            return (<> superscript "2"))
    <|> (do _ <- controlSeq "cubic"
@@ -176,7 +211,7 @@ siUnit tok = mconcat <$> many1 siUnitPart
            return (<> superscript "3"))
    <|> (do _ <- controlSeq "raisetothe"
            skipopts
-           n <- tok
+           n <- walk hyphenToMinus <$> tok
            return (<> superscript n))
   siSuffix :: LP m (Inlines -> Inlines)
   siSuffix =
@@ -188,23 +223,57 @@ siUnit tok = mconcat <$> many1 siUnitPart
            return (<> superscript "3"))
    <|> (do _ <- controlSeq "tothe"
            skipopts
-           n <- tok
+           n <- walk hyphenToMinus <$> tok
            return (<> superscript n))
+   <|> (symbol '^' *> (do n <- walk hyphenToMinus <$> tok
+                          return (<> superscript n)))
+   <|> (symbol '_' *> (do n <- walk hyphenToMinus <$> tok
+                          return (<> subscript n)))
+  negateExponent :: Inlines -> Inlines
+  negateExponent ils =
+    case Seq.viewr (unMany ils) of
+      xs Seq.:> Superscript ss -> (Many xs) <>
+                                     superscript (str minus <> fromList ss)
+      _ -> ils <> superscript (str (minus <> "1"))
   siBase :: LP m Inlines
-  siBase = mconcat <$> many1
+  siBase =
     ((try
        (do Tok _ (CtrlSeq name) _ <- anyControlSeq
-           case M.lookup name siUnitMap of
-              Just il -> pure il
-              Nothing -> fail "not a unit command"))
+           case M.lookup name siUnitModifierMap of
+              Just il -> (il <>) <$> siBase
+              Nothing ->
+                case M.lookup name siUnitMap of
+                   Just il -> pure il
+                   Nothing -> fail "not a unit command"))
     <|> (do Tok _ Word t <- satisfyTok isWordTok
             return $ str t)
-    <|> (symbol '^' *> (superscript <$> tok))
-    <|> (symbol '_' *> (subscript <$> tok))
-    <|> (str "\xa0" <$ symbol '.')
-    <|> (str "\xa0" <$ symbol '~')
      )
 
+siUnitModifierMap :: M.Map Text Inlines
+siUnitModifierMap = M.fromList
+  [ ("atto", str "a")
+  , ("centi", str "c")
+  , ("deca", str "d")
+  , ("deci", str "d")
+  , ("deka", str "d")
+  , ("exa", str "E")
+  , ("femto", str "f")
+  , ("giga", str "G")
+  , ("hecto", str "h")
+  , ("kilo", str "k")
+  , ("mega", str "M")
+  , ("micro", str "μ")
+  , ("milli", str "m")
+  , ("nano", str "n")
+  , ("peta", str "P")
+  , ("pico", str "p")
+  , ("tera", str "T")
+  , ("yocto", str "y")
+  , ("yotta", str "Y")
+  , ("zepto", str "z")
+  , ("zetta", str "Z")
+  ]
+
 siUnitMap :: M.Map Text Inlines
 siUnitMap = M.fromList
   [ ("fg", str "fg")
@@ -303,7 +372,6 @@ siUnitMap = M.fromList
   , ("arcsecond", str "″")
   , ("astronomicalunit", str "ua")
   , ("atomicmassunit", str "u")
-  , ("atto", str "a")
   , ("bar", str "bar")
   , ("barn", str "b")
   , ("becquerel", str "Bq")
@@ -311,51 +379,38 @@ siUnitMap = M.fromList
   , ("bohr", emph (str "a") <> subscript (str "0"))
   , ("candela", str "cd")
   , ("celsius", str "°C")
-  , ("centi", str "c")
   , ("clight", emph (str "c") <> subscript (str "0"))
   , ("coulomb", str "C")
   , ("dalton", str "Da")
   , ("day", str "d")
-  , ("deca", str "d")
-  , ("deci", str "d")
   , ("decibel", str "db")
   , ("degreeCelsius",str "°C")
   , ("degree", str "°")
-  , ("deka", str "d")
   , ("electronmass", emph (str "m") <> subscript (str "e"))
   , ("electronvolt", str "eV")
   , ("elementarycharge", emph (str "e"))
-  , ("exa", str "E")
   , ("farad", str "F")
-  , ("femto", str "f")
-  , ("giga", str "G")
   , ("gram", str "g")
   , ("gray", str "Gy")
   , ("hartree", emph (str "E") <> subscript (str "h"))
   , ("hectare", str "ha")
-  , ("hecto", str "h")
   , ("henry", str "H")
   , ("hertz", str "Hz")
   , ("hour", str "h")
   , ("joule", str "J")
   , ("katal", str "kat")
   , ("kelvin", str "K")
-  , ("kilo", str "k")
   , ("kilogram", str "kg")
   , ("knot", str "kn")
   , ("liter", str "L")
   , ("litre", str "l")
   , ("lumen", str "lm")
   , ("lux", str "lx")
-  , ("mega", str "M")
   , ("meter", str "m")
   , ("metre", str "m")
-  , ("micro", str "μ")
-  , ("milli", str "m")
   , ("minute", str "min")
   , ("mmHg", str "mmHg")
   , ("mole", str "mol")
-  , ("nano", str "n")
   , ("nauticalmile", str "M")
   , ("neper", str "Np")
   , ("newton", str "N")
@@ -363,24 +418,17 @@ siUnitMap = M.fromList
   , ("Pa", str "Pa")
   , ("pascal", str "Pa")
   , ("percent", str "%")
-  , ("peta", str "P")
-  , ("pico", str "p")
   , ("planckbar", emph (str "\x210f"))
   , ("radian", str "rad")
   , ("second", str "s")
   , ("siemens", str "S")
   , ("sievert", str "Sv")
   , ("steradian", str "sr")
-  , ("tera", str "T")
   , ("tesla", str "T")
   , ("tonne", str "t")
   , ("volt", str "V")
   , ("watt", str "W")
   , ("weber", str "Wb")
-  , ("yocto", str "y")
-  , ("yotta", str "Y")
-  , ("zepto", str "z")
-  , ("zetta", str "Z")
   ]
 
 
-- 
cgit v1.2.3