From bb11f5fb86993559f9999d4795355b02ae78cc3d Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 20 May 2021 12:06:15 -0700 Subject: LaTeX reader: More siunitx improvements. Closes #6658. There's still one slight divergence from the siunitx behavior: we get 'kg m/A/s' instead of 'kg m/(A s)'. At the moment I'm not going to worry about that. --- src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 3 +- src/Text/Pandoc/Readers/LaTeX/SIunitx.hs | 138 +++++++++++++++++++++---------- test/command/6658.md | 72 ++++++++++++++-- 3 files changed, 161 insertions(+), 52 deletions(-) diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs index b6804a825..1c77eb299 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs @@ -806,7 +806,8 @@ withRaw parser = do keyval :: PandocMonad m => LP m (Text, Text) keyval = try $ do - Tok _ Word key <- satisfyTok isWordTok + key <- untokenize <$> many1 (notFollowedBy (symbol '=') >> + (symbol '-' <|> symbol '_' <|> satisfyTok isWordTok)) sp val <- option mempty $ do symbol '=' diff --git a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs index 63ab7267d..b8bf0ce7f 100644 --- a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs +++ b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs @@ -10,27 +10,32 @@ import Text.Pandoc.Class import Text.Pandoc.Parsing hiding (blankline, mathDisplay, mathInline, optional, space, spaces, withRaw, (<|>)) import Control.Applicative ((<|>)) +import Control.Monad (void) import qualified Data.Map as M import Data.Char (isDigit) import Data.Text (Text) import qualified Data.Text as T import Data.List (intersperse) - +import qualified Data.Sequence as Seq +import Text.Pandoc.Walk (walk) siunitxCommands :: PandocMonad m => LP m Inlines -> M.Map Text (LP m Inlines) siunitxCommands tok = M.fromList - [ ("si", skipopts *> dosi tok) + [ ("si", dosi tok) , ("SI", doSI tok) , ("SIrange", doSIrange True tok) , ("numrange", doSIrange False tok) , ("numlist", doSInumlist) + , ("SIlist", doSIlist tok) , ("num", doSInum) , ("ang", doSIang) ] dosi :: PandocMonad m => LP m Inlines -> LP m Inlines -dosi tok = grouped (siUnit tok) <|> siUnit tok +dosi tok = do + options <- option [] keyvals + grouped (siUnit options tok) <|> siUnit options tok -- converts e.g. \SI{1}[\$]{} to "$ 1" or \SI{1}{\euro} to "1 €" doSI :: PandocMonad m => LP m Inlines -> LP m Inlines @@ -65,9 +70,29 @@ doSInumlist = do mconcat (intersperse (str "," <> space) (init xs)) <> text ", & " <> last xs +doSIlist :: PandocMonad m => LP m Inlines -> LP m Inlines +doSIlist tok = do + options <- option [] keyvals + nums <- map tonum . T.splitOn ";" . untokenize <$> braced + unit <- grouped (siUnit options tok) <|> siUnit options tok + let xs = map (<> (str "\xa0" <> unit)) nums + case xs of + [] -> return mempty + [x] -> return x + _ -> return $ + mconcat (intersperse (str "," <> space) (init xs)) <> + text ", & " <> last xs + parseNum :: Parser Text () Inlines parseNum = (mconcat <$> many parseNumPart) <* eof +minus :: Text +minus = "\x2212" + +hyphenToMinus :: Inline -> Inline +hyphenToMinus (Str t) = Str (T.replace "-" minus t) +hyphenToMinus x = x + parseNumPart :: Parser Text () Inlines parseNumPart = parseDecimalNum <|> @@ -83,7 +108,7 @@ parseNumPart = parseComma, parseI, parseX, parseExp, parseSpace :: Parser Text () Inlines parseDecimalNum = try $ do - pref <- option mempty $ (mempty <$ char '+') <|> ("\x2212" <$ char '-') + pref <- option mempty $ (mempty <$ char '+') <|> (minus <$ char '-') basenum' <- many1 (satisfy (\c -> isDigit c || c == '.')) let basenum = pref <> T.pack (case basenum' of @@ -155,20 +180,30 @@ doSIrange includeUnits tok = do emptyOr160 :: Inlines -> Inlines emptyOr160 x = if x == mempty then x else str "\160" -siUnit :: forall m. PandocMonad m => LP m Inlines -> LP m Inlines -siUnit tok = mconcat <$> many1 siUnitPart +siUnit :: forall m. PandocMonad m => [(Text,Text)] -> LP m Inlines -> LP m Inlines +siUnit options tok = mconcat . intersperse (str "\xa0") <$> many1 siUnitPart where siUnitPart :: LP m Inlines - siUnitPart = - (siPrefix <*> siUnitPart) - <|> (do u <- siBase <|> tok - option u $ siSuffix <*> pure u) + siUnitPart = try $ do + skipMany (void (symbol '.') <|> void (symbol '~') <|> spaces1) + x <- ((siPrefix <*> siBase) + <|> (do u <- siBase <|> tok + option u $ siSuffix <*> pure u)) + option x (siInfix x) + siInfix :: Inlines -> LP m Inlines + siInfix u1 = try $ + (do _ <- controlSeq "per" + u2 <- siUnitPart + let useSlash = lookup "per-mode" options == Just "symbol" + if useSlash + then return (u1 <> str "/" <> u2) + else return (u1 <> str "\xa0" <> negateExponent u2)) + <|> (do _ <- symbol '/' + u2 <- siUnitPart + return (u1 <> str "/" <> u2)) siPrefix :: LP m (Inlines -> Inlines) siPrefix = - (do _ <- controlSeq "per" - skipopts -- TODO handle option - return (str "/" <>)) - <|> (do _ <- controlSeq "square" + (do _ <- controlSeq "square" skipopts return (<> superscript "2")) <|> (do _ <- controlSeq "cubic" @@ -176,7 +211,7 @@ siUnit tok = mconcat <$> many1 siUnitPart return (<> superscript "3")) <|> (do _ <- controlSeq "raisetothe" skipopts - n <- tok + n <- walk hyphenToMinus <$> tok return (<> superscript n)) siSuffix :: LP m (Inlines -> Inlines) siSuffix = @@ -188,23 +223,57 @@ siUnit tok = mconcat <$> many1 siUnitPart return (<> superscript "3")) <|> (do _ <- controlSeq "tothe" skipopts - n <- tok + n <- walk hyphenToMinus <$> tok return (<> superscript n)) + <|> (symbol '^' *> (do n <- walk hyphenToMinus <$> tok + return (<> superscript n))) + <|> (symbol '_' *> (do n <- walk hyphenToMinus <$> tok + return (<> subscript n))) + negateExponent :: Inlines -> Inlines + negateExponent ils = + case Seq.viewr (unMany ils) of + xs Seq.:> Superscript ss -> (Many xs) <> + superscript (str minus <> fromList ss) + _ -> ils <> superscript (str (minus <> "1")) siBase :: LP m Inlines - siBase = mconcat <$> many1 + siBase = ((try (do Tok _ (CtrlSeq name) _ <- anyControlSeq - case M.lookup name siUnitMap of - Just il -> pure il - Nothing -> fail "not a unit command")) + case M.lookup name siUnitModifierMap of + Just il -> (il <>) <$> siBase + Nothing -> + case M.lookup name siUnitMap of + Just il -> pure il + Nothing -> fail "not a unit command")) <|> (do Tok _ Word t <- satisfyTok isWordTok return $ str t) - <|> (symbol '^' *> (superscript <$> tok)) - <|> (symbol '_' *> (subscript <$> tok)) - <|> (str "\xa0" <$ symbol '.') - <|> (str "\xa0" <$ symbol '~') ) +siUnitModifierMap :: M.Map Text Inlines +siUnitModifierMap = M.fromList + [ ("atto", str "a") + , ("centi", str "c") + , ("deca", str "d") + , ("deci", str "d") + , ("deka", str "d") + , ("exa", str "E") + , ("femto", str "f") + , ("giga", str "G") + , ("hecto", str "h") + , ("kilo", str "k") + , ("mega", str "M") + , ("micro", str "μ") + , ("milli", str "m") + , ("nano", str "n") + , ("peta", str "P") + , ("pico", str "p") + , ("tera", str "T") + , ("yocto", str "y") + , ("yotta", str "Y") + , ("zepto", str "z") + , ("zetta", str "Z") + ] + siUnitMap :: M.Map Text Inlines siUnitMap = M.fromList [ ("fg", str "fg") @@ -303,7 +372,6 @@ siUnitMap = M.fromList , ("arcsecond", str "″") , ("astronomicalunit", str "ua") , ("atomicmassunit", str "u") - , ("atto", str "a") , ("bar", str "bar") , ("barn", str "b") , ("becquerel", str "Bq") @@ -311,51 +379,38 @@ siUnitMap = M.fromList , ("bohr", emph (str "a") <> subscript (str "0")) , ("candela", str "cd") , ("celsius", str "°C") - , ("centi", str "c") , ("clight", emph (str "c") <> subscript (str "0")) , ("coulomb", str "C") , ("dalton", str "Da") , ("day", str "d") - , ("deca", str "d") - , ("deci", str "d") , ("decibel", str "db") , ("degreeCelsius",str "°C") , ("degree", str "°") - , ("deka", str "d") , ("electronmass", emph (str "m") <> subscript (str "e")) , ("electronvolt", str "eV") , ("elementarycharge", emph (str "e")) - , ("exa", str "E") , ("farad", str "F") - , ("femto", str "f") - , ("giga", str "G") , ("gram", str "g") , ("gray", str "Gy") , ("hartree", emph (str "E") <> subscript (str "h")) , ("hectare", str "ha") - , ("hecto", str "h") , ("henry", str "H") , ("hertz", str "Hz") , ("hour", str "h") , ("joule", str "J") , ("katal", str "kat") , ("kelvin", str "K") - , ("kilo", str "k") , ("kilogram", str "kg") , ("knot", str "kn") , ("liter", str "L") , ("litre", str "l") , ("lumen", str "lm") , ("lux", str "lx") - , ("mega", str "M") , ("meter", str "m") , ("metre", str "m") - , ("micro", str "μ") - , ("milli", str "m") , ("minute", str "min") , ("mmHg", str "mmHg") , ("mole", str "mol") - , ("nano", str "n") , ("nauticalmile", str "M") , ("neper", str "Np") , ("newton", str "N") @@ -363,24 +418,17 @@ siUnitMap = M.fromList , ("Pa", str "Pa") , ("pascal", str "Pa") , ("percent", str "%") - , ("peta", str "P") - , ("pico", str "p") , ("planckbar", emph (str "\x210f")) , ("radian", str "rad") , ("second", str "s") , ("siemens", str "S") , ("sievert", str "Sv") , ("steradian", str "sr") - , ("tera", str "T") , ("tesla", str "T") , ("tonne", str "t") , ("volt", str "V") , ("watt", str "W") , ("weber", str "Wb") - , ("yocto", str "y") - , ("yotta", str "Y") - , ("zepto", str "z") - , ("zetta", str "Z") ] diff --git a/test/command/6658.md b/test/command/6658.md index 549610992..96700c8fe 100644 --- a/test/command/6658.md +++ b/test/command/6658.md @@ -1,15 +1,75 @@ ``` -pandoc -f latex -\SI{10.0 +- 3.3}{\ms} +pandoc -f latex -t html +\num{12345,67890} + +\num{1+-2i} \num{.3e45} -\ang{+10;+3;} +\num{1.654 x 2.34 x 3.430} + +\si{kg.m.s^{-1}} + +\si{\kilogram\metre\per\second} + +\si[per-mode=symbol]{\kilogram\metre\per\second} + +\si[per-mode=symbol]{\kilogram\metre\per\ampere\per\second} + +\numlist{10;20;30} + +\SIlist{0.13;0.67;0.80}{\milli\metre} + +\numrange{10}{20} + +\SIrange{0.13}{0.67}{\milli\metre} + +\ang{10} + +\ang{1;2;3} + +\ang{;;1} + +\ang{+10;;} + +\ang{-0;1;} + +\si{kg.m/s^2} + +\si{g_{polymer}~mol_{cat}.s^{-1}} + +\si{\kilo\gram\metre\per\square\second} \si{\gram\per\cubic\centi\metre} + +\si{\square\volt\cubic\lumen\per\farad} + +\si{\metre\squared\per\gray\cubic\lux} + +\si{\henry\second} ^D -

10.0 ± 3.3 ms

+

12345.67890

+

1 ± 2i

0.3 × 1045

-

10°3′

-

g/cm3

+

1.654 × 2.34 × 3.430

+

kg m s−1

+

kg m s−1

+

kg m/s

+

kg m/A/s

+

10, 20, & 30

+

0.13 mm, 0.67 mm, & 0.80 mm

+

10–20

+

0.13 mm–0.67 mm

+

10°

+

1°2′3″

+

1″

+

10°

+

-0°1′

+

kg m/s2

+

gpolymer molcat s−1

+

kg m s−2

+

g cm−3

+

V2 lm3 F−1

+

m2 Gy−1 lx3

+

H s

``` -- cgit v1.2.3