aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2021-05-20 12:06:15 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2021-05-20 15:30:31 -0700
commitbb11f5fb86993559f9999d4795355b02ae78cc3d (patch)
tree6dbf8c51250db532167b08c0197cbeb79279ba30
parent4e990a8cf9207f2315d6a55a45c93c2857663316 (diff)
downloadpandoc-bb11f5fb86993559f9999d4795355b02ae78cc3d.tar.gz
LaTeX reader: More siunitx improvements. Closes #6658.
There's still one slight divergence from the siunitx behavior: we get 'kg m/A/s' instead of 'kg m/(A s)'. At the moment I'm not going to worry about that.
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Parsing.hs3
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/SIunitx.hs138
-rw-r--r--test/command/6658.md72
3 files changed, 161 insertions, 52 deletions
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index b6804a825..1c77eb299 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -806,7 +806,8 @@ withRaw parser = do
keyval :: PandocMonad m => LP m (Text, Text)
keyval = try $ do
- Tok _ Word key <- satisfyTok isWordTok
+ key <- untokenize <$> many1 (notFollowedBy (symbol '=') >>
+ (symbol '-' <|> symbol '_' <|> satisfyTok isWordTok))
sp
val <- option mempty $ do
symbol '='
diff --git a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
index 63ab7267d..b8bf0ce7f 100644
--- a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
@@ -10,27 +10,32 @@ import Text.Pandoc.Class
import Text.Pandoc.Parsing hiding (blankline, mathDisplay, mathInline,
optional, space, spaces, withRaw, (<|>))
import Control.Applicative ((<|>))
+import Control.Monad (void)
import qualified Data.Map as M
import Data.Char (isDigit)
import Data.Text (Text)
import qualified Data.Text as T
import Data.List (intersperse)
-
+import qualified Data.Sequence as Seq
+import Text.Pandoc.Walk (walk)
siunitxCommands :: PandocMonad m
=> LP m Inlines -> M.Map Text (LP m Inlines)
siunitxCommands tok = M.fromList
- [ ("si", skipopts *> dosi tok)
+ [ ("si", dosi tok)
, ("SI", doSI tok)
, ("SIrange", doSIrange True tok)
, ("numrange", doSIrange False tok)
, ("numlist", doSInumlist)
+ , ("SIlist", doSIlist tok)
, ("num", doSInum)
, ("ang", doSIang)
]
dosi :: PandocMonad m => LP m Inlines -> LP m Inlines
-dosi tok = grouped (siUnit tok) <|> siUnit tok
+dosi tok = do
+ options <- option [] keyvals
+ grouped (siUnit options tok) <|> siUnit options tok
-- converts e.g. \SI{1}[\$]{} to "$ 1" or \SI{1}{\euro} to "1 €"
doSI :: PandocMonad m => LP m Inlines -> LP m Inlines
@@ -65,9 +70,29 @@ doSInumlist = do
mconcat (intersperse (str "," <> space) (init xs)) <>
text ", & " <> last xs
+doSIlist :: PandocMonad m => LP m Inlines -> LP m Inlines
+doSIlist tok = do
+ options <- option [] keyvals
+ nums <- map tonum . T.splitOn ";" . untokenize <$> braced
+ unit <- grouped (siUnit options tok) <|> siUnit options tok
+ let xs = map (<> (str "\xa0" <> unit)) nums
+ case xs of
+ [] -> return mempty
+ [x] -> return x
+ _ -> return $
+ mconcat (intersperse (str "," <> space) (init xs)) <>
+ text ", & " <> last xs
+
parseNum :: Parser Text () Inlines
parseNum = (mconcat <$> many parseNumPart) <* eof
+minus :: Text
+minus = "\x2212"
+
+hyphenToMinus :: Inline -> Inline
+hyphenToMinus (Str t) = Str (T.replace "-" minus t)
+hyphenToMinus x = x
+
parseNumPart :: Parser Text () Inlines
parseNumPart =
parseDecimalNum <|>
@@ -83,7 +108,7 @@ parseNumPart =
parseComma, parseI, parseX,
parseExp, parseSpace :: Parser Text () Inlines
parseDecimalNum = try $ do
- pref <- option mempty $ (mempty <$ char '+') <|> ("\x2212" <$ char '-')
+ pref <- option mempty $ (mempty <$ char '+') <|> (minus <$ char '-')
basenum' <- many1 (satisfy (\c -> isDigit c || c == '.'))
let basenum = pref <> T.pack
(case basenum' of
@@ -155,20 +180,30 @@ doSIrange includeUnits tok = do
emptyOr160 :: Inlines -> Inlines
emptyOr160 x = if x == mempty then x else str "\160"
-siUnit :: forall m. PandocMonad m => LP m Inlines -> LP m Inlines
-siUnit tok = mconcat <$> many1 siUnitPart
+siUnit :: forall m. PandocMonad m => [(Text,Text)] -> LP m Inlines -> LP m Inlines
+siUnit options tok = mconcat . intersperse (str "\xa0") <$> many1 siUnitPart
where
siUnitPart :: LP m Inlines
- siUnitPart =
- (siPrefix <*> siUnitPart)
- <|> (do u <- siBase <|> tok
- option u $ siSuffix <*> pure u)
+ siUnitPart = try $ do
+ skipMany (void (symbol '.') <|> void (symbol '~') <|> spaces1)
+ x <- ((siPrefix <*> siBase)
+ <|> (do u <- siBase <|> tok
+ option u $ siSuffix <*> pure u))
+ option x (siInfix x)
+ siInfix :: Inlines -> LP m Inlines
+ siInfix u1 = try $
+ (do _ <- controlSeq "per"
+ u2 <- siUnitPart
+ let useSlash = lookup "per-mode" options == Just "symbol"
+ if useSlash
+ then return (u1 <> str "/" <> u2)
+ else return (u1 <> str "\xa0" <> negateExponent u2))
+ <|> (do _ <- symbol '/'
+ u2 <- siUnitPart
+ return (u1 <> str "/" <> u2))
siPrefix :: LP m (Inlines -> Inlines)
siPrefix =
- (do _ <- controlSeq "per"
- skipopts -- TODO handle option
- return (str "/" <>))
- <|> (do _ <- controlSeq "square"
+ (do _ <- controlSeq "square"
skipopts
return (<> superscript "2"))
<|> (do _ <- controlSeq "cubic"
@@ -176,7 +211,7 @@ siUnit tok = mconcat <$> many1 siUnitPart
return (<> superscript "3"))
<|> (do _ <- controlSeq "raisetothe"
skipopts
- n <- tok
+ n <- walk hyphenToMinus <$> tok
return (<> superscript n))
siSuffix :: LP m (Inlines -> Inlines)
siSuffix =
@@ -188,23 +223,57 @@ siUnit tok = mconcat <$> many1 siUnitPart
return (<> superscript "3"))
<|> (do _ <- controlSeq "tothe"
skipopts
- n <- tok
+ n <- walk hyphenToMinus <$> tok
return (<> superscript n))
+ <|> (symbol '^' *> (do n <- walk hyphenToMinus <$> tok
+ return (<> superscript n)))
+ <|> (symbol '_' *> (do n <- walk hyphenToMinus <$> tok
+ return (<> subscript n)))
+ negateExponent :: Inlines -> Inlines
+ negateExponent ils =
+ case Seq.viewr (unMany ils) of
+ xs Seq.:> Superscript ss -> (Many xs) <>
+ superscript (str minus <> fromList ss)
+ _ -> ils <> superscript (str (minus <> "1"))
siBase :: LP m Inlines
- siBase = mconcat <$> many1
+ siBase =
((try
(do Tok _ (CtrlSeq name) _ <- anyControlSeq
- case M.lookup name siUnitMap of
- Just il -> pure il
- Nothing -> fail "not a unit command"))
+ case M.lookup name siUnitModifierMap of
+ Just il -> (il <>) <$> siBase
+ Nothing ->
+ case M.lookup name siUnitMap of
+ Just il -> pure il
+ Nothing -> fail "not a unit command"))
<|> (do Tok _ Word t <- satisfyTok isWordTok
return $ str t)
- <|> (symbol '^' *> (superscript <$> tok))
- <|> (symbol '_' *> (subscript <$> tok))
- <|> (str "\xa0" <$ symbol '.')
- <|> (str "\xa0" <$ symbol '~')
)
+siUnitModifierMap :: M.Map Text Inlines
+siUnitModifierMap = M.fromList
+ [ ("atto", str "a")
+ , ("centi", str "c")
+ , ("deca", str "d")
+ , ("deci", str "d")
+ , ("deka", str "d")
+ , ("exa", str "E")
+ , ("femto", str "f")
+ , ("giga", str "G")
+ , ("hecto", str "h")
+ , ("kilo", str "k")
+ , ("mega", str "M")
+ , ("micro", str "μ")
+ , ("milli", str "m")
+ , ("nano", str "n")
+ , ("peta", str "P")
+ , ("pico", str "p")
+ , ("tera", str "T")
+ , ("yocto", str "y")
+ , ("yotta", str "Y")
+ , ("zepto", str "z")
+ , ("zetta", str "Z")
+ ]
+
siUnitMap :: M.Map Text Inlines
siUnitMap = M.fromList
[ ("fg", str "fg")
@@ -303,7 +372,6 @@ siUnitMap = M.fromList
, ("arcsecond", str "″")
, ("astronomicalunit", str "ua")
, ("atomicmassunit", str "u")
- , ("atto", str "a")
, ("bar", str "bar")
, ("barn", str "b")
, ("becquerel", str "Bq")
@@ -311,51 +379,38 @@ siUnitMap = M.fromList
, ("bohr", emph (str "a") <> subscript (str "0"))
, ("candela", str "cd")
, ("celsius", str "°C")
- , ("centi", str "c")
, ("clight", emph (str "c") <> subscript (str "0"))
, ("coulomb", str "C")
, ("dalton", str "Da")
, ("day", str "d")
- , ("deca", str "d")
- , ("deci", str "d")
, ("decibel", str "db")
, ("degreeCelsius",str "°C")
, ("degree", str "°")
- , ("deka", str "d")
, ("electronmass", emph (str "m") <> subscript (str "e"))
, ("electronvolt", str "eV")
, ("elementarycharge", emph (str "e"))
- , ("exa", str "E")
, ("farad", str "F")
- , ("femto", str "f")
- , ("giga", str "G")
, ("gram", str "g")
, ("gray", str "Gy")
, ("hartree", emph (str "E") <> subscript (str "h"))
, ("hectare", str "ha")
- , ("hecto", str "h")
, ("henry", str "H")
, ("hertz", str "Hz")
, ("hour", str "h")
, ("joule", str "J")
, ("katal", str "kat")
, ("kelvin", str "K")
- , ("kilo", str "k")
, ("kilogram", str "kg")
, ("knot", str "kn")
, ("liter", str "L")
, ("litre", str "l")
, ("lumen", str "lm")
, ("lux", str "lx")
- , ("mega", str "M")
, ("meter", str "m")
, ("metre", str "m")
- , ("micro", str "μ")
- , ("milli", str "m")
, ("minute", str "min")
, ("mmHg", str "mmHg")
, ("mole", str "mol")
- , ("nano", str "n")
, ("nauticalmile", str "M")
, ("neper", str "Np")
, ("newton", str "N")
@@ -363,24 +418,17 @@ siUnitMap = M.fromList
, ("Pa", str "Pa")
, ("pascal", str "Pa")
, ("percent", str "%")
- , ("peta", str "P")
- , ("pico", str "p")
, ("planckbar", emph (str "\x210f"))
, ("radian", str "rad")
, ("second", str "s")
, ("siemens", str "S")
, ("sievert", str "Sv")
, ("steradian", str "sr")
- , ("tera", str "T")
, ("tesla", str "T")
, ("tonne", str "t")
, ("volt", str "V")
, ("watt", str "W")
, ("weber", str "Wb")
- , ("yocto", str "y")
- , ("yotta", str "Y")
- , ("zepto", str "z")
- , ("zetta", str "Z")
]
diff --git a/test/command/6658.md b/test/command/6658.md
index 549610992..96700c8fe 100644
--- a/test/command/6658.md
+++ b/test/command/6658.md
@@ -1,15 +1,75 @@
```
-pandoc -f latex
-\SI{10.0 +- 3.3}{\ms}
+pandoc -f latex -t html
+\num{12345,67890}
+
+\num{1+-2i}
\num{.3e45}
-\ang{+10;+3;}
+\num{1.654 x 2.34 x 3.430}
+
+\si{kg.m.s^{-1}}
+
+\si{\kilogram\metre\per\second}
+
+\si[per-mode=symbol]{\kilogram\metre\per\second}
+
+\si[per-mode=symbol]{\kilogram\metre\per\ampere\per\second}
+
+\numlist{10;20;30}
+
+\SIlist{0.13;0.67;0.80}{\milli\metre}
+
+\numrange{10}{20}
+
+\SIrange{0.13}{0.67}{\milli\metre}
+
+\ang{10}
+
+\ang{1;2;3}
+
+\ang{;;1}
+
+\ang{+10;;}
+
+\ang{-0;1;}
+
+\si{kg.m/s^2}
+
+\si{g_{polymer}~mol_{cat}.s^{-1}}
+
+\si{\kilo\gram\metre\per\square\second}
\si{\gram\per\cubic\centi\metre}
+
+\si{\square\volt\cubic\lumen\per\farad}
+
+\si{\metre\squared\per\gray\cubic\lux}
+
+\si{\henry\second}
^D
-<p>10.0 ± 3.3 ms</p>
+<p>12345.67890</p>
+<p>1 ± 2i</p>
<p>0.3 × 10<sup>45</sup></p>
-<p>10°3′</p>
-<p>g/cm<sup>3</sup></p>
+<p>1.654 × 2.34 × 3.430</p>
+<p>kg m s<sup>−1</sup></p>
+<p>kg m s<sup>−1</sup></p>
+<p>kg m/s</p>
+<p>kg m/A/s</p>
+<p>10, 20, &amp; 30</p>
+<p>0.13 mm, 0.67 mm, &amp; 0.80 mm</p>
+<p>10–20</p>
+<p>0.13 mm–0.67 mm</p>
+<p>10°</p>
+<p>1°2′3″</p>
+<p>1″</p>
+<p>10°</p>
+<p>-0°1′</p>
+<p>kg m/s<sup>2</sup></p>
+<p>g<sub>polymer</sub> mol<sub>cat</sub> s<sup>−1</sup></p>
+<p>kg m s<sup>−2</sup></p>
+<p>g cm<sup>−3</sup></p>
+<p>V<sup>2</sup> lm<sup>3</sup> F<sup>−1</sup></p>
+<p>m<sup>2</sup> Gy<sup>−1</sup> lx<sup>3</sup></p>
+<p>H s</p>
```