aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author John MacFarlane <jgm@berkeley.edu> 2019-02-01 21:17:46 -0800
committer John MacFarlane <jgm@berkeley.edu> 2019-02-01 21:17:46 -0800
commit 633a9ecfecd7a111d6727bd4c44750dbb3c9af23 (patch)
tree 9992326d40d693d644d253eeb1e26a4f5bfa6089
parent b436087bc8b59222fe031910aa85fea88cd6b1eb (diff)
download pandoc-633a9ecfecd7a111d6727bd4c44750dbb3c9af23.tar.gz
LaTeX writer: avoid `{}` after control sequences when escaping.
`\ldots{}.` doesn't behave as well as `\ldots.` with the LaTeX `ellipsis` package. This patch causes pandoc to avoid emitting the `{}` when it is not necessary. Now `\ldots` and the other control sequences used in escaping will be followed by either `{}`, a space, or nothing, depending on context. Thanks to Elliott Slaughter for the suggestion.
-rw-r--r-- src/Text/Pandoc/Writers/LaTeX.hs 62
-rw-r--r-- test/command/1126.md 5
-rw-r--r-- test/command/ascii.md 2
-rw-r--r-- test/writer.latex 4
4 files changed, 41 insertions, 32 deletions
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index bd48d50a6..307e96993 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -39,8 +39,8 @@ import Prelude
import Control.Applicative ((<|>))
import Control.Monad.State.Strict
import Data.Aeson (FromJSON, object, (.=))
-import Data.Char (isAlphaNum, isAscii, isDigit, isLetter, isPunctuation, ord,
- toLower)
+import Data.Char (isAlphaNum, isAscii, isDigit, isLetter, isSpace,
+ isPunctuation, ord, toLower)
import Data.List (foldl', intercalate, intersperse, isInfixOf, nubBy,
stripPrefix, (\\), uncons)
import Data.Maybe (catMaybes, fromMaybe, isJust, mapMaybe, isNothing)
@@ -345,10 +345,20 @@ stringToLaTeX context zs = do
Just cmd -> ((cmd ++ "{" ++ [c] ++ "}") ++)
<$> go opts ctx (drop 1 xs) -- drop combining accent
Nothing -> (c:) <$> go opts ctx xs
+ let emitcseq cs = do
+ rest <- go opts ctx xs
+ case rest of
+ c:_ | isLetter c
+ , ctx == TextString
+ -> return (cs <> " " <> rest)
+ | isSpace c -> return (cs <> "{}" <> rest)
+ | ctx == TextString
+ -> return (cs <> rest)
+ _ -> return (cs <> "{}" <> rest)
case x of
'{' -> emits "\\{"
'}' -> emits "\\}"
- '`' | ctx == CodeString -> emits "\\textasciigrave{}"
+ '`' | ctx == CodeString -> emitcseq "\\textasciigrave"
'$' | not isUrl -> emits "\\$"
'%' -> emits "\\%"
'&' -> emits "\\&"
@@ -358,19 +368,19 @@ stringToLaTeX context zs = do
-- prevent adjacent hyphens from forming ligatures
('-':_) -> emits "-\\/"
_ -> emitc '-'
- '~' | not isUrl -> emits "\\textasciitilde{}"
+ '~' | not isUrl -> emitcseq "\\textasciitilde"
'^' -> emits "\\^{}"
'\\'| isUrl -> emitc '/' -- NB. / works as path sep even on Windows
- | otherwise -> emits "\\textbackslash{}"
- '|' | not isUrl -> emits "\\textbar{}"
- '<' -> emits "\\textless{}"
- '>' -> emits "\\textgreater{}"
+ | otherwise -> emitcseq "\\textbackslash"
+ '|' | not isUrl -> emitcseq "\\textbar"
+ '<' -> emitcseq "\\textless"
+ '>' -> emitcseq "\\textgreater"
'[' -> emits "{[}" -- to avoid interpretation as
']' -> emits "{]}" -- optional arguments
- '\'' | ctx == CodeString -> emits "\\textquotesingle{}"
+ '\'' | ctx == CodeString -> emitcseq "\\textquotesingle"
'\160' -> emits "~"
'\x202F' -> emits "\\,"
- '\x2026' -> emits "\\ldots{}"
+ '\x2026' -> emitcseq "\\ldots"
'\x2018' | ligatures -> emits "`"
'\x2019' | ligatures -> emits "'"
'\x201C' | ligatures -> emits "``"
@@ -379,22 +389,22 @@ stringToLaTeX context zs = do
'\x2013' | ligatures -> emits "--"
_ | writerPreferAscii opts
-> case x of
- 'ı' -> emits "\\i "
- 'ȷ' -> emits "\\j "
- 'å' -> emits "\\aa "
- 'Å' -> emits "\\AA "
- 'ß' -> emits "\\ss "
- 'ø' -> emits "\\o "
- 'Ø' -> emits "\\O "
- 'Ł' -> emits "\\L "
- 'ł' -> emits "\\l "
- 'æ' -> emits "\\ae "
- 'Æ' -> emits "\\AE "
- 'œ' -> emits "\\oe "
- 'Œ' -> emits "\\OE "
- '£' -> emits "\\pounds "
- '€' -> emits "\\euro "
- '©' -> emits "\\copyright "
+ 'ı' -> emitcseq "\\i"
+ 'ȷ' -> emitcseq "\\j"
+ 'å' -> emitcseq "\\aa"
+ 'Å' -> emitcseq "\\AA"
+ 'ß' -> emitcseq "\\ss"
+ 'ø' -> emitcseq "\\o"
+ 'Ø' -> emitcseq "\\O"
+ 'Ł' -> emitcseq "\\L"
+ 'ł' -> emitcseq "\\l"
+ 'æ' -> emitcseq "\\ae"
+ 'Æ' -> emitcseq "\\AE"
+ 'œ' -> emitcseq "\\oe"
+ 'Œ' -> emitcseq "\\OE"
+ '£' -> emitcseq "\\pounds"
+ '€' -> emitcseq "\\euro"
+ '©' -> emitcseq "\\copyright"
_ -> emitc x
| otherwise -> emitc x
diff --git a/test/command/1126.md b/test/command/1126.md
index 014a8ae2d..e78646fad 100644
--- a/test/command/1126.md
+++ b/test/command/1126.md
@@ -5,9 +5,8 @@ A&=&B,\\
C&=&D
\end{eqnarray}
^D
-\textbackslash{}begin\{eqnarray\}
-A\&=\&B,\textbackslash{}\textbackslash{} C\&=\&D
-\textbackslash{}end\{eqnarray\}
+\textbackslash begin\{eqnarray\} A\&=\&B,\textbackslash\textbackslash{}
+C\&=\&D \textbackslash end\{eqnarray\}
```
```
diff --git a/test/command/ascii.md b/test/command/ascii.md
index d01389a60..492bf248f 100644
--- a/test/command/ascii.md
+++ b/test/command/ascii.md
@@ -9,7 +9,7 @@ pandoc -t html --ascii
pandoc -t latex --ascii
äéıå
^D
-\"{a}\'{e}\i \r{a}
+\"{a}\'{e}\i\r{a}
```
```
diff --git a/test/writer.latex b/test/writer.latex
index 04ba7cd15..45723c94e 100644
--- a/test/writer.latex
+++ b/test/writer.latex
@@ -719,7 +719,7 @@ Subscripts: H\textsubscript{2}O, H\textsubscript{23}O,
H\textsubscript{many~of~them}O.
These should not be superscripts or subscripts, because of the unescaped
-spaces: a\^{}b c\^{}d, a\textasciitilde{}b c\textasciitilde{}d.
+spaces: a\^{}b c\^{}d, a\textasciitilde b c\textasciitilde d.
\begin{center}\rule{0.5\linewidth}{\linethickness}\end{center}
@@ -741,7 +741,7 @@ Some dashes: one---two --- three---four --- five.
Dashes between numbers: 5--7, 255--66, 1987--1999.
-Ellipses\ldots{}and\ldots{}and\ldots{}.
+Ellipses\ldots and\ldots and\ldots.
\begin{center}\rule{0.5\linewidth}{\linethickness}\end{center}