about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author John MacFarlane <jgm@berkeley.edu> 2019-11-02 12:20:04 -0700
committer John MacFarlane <jgm@berkeley.edu> 2019-11-02 12:20:04 -0700
commit 041cfbd5eeffd2bc79447c4894652fc3c108c40d (patch)
tree c25be079905efba7495e1cab775fa0b188ab443c
parent 6c9a20b2d322b3cdd112d4b479014407ef38d86e (diff)
download pandoc-041cfbd5eeffd2bc79447c4894652fc3c108c40d.tar.gz
LaTeX untokenize: Ensure space between control sequence and following letter.
Closes #5836.
-rw-r--r-- src/Text/Pandoc/Readers/LaTeX.hs 1
-rw-r--r-- src/Text/Pandoc/Readers/LaTeX/Parsing.hs 16
2 files changed, 15 insertions, 2 deletions
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index b4e9de258..94fd9427b 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -618,6 +618,7 @@ accent combiningAccent fallBack = try $ do
[Space] -> return $ str [fromMaybe combiningAccent fallBack]
[] -> return $ str [fromMaybe combiningAccent fallBack]
_ -> return ils
+
mathDisplay :: String -> Inlines
mathDisplay = displayMath . trimMath
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index af354843a..14cb408b0 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -92,6 +92,7 @@ import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..),
ArgSpec (..), Tok (..), TokType (..))
import Text.Pandoc.Shared
import Text.Parsec.Pos
+-- import Debug.Trace
newtype DottedNum = DottedNum [Int]
deriving (Show)
@@ -350,10 +351,21 @@ isLowerHex :: Char -> Bool
isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f'
untokenize :: [Tok] -> Text
-untokenize = mconcat . map untoken
+untokenize = foldr untokenAccum mempty
+
+untokenAccum :: Tok -> Text -> Text
+untokenAccum (Tok _ (CtrlSeq _) t) accum =
+ -- insert space to prevent breaking a control sequence; see #5836
+ case (T.unsnoc t, T.uncons accum) of
+ (Just (_,c), Just (d,_))
+ | isLetter c
+ , isLetter d
+ -> t <> " " <> accum
+ _ -> t <> accum
+untokenAccum (Tok _ _ t) accum = t <> accum
untoken :: Tok -> Text
-untoken (Tok _ _ t) = t
+untoken t = untokenAccum t mempty
toksToString :: [Tok] -> String
toksToString = T.unpack . untokenize