aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2021-07-11 13:01:45 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2021-07-11 13:50:28 -0700
commitac0a9da6d85e9b7a73973a20019caa324b2c1aff (patch)
tree947537c11f2a656d29fa898b845fa9454257a606 /src/Text/Pandoc/Readers/LaTeX/Parsing.hs
parent477a67061f06827b7e807319404cc277a417e9d0 (diff)
downloadpandoc-ac0a9da6d85e9b7a73973a20019caa324b2c1aff.tar.gz
Improved parsing of raw LaTeX from Text streams (rawLaTeXParser).
We now use source positions from the token stream to tell us how much of the text stream to consume. Getting this to work required a few other changes to make token source positions accurate. Closes #7434.
Diffstat (limited to 'src/Text/Pandoc/Readers/LaTeX/Parsing.hs')
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Parsing.hs39
1 files changed, 34 insertions, 5 deletions
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index a17b1f324..9dac4d6ef 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
@@ -28,6 +29,7 @@ module Text.Pandoc.Readers.LaTeX.Parsing
, applyMacros
, tokenize
, tokenizeSources
+ , getInputTokens
, untokenize
, untoken
, totoks
@@ -246,18 +248,23 @@ withVerbatimMode parser = do
updateState $ \st -> st{ sVerbatimMode = False }
return result
-rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s)
+rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s, Show a)
=> [Tok] -> Bool -> LP m a -> LP m a
-> ParserT Sources s m (a, Text)
rawLaTeXParser toks retokenize parser valParser = do
pstate <- getState
let lstate = def{ sOptions = extractReaderOptions pstate }
let lstate' = lstate { sMacros = extractMacros pstate }
+ let setStartPos = case toks of
+ Tok pos _ _ : _ -> setPosition pos
+ _ -> return ()
+ let preparser = setStartPos >> parser
let rawparser = (,) <$> withRaw valParser <*> getState
- res' <- lift $ runParserT (snd <$> withRaw parser) lstate "chunk" toks
+ res' <- lift $ runParserT (withRaw (preparser >> getPosition))
+ lstate "chunk" toks
case res' of
Left _ -> mzero
- Right toks' -> do
+ Right (endpos, toks') -> do
res <- lift $ runParserT (do when retokenize $ do
-- retokenize, applying macros
ts <- many (satisfyTok (const True))
@@ -268,7 +275,13 @@ rawLaTeXParser toks retokenize parser valParser = do
Left _ -> mzero
Right ((val, raw), st) -> do
updateState (updateMacros (sMacros st <>))
- void $ count (T.length (untokenize toks')) anyChar
+ let skipTilPos stopPos = do
+ anyChar
+ pos <- getPosition
+ if pos >= stopPos
+ then return ()
+ else skipTilPos stopPos
+ skipTilPos endpos
let result = untokenize raw
-- ensure we end with space if input did, see #4442
let result' =
@@ -306,6 +319,17 @@ tokenizeSources = concatMap tokenizeSource . unSources
where
tokenizeSource (pos, t) = totoks pos t
+-- Return tokens from input sources. Ensure that starting position is
+-- correct.
+getInputTokens :: PandocMonad m => ParserT Sources s m [Tok]
+getInputTokens = do
+ pos <- getPosition
+ ss <- getInput
+ return $
+ case ss of
+ Sources [] -> []
+ Sources ((_,t):rest) -> tokenizeSources $ Sources ((pos,t):rest)
+
tokenize :: SourceName -> Text -> [Tok]
tokenize sourcename = totoks (initialPos sourcename)
@@ -433,8 +457,13 @@ parseFromToks :: PandocMonad m => LP m a -> [Tok] -> LP m a
parseFromToks parser toks = do
oldInput <- getInput
setInput toks
+ oldpos <- getPosition
+ case toks of
+ Tok pos _ _ : _ -> setPosition pos
+ _ -> return ()
result <- disablingWithRaw parser
setInput oldInput
+ setPosition oldpos
return result
disablingWithRaw :: PandocMonad m => LP m a -> LP m a
@@ -458,7 +487,7 @@ satisfyTok f = do
| otherwise = Nothing
updatePos :: SourcePos -> Tok -> [Tok] -> SourcePos
updatePos _spos _ (Tok pos _ _ : _) = pos
- updatePos spos _ [] = incSourceColumn spos 1
+ updatePos spos (Tok _ _ t) [] = incSourceColumn spos (T.length t)
doMacros :: PandocMonad m => LP m ()
doMacros = do