From e3a263df4669450049c49043412f791261e049a4 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 11 Aug 2021 16:14:34 -0700 Subject: Fix scope for LaTeX macros. They should by default scope over the group in which they are defined (except `\gdef` and `\xdef`, which are global). In addition, environments must be treated as groups. We handle this by making sMacros in the LaTeX parser state a STACK of macro tables. Opening a group adds a table to the stack, closing one removes one. Only the top of the stack is queried. This commit adds a parameter for scope to the Macro constructor (not exported). Closes #7494. --- src/Text/Pandoc/Readers/LaTeX/Macro.hs | 100 ++++++++++++++++++------------- src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 49 +++++++++++---- src/Text/Pandoc/Readers/LaTeX/Types.hs | 6 +- test/command/7494.md | 50 ++++++++++++++++ 4 files changed, 150 insertions(+), 55 deletions(-) create mode 100644 test/command/7494.md diff --git a/src/Text/Pandoc/Readers/LaTeX/Macro.hs b/src/Text/Pandoc/Readers/LaTeX/Macro.hs index 6a7b0eed3..5709cbb8c 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Macro.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Macro.hs @@ -15,6 +15,8 @@ import Control.Applicative ((<|>), optional) import qualified Data.Map as M import Data.Text (Text) import qualified Data.Text as T +import qualified Data.List.NonEmpty as NonEmpty +import Data.List.NonEmpty (NonEmpty(..)) macroDef :: (PandocMonad m, Monoid a) => (Text -> a) -> LP m a macroDef constructor = do @@ -26,24 +28,36 @@ macroDef constructor = do nameMacroPairs <- newcommand <|> letmacro <|> edefmacro <|> defmacro <|> newif guardDisabled Ext_latex_macros <|> - mapM_ (\(name, macro') -> - updateState (\s -> s{ sMacros = M.insert name macro' - (sMacros s) })) nameMacroPairs + mapM_ insertMacro nameMacroPairs environmentDef = do mbenv <- newenvironment case mbenv of Nothing -> return () Just (name, macro1, macro2) -> guardDisabled Ext_latex_macros <|> - do updateState $ \s -> s{ sMacros = - M.insert name macro1 (sMacros s) } - updateState $ \s -> s{ sMacros = - M.insert ("end" <> name) macro2 (sMacros s) } + do insertMacro (name, macro1) + insertMacro ("end" <> name, macro2) -- @\newenvironment{envname}[n-args][default]{begin}{end}@ -- is equivalent to -- @\newcommand{\envname}[n-args][default]{begin}@ -- @\newcommand{\endenvname}@ +insertMacro :: PandocMonad m => (Text, Macro) -> LP m () +insertMacro (name, macro'@(Macro GlobalScope _ _ _ _)) = + updateState $ \s -> + s{ sMacros = NonEmpty.map (M.insert name macro') (sMacros s) } +insertMacro (name, macro'@(Macro GroupScope _ _ _ _)) = + updateState $ \s -> + s{ sMacros = M.insert name macro' (NonEmpty.head (sMacros s)) :| + NonEmpty.tail (sMacros s) } + +lookupMacro :: PandocMonad m => Text -> LP m Macro +lookupMacro name = do + macros :| _ <- sMacros <$> getState + case M.lookup name macros of + Just m -> return m + Nothing -> fail "Macro not found" + letmacro :: PandocMonad m => LP m [(Text, Macro)] letmacro = do controlSeq "let" @@ -54,20 +68,19 @@ letmacro = do -- we first parse in verbatim mode, and then expand macros, -- because we don't want \let\foo\bar to turn into -- \let\foo hello if we have previously \def\bar{hello} - macros <- sMacros <$> getState target <- anyControlSeq <|> singleChar case target of (Tok _ (CtrlSeq name') _) -> - case M.lookup name' macros of - Just m -> return [(name, m)] - Nothing -> return [(name, Macro ExpandWhenDefined [] Nothing [target])] - _ -> return [(name, Macro ExpandWhenDefined [] Nothing [target])] + (do m <- lookupMacro name' + pure [(name, m)]) + <|> pure [(name, + Macro GroupScope ExpandWhenDefined [] Nothing [target])] + _ -> pure [(name, Macro GroupScope ExpandWhenDefined [] Nothing [target])] edefmacro :: PandocMonad m => LP m [(Text, Macro)] edefmacro = do - controlSeq "edef" <|> controlSeq "xdef" - -- TODO Currently we don't distinguish these. \edef should only - -- affect its own group, while \xdef sets a global macro. + scope <- (GroupScope <$ controlSeq "edef") + <|> (GlobalScope <$ controlSeq "xdef") (name, contents) <- withVerbatimMode $ do Tok _ (CtrlSeq name) _ <- anyControlSeq -- we first parse in verbatim mode, and then expand macros, @@ -77,20 +90,19 @@ edefmacro = do return (name, contents) -- expand macros contents' <- parseFromToks (many anyTok) contents - return [(name, Macro ExpandWhenDefined [] Nothing contents')] + return [(name, Macro scope ExpandWhenDefined [] Nothing contents')] defmacro :: PandocMonad m => LP m [(Text, Macro)] defmacro = do -- we use withVerbatimMode, because macros are to be expanded -- at point of use, not point of definition - controlSeq "def" <|> controlSeq "gdef" - -- TODO Currently we don't distinguish these. \def should only - -- affect its own group, while \gdef sets a global macro. + scope <- (GroupScope <$ controlSeq "def") + <|> (GlobalScope <$ controlSeq "gdef") withVerbatimMode $ do Tok _ (CtrlSeq name) _ <- anyControlSeq argspecs <- many (argspecArg <|> argspecPattern) contents <- bracedOrToken - return [(name, Macro ExpandWhenUsed argspecs Nothing contents)] + return [(name, Macro scope ExpandWhenUsed argspecs Nothing contents)] -- \newif\iffoo' defines: -- \iffoo to be \iffalse @@ -105,16 +117,16 @@ newif = do -- \def\footrue{\def\iffoo\iftrue} -- \def\foofalse{\def\iffoo\iffalse} let base = T.drop 2 name - return [ (name, Macro ExpandWhenUsed [] Nothing + return [ (name, Macro GroupScope ExpandWhenUsed [] Nothing [Tok pos (CtrlSeq "iffalse") "\\iffalse"]) , (base <> "true", - Macro ExpandWhenUsed [] Nothing + Macro GroupScope ExpandWhenUsed [] Nothing [ Tok pos (CtrlSeq "def") "\\def" , Tok pos (CtrlSeq name) ("\\" <> name) , Tok pos (CtrlSeq "iftrue") "\\iftrue" ]) , (base <> "false", - Macro ExpandWhenUsed [] Nothing + Macro GroupScope ExpandWhenUsed [] Nothing [ Tok pos (CtrlSeq "def") "\\def" , Tok pos (CtrlSeq name) ("\\" <> name) , Tok pos (CtrlSeq "iffalse") "\\iffalse" @@ -161,14 +173,13 @@ newcommand = do : (contents' ++ [ Tok pos Symbol "}", Tok pos Symbol "}" ]) _ -> contents' - macros <- sMacros <$> getState - case M.lookup name macros of - Just macro - | mtype == "newcommand" -> do - report $ MacroAlreadyDefined txt pos - return [(name, macro)] - | mtype == "providecommand" -> return [(name, macro)] - _ -> return [(name, Macro ExpandWhenUsed argspecs optarg contents)] + let macro = Macro GroupScope ExpandWhenUsed argspecs optarg contents + (do lookupMacro name + case mtype of + "providecommand" -> return [] + "renewcommand" -> return [(name, macro)] + _ -> [] <$ report (MacroAlreadyDefined txt pos)) + <|> pure [(name, macro)] newenvironment :: PandocMonad m => LP m (Maybe (Text, Macro, Macro)) newenvironment = do @@ -187,17 +198,22 @@ newenvironment = do let argspecs = map (\i -> ArgNum i) [1..numargs] startcontents <- spaces >> bracedOrToken endcontents <- spaces >> bracedOrToken - macros <- sMacros <$> getState - case M.lookup name macros of - Just _ - | mtype == "newenvironment" -> do - report $ MacroAlreadyDefined name pos - return Nothing - | mtype == "provideenvironment" -> - return Nothing - _ -> return $ Just (name, - Macro ExpandWhenUsed argspecs optarg startcontents, - Macro ExpandWhenUsed [] Nothing endcontents) + -- we need the environment to be in a group so macros defined + -- inside behave correctly: + let bg = Tok pos (CtrlSeq "bgroup") "\\bgroup " + let eg = Tok pos (CtrlSeq "egroup") "\\egroup " + let result = (name, + Macro GroupScope ExpandWhenUsed argspecs optarg + (bg:startcontents), + Macro GroupScope ExpandWhenUsed [] Nothing (eg:endcontents)) + (do lookupMacro name + case mtype of + "provideenvironment" -> return Nothing + "renewenvironment" -> return (Just result) + _ -> do + report $ MacroAlreadyDefined name pos + return Nothing) + <|> return (Just result) bracketedNum :: PandocMonad m => LP m Int bracketedNum = do diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs index 8d6791abb..e1b72926f 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs @@ -102,6 +102,9 @@ import qualified Data.IntMap as IntMap import qualified Data.Map as M import qualified Data.Set as Set import Data.Text (Text) +import Data.Maybe (fromMaybe) +import Data.List.NonEmpty (NonEmpty(..)) +import qualified Data.List.NonEmpty as NonEmpty import qualified Data.Text as T import Text.Pandoc.Builder import Text.Pandoc.Class.PandocMonad (PandocMonad, report) @@ -146,7 +149,7 @@ data TheoremSpec = data LaTeXState = LaTeXState{ sOptions :: ReaderOptions , sMeta :: Meta , sQuoteContext :: QuoteContext - , sMacros :: M.Map Text Macro + , sMacros :: NonEmpty (M.Map Text Macro) , sContainers :: [Text] , sLogMessages :: [LogMessage] , sIdentifiers :: Set.Set Text @@ -173,7 +176,7 @@ defaultLaTeXState :: LaTeXState defaultLaTeXState = LaTeXState{ sOptions = def , sMeta = nullMeta , sQuoteContext = NoQuote - , sMacros = M.empty + , sMacros = M.empty :| [] , sContainers = [] , sLogMessages = [] , sIdentifiers = Set.empty @@ -220,8 +223,9 @@ instance HasIncludeFiles LaTeXState where dropLatestIncludeFile s = s { sContainers = drop 1 $ sContainers s } instance HasMacros LaTeXState where - extractMacros st = sMacros st - updateMacros f st = st{ sMacros = f (sMacros st) } + extractMacros st = NonEmpty.head $ sMacros st + updateMacros f st = st{ sMacros = f (NonEmpty.head $ sMacros st) + :| (NonEmpty.tail $ sMacros st) } instance HasReaderOptions LaTeXState where extractReaderOptions = sOptions @@ -254,7 +258,7 @@ rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s, Show a) rawLaTeXParser toks retokenize parser valParser = do pstate <- getState let lstate = def{ sOptions = extractReaderOptions pstate } - let lstate' = lstate { sMacros = extractMacros pstate } + let lstate' = lstate { sMacros = extractMacros pstate :| [] } let setStartPos = case toks of Tok pos _ _ : _ -> setPosition pos _ -> return () @@ -274,7 +278,7 @@ rawLaTeXParser toks retokenize parser valParser = do case res of Left _ -> mzero Right ((val, raw), st) -> do - updateState (updateMacros (sMacros st <>)) + updateState (updateMacros ((NonEmpty.head (sMacros st)) <>)) let skipTilPos stopPos = do anyChar pos <- getPosition @@ -299,7 +303,7 @@ applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|> do let retokenize = untokenize <$> many anyTok pstate <- getState let lstate = def{ sOptions = extractReaderOptions pstate - , sMacros = extractMacros pstate } + , sMacros = extractMacros pstate :| [] } res <- runParserT retokenize lstate "math" (tokenize "math" s) case res of Left e -> Prelude.fail (show e) @@ -552,10 +556,10 @@ doMacros' n inp = handleMacros n' spos name ts = do when (n' > 20) -- detect macro expansion loops $ throwError $ PandocMacroLoop name - macros <- sMacros <$> getState + (macros :| _ ) <- sMacros <$> getState case M.lookup name macros of Nothing -> trySpecialMacro name ts - Just (Macro expansionPoint argspecs optarg newtoks) -> do + Just (Macro _scope expansionPoint argspecs optarg newtoks) -> do let getargs' = do args <- (case expansionPoint of @@ -745,10 +749,22 @@ primEscape = do bgroup :: PandocMonad m => LP m Tok bgroup = try $ do optional sp - symbol '{' <|> controlSeq "bgroup" <|> controlSeq "begingroup" + t <- symbol '{' <|> controlSeq "bgroup" <|> controlSeq "begingroup" + -- Add a copy of the macro table to the top of the macro stack, + -- private for this group. We inherit all the macros defined in + -- the parent group. + updateState $ \s -> s{ sMacros = NonEmpty.cons (NonEmpty.head (sMacros s)) + (sMacros s) } + return t + egroup :: PandocMonad m => LP m Tok -egroup = symbol '}' <|> controlSeq "egroup" <|> controlSeq "endgroup" +egroup = do + t <- symbol '}' <|> controlSeq "egroup" <|> controlSeq "endgroup" + -- remove the group's macro table from the stack + updateState $ \s -> s{ sMacros = fromMaybe (sMacros s) $ + NonEmpty.nonEmpty (NonEmpty.tail (sMacros s)) } + return t grouped :: (PandocMonad m, Monoid a) => LP m a -> LP m a grouped parser = try $ do @@ -1017,7 +1033,16 @@ resetCaption = updateState $ \st -> st{ sCaption = Nothing , sLastLabel = Nothing } env :: PandocMonad m => Text -> LP m a -> LP m a -env name p = p <* end_ name +env name p = do + -- environments are groups as far as macros are concerned, + -- so we need a local copy of the macro table (see above, bgroup, egroup): + updateState $ \s -> s{ sMacros = NonEmpty.cons (NonEmpty.head (sMacros s)) + (sMacros s) } + result <- p + updateState $ \s -> s{ sMacros = fromMaybe (sMacros s) $ + NonEmpty.nonEmpty (NonEmpty.tail (sMacros s)) } + end_ name + return result tokWith :: PandocMonad m => LP m Inlines -> LP m Inlines tokWith inlineParser = try $ spaces >> diff --git a/src/Text/Pandoc/Readers/LaTeX/Types.hs b/src/Text/Pandoc/Readers/LaTeX/Types.hs index c20b72bc5..a4eae56db 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Types.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Types.hs @@ -15,6 +15,7 @@ module Text.Pandoc.Readers.LaTeX.Types ( Tok(..) , Macro(..) , ArgSpec(..) , ExpansionPoint(..) + , MacroScope(..) , SourcePos ) where @@ -43,7 +44,10 @@ tokToText (Tok _ _ t) = t data ExpansionPoint = ExpandWhenDefined | ExpandWhenUsed deriving (Eq, Ord, Show) -data Macro = Macro ExpansionPoint [ArgSpec] (Maybe [Tok]) [Tok] +data MacroScope = GlobalScope | GroupScope + deriving (Eq, Ord, Show) + +data Macro = Macro MacroScope ExpansionPoint [ArgSpec] (Maybe [Tok]) [Tok] deriving Show data ArgSpec = ArgNum Int | Pattern [Tok] diff --git a/test/command/7494.md b/test/command/7494.md new file mode 100644 index 000000000..d8a2ff7a5 --- /dev/null +++ b/test/command/7494.md @@ -0,0 +1,50 @@ +``` +% pandoc -f latex -t plain +\def\foo{BAR} +{\foo +\def\foo{BAZ} +\foo +} +\foo +^D +BAR BAZ BAR +``` + +``` +% pandoc -f latex -t plain +\def\foo{BAR} +{\foo +\gdef\foo{BAZ} +\foo +} +\foo +^D +BAR BAZ BAZ +``` + +``` +% pandoc -f latex -t plain +\newcommand{\aaa}{BBB} +{ +\renewcommand{\aaa}{AAA} +\aaa +} +\aaa +^D +AAA BBB +``` + +``` +% pandoc -f latex -t markdown +\newcommand{\aaa}{BBB} +\begin{quote} +\renewcommand{\aaa}{AAA} +\aaa +\end{quote} +\aaa +^D +> AAA + +BBB +``` + -- cgit v1.2.3