From 79a3449eeb15d2cd3127a1a5910b3f4b37fd2122 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Wed, 14 Aug 2019 14:28:34 -0700
Subject: LaTeX reader: improve withRaw so it can handle cases where...

the token string is modified by a parser (e.g. accent when
it only takes part of a Word token).

Closes #5686.

Still not ideal, because we get the whole `\t0BAR` and not
just `\t0` as a raw latex inline command. But I'm willing
to let this be an edge case, since you can easily work around
this by inserting a space, braces, or raw attribute.
The important thing is that we no longer drop the rest of
the document after a raw latex inline command that gobbles
only part of a Word token!
---
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 5 +++--
 test/command/5686.md                     | 9 +++++++++
 2 files changed, 12 insertions(+), 2 deletions(-)
 create mode 100644 test/command/5686.md

diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index eeebab3e6..b9114d34c 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -677,6 +677,7 @@ withRaw :: PandocMonad m => LP m a -> LP m (a, [Tok])
 withRaw parser = do
   inp <- getInput
   result <- parser
-  nxt <- option (Tok (initialPos "source") Word "") (lookAhead anyTok)
-  let raw = takeWhile (/= nxt) inp
+  nxtpos <- option Nothing ((\(Tok pos' _ _) -> Just pos') <$> lookAhead anyTok)
+  let raw = takeWhile (\(Tok pos _ _) -> maybe True
+             (\p -> sourceName p /= sourceName pos || pos < p) nxtpos) inp
   return (result, raw)
diff --git a/test/command/5686.md b/test/command/5686.md
new file mode 100644
index 000000000..67b4579ef
--- /dev/null
+++ b/test/command/5686.md
@@ -0,0 +1,9 @@
+```
+% pandoc -t native
+FOO\t0BAR
+
+This part does not make it to the html output.
+^D
+[Para [Str "FOO",RawInline (Format "tex") "\\t0BAR"]
+,Para [Str "This",Space,Str "part",Space,Str "does",Space,Str "not",Space,Str "make",Space,Str "it",Space,Str "to",Space,Str "the",Space,Str "html",Space,Str "output."]]
+```
-- 
cgit v1.2.3