aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2017-10-16 22:03:57 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2017-10-16 22:05:34 -0700
commitc40857b38905b9ea298a777354eb4cb0da2213c6 (patch)
treecb5c453d2fcb5cfada4fa7c8aeb94205080afc74 /src/Text/Pandoc/Readers
parent9cf9a64923c672fafd1458bda6f643ada83e2b1e (diff)
downloadpandoc-c40857b38905b9ea298a777354eb4cb0da2213c6.tar.gz
Improved handling of include files in LaTeX reader.
Previously `\include` wouldn't work if the included file contained, e.g., a begin without a matching end. We've changed the Tok type so that it stores a full SourcePos, rather than just a line and column. So tokens keeep track of the file they came from. This allows us to use a simpler method for includes, which doesn't require parsing the included document as a whole. Closes #3971.
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs134
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Types.hs8
2 files changed, 81 insertions, 61 deletions
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 57db33871..8c6c7d0ff 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -71,7 +71,9 @@ import Text.Pandoc.Shared
import Text.Pandoc.Readers.LaTeX.Types (Macro(..), ExpansionPoint(..), Tok(..),
TokType(..))
import Text.Pandoc.Walk
-import Text.Pandoc.Error (PandocError(PandocParsecError, PandocMacroLoop))
+import Text.Pandoc.Error
+ (PandocError(PandocParsecError, PandocParseError, PandocMacroLoop))
+import Text.Parsec.Pos
-- for debugging:
-- import Text.Pandoc.Extensions (getDefaultExtensions)
@@ -85,7 +87,7 @@ readLaTeX :: PandocMonad m
-> m Pandoc
readLaTeX opts ltx = do
parsed <- runParserT parseLaTeX def{ sOptions = opts } "source"
- (tokenize (crFilter ltx))
+ (tokenize "source" (crFilter ltx))
case parsed of
Right result -> return result
Left e -> throwError $ PandocParsecError (T.unpack ltx) e
@@ -127,7 +129,7 @@ resolveRefs _ x = x
-- res <- runIOorExplode (runParserT p defaultLaTeXState{
-- sOptions = def{ readerExtensions =
-- enableExtension Ext_raw_tex $
--- getDefaultExtensions "latex" }} "source" (tokenize t))
+-- getDefaultExtensions "latex" }} "source" (tokenize "source" t))
-- case res of
-- Left e -> error (show e)
-- Right r -> return r
@@ -238,7 +240,7 @@ rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s)
=> LP m a -> ParserT String s m String
rawLaTeXParser parser = do
inp <- getInput
- let toks = tokenize $ T.pack inp
+ let toks = tokenize "source" $ T.pack inp
pstate <- getState
let lstate = def{ sOptions = extractReaderOptions pstate }
res <- lift $ runParserT ((,) <$> try (snd <$> withRaw parser) <*> getState)
@@ -257,7 +259,7 @@ applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|>
pstate <- getState
let lstate = def{ sOptions = extractReaderOptions pstate
, sMacros = extractMacros pstate }
- res <- runParserT retokenize lstate "math" (tokenize (T.pack s))
+ res <- runParserT retokenize lstate "math" (tokenize "math" (T.pack s))
case res of
Left e -> fail (show e)
Right s' -> return s'
@@ -278,7 +280,7 @@ inlineCommand :: PandocMonad m => ParserT String ParserState m Inlines
inlineCommand = do
lookAhead (try (char '\\' >> letter) <|> char '$')
inp <- getInput
- let toks = tokenize $ T.pack inp
+ let toks = tokenize "chunk" $ T.pack inp
let rawinline = do
(il, raw) <- try $ withRaw (inlineEnvironment <|> inlineCommand')
st <- getState
@@ -294,32 +296,33 @@ inlineCommand = do
takeP (T.length (untokenize raw))
return il
-tokenize :: Text -> [Tok]
-tokenize = totoks (1, 1)
+tokenize :: SourceName -> Text -> [Tok]
+tokenize sourcename = totoks (initialPos sourcename)
-totoks :: (Line, Column) -> Text -> [Tok]
-totoks (lin,col) t =
+totoks :: SourcePos -> Text -> [Tok]
+totoks pos t =
case T.uncons t of
Nothing -> []
Just (c, rest)
| c == '\n' ->
- Tok (lin, col) Newline "\n"
- : totoks (lin + 1,1) rest
+ Tok pos Newline "\n"
+ : totoks (setSourceColumn (incSourceLine pos 1) 1) rest
| isSpaceOrTab c ->
let (sps, rest') = T.span isSpaceOrTab t
- in Tok (lin, col) Spaces sps
- : totoks (lin, col + T.length sps) rest'
+ in Tok pos Spaces sps
+ : totoks (incSourceColumn pos (T.length sps))
+ rest'
| isAlphaNum c ->
let (ws, rest') = T.span isAlphaNum t
- in Tok (lin, col) Word ws
- : totoks (lin, col + T.length ws) rest'
+ in Tok pos Word ws
+ : totoks (incSourceColumn pos (T.length ws)) rest'
| c == '%' ->
let (cs, rest') = T.break (== '\n') rest
- in Tok (lin, col) Comment ("%" <> cs)
- : totoks (lin, col + 1 + T.length cs) rest'
+ in Tok pos Comment ("%" <> cs)
+ : totoks (incSourceColumn pos (1 + T.length cs)) rest'
| c == '\\' ->
case T.uncons rest of
- Nothing -> [Tok (lin, col) Symbol (T.singleton c)]
+ Nothing -> [Tok pos Symbol (T.singleton c)]
Just (d, rest')
| isLetterOrAt d ->
-- \makeatletter is common in macro defs;
@@ -328,24 +331,24 @@ totoks (lin,col) t =
-- probably best for now
let (ws, rest'') = T.span isLetterOrAt rest
(ss, rest''') = T.span isSpaceOrTab rest''
- in Tok (lin, col) (CtrlSeq ws) ("\\" <> ws <> ss)
- : totoks (lin,
- col + 1 + T.length ws + T.length ss) rest'''
+ in Tok pos (CtrlSeq ws) ("\\" <> ws <> ss)
+ : totoks (incSourceColumn pos
+ (1 + T.length ws + T.length ss)) rest'''
| d == '\t' || d == '\n' ->
- Tok (lin, col) Symbol ("\\")
- : totoks (lin, col + 1) rest
+ Tok pos Symbol ("\\")
+ : totoks (incSourceColumn pos 1) rest
| otherwise ->
- Tok (lin, col) (CtrlSeq (T.singleton d)) (T.pack [c,d])
- : totoks (lin, col + 2) rest'
+ Tok pos (CtrlSeq (T.singleton d)) (T.pack [c,d])
+ : totoks (incSourceColumn pos 2) rest'
| c == '#' ->
let (t1, t2) = T.span (\d -> d >= '0' && d <= '9') rest
in case safeRead (T.unpack t1) of
Just i ->
- Tok (lin, col) (Arg i) ("#" <> t1)
- : totoks (lin, col + 1 + T.length t1) t2
+ Tok pos (Arg i) ("#" <> t1)
+ : totoks (incSourceColumn pos (1 + T.length t1)) t2
Nothing ->
- Tok (lin, col) Symbol ("#")
- : totoks (lin, col + 1) t2
+ Tok pos Symbol ("#")
+ : totoks (incSourceColumn pos 1) t2
| c == '^' ->
case T.uncons rest of
Just ('^', rest') ->
@@ -354,20 +357,20 @@ totoks (lin,col) t =
| isLowerHex d ->
case T.uncons rest'' of
Just (e, rest''') | isLowerHex e ->
- Tok (lin, col) Esc2 (T.pack ['^','^',d,e])
- : totoks (lin, col + 4) rest'''
+ Tok pos Esc2 (T.pack ['^','^',d,e])
+ : totoks (incSourceColumn pos 4) rest'''
_ ->
- Tok (lin, col) Esc1 (T.pack ['^','^',d])
- : totoks (lin, col + 3) rest''
+ Tok pos Esc1 (T.pack ['^','^',d])
+ : totoks (incSourceColumn pos 3) rest''
| d < '\128' ->
- Tok (lin, col) Esc1 (T.pack ['^','^',d])
- : totoks (lin, col + 3) rest''
- _ -> [Tok (lin, col) Symbol ("^"),
- Tok (lin, col + 1) Symbol ("^")]
- _ -> Tok (lin, col) Symbol ("^")
- : totoks (lin, col + 1) rest
+ Tok pos Esc1 (T.pack ['^','^',d])
+ : totoks (incSourceColumn pos 3) rest''
+ _ -> [Tok pos Symbol ("^"),
+ Tok (incSourceColumn pos 1) Symbol ("^")]
+ _ -> Tok pos Symbol ("^")
+ : totoks (incSourceColumn pos 1) rest
| otherwise ->
- Tok (lin, col) Symbol (T.singleton c) : totoks (lin, col + 1) rest
+ Tok pos Symbol (T.singleton c) : totoks (incSourceColumn pos 1) rest
where isSpaceOrTab ' ' = True
isSpaceOrTab '\t' = True
@@ -393,8 +396,7 @@ satisfyTok f =
where matcher t | f t = Just t
| otherwise = Nothing
updatePos :: SourcePos -> Tok -> [Tok] -> SourcePos
- updatePos spos _ (Tok (lin,col) _ _ : _) =
- setSourceColumn (setSourceLine spos lin) col
+ updatePos _spos _ (Tok pos _ _ : _) = pos
updatePos spos _ [] = spos
doMacros :: PandocMonad m => Int -> LP m ()
@@ -437,7 +439,7 @@ doMacros n = do
else doMacros (n + 1)
ExpandWhenDefined -> return ()
-setpos :: (Line, Column) -> Tok -> Tok
+setpos :: SourcePos -> Tok -> Tok
setpos spos (Tok _ tt txt) = Tok spos tt txt
anyControlSeq :: PandocMonad m => LP m Tok
@@ -728,15 +730,15 @@ doverb = do
verbTok :: PandocMonad m => Char -> LP m Tok
verbTok stopchar = do
- t@(Tok (lin, col) toktype txt) <- satisfyTok (not . isNewlineTok)
+ t@(Tok pos toktype txt) <- satisfyTok (not . isNewlineTok)
case T.findIndex (== stopchar) txt of
Nothing -> return t
Just i -> do
let (t1, t2) = T.splitAt i txt
inp <- getInput
- setInput $ Tok (lin, col + i) Symbol (T.singleton stopchar)
- : (totoks (lin, col + i + 1) (T.drop 1 t2)) ++ inp
- return $ Tok (lin, col) toktype t1
+ setInput $ Tok (incSourceColumn pos i) Symbol (T.singleton stopchar)
+ : (totoks (incSourceColumn pos (i + 1)) (T.drop 1 t2)) ++ inp
+ return $ Tok pos toktype t1
dolstinline :: PandocMonad m => LP m Inlines
dolstinline = do
@@ -1117,16 +1119,16 @@ tok = grouped inline <|> inlineCommand' <|> singleChar'
singleChar :: PandocMonad m => LP m Tok
singleChar = try $ do
- Tok (lin,col) toktype t <- satisfyTok (tokTypeIn [Word, Symbol])
+ Tok pos toktype t <- satisfyTok (tokTypeIn [Word, Symbol])
guard $ not $ toktype == Symbol &&
T.any (`Set.member` specialChars) t
if T.length t > 1
then do
let (t1, t2) = (T.take 1 t, T.drop 1 t)
inp <- getInput
- setInput $ (Tok (lin, col + 1) toktype t2) : inp
- return $ Tok (lin,col) toktype t1
- else return $ Tok (lin,col) toktype t
+ setInput $ (Tok (incSourceColumn pos 1) toktype t2) : inp
+ return $ Tok pos toktype t1
+ else return $ Tok pos toktype t
opt :: PandocMonad m => LP m Inlines
opt = bracketed inline
@@ -1159,7 +1161,7 @@ withRaw :: PandocMonad m => LP m a -> LP m (a, [Tok])
withRaw parser = do
inp <- getInput
result <- parser
- nxt <- option (Tok (0,0) Word "") (lookAhead anyTok)
+ nxt <- option (Tok (initialPos "source") Word "") (lookAhead anyTok)
let raw = takeWhile (/= nxt) inp
return (result, raw)
@@ -1739,7 +1741,27 @@ include = do
then map (maybeAddExtension ".sty") fs
else map (maybeAddExtension ".tex") fs
dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS"
- mconcat <$> mapM (insertIncludedFile blocks (tokenize . T.pack) dirs) fs'
+ mapM_ (insertIncluded dirs) fs'
+ return mempty
+
+insertIncluded :: PandocMonad m
+ => [FilePath]
+ -> FilePath
+ -> LP m ()
+insertIncluded dirs f = do
+ pos <- getPosition
+ containers <- getIncludeFiles <$> getState
+ when (f `elem` containers) $ do
+ throwError $ PandocParseError $ "Include file loop at " ++ show pos
+ updateState $ addIncludeFile f
+ mbcontents <- readFileFromDirs dirs f
+ contents <- case mbcontents of
+ Just s -> return s
+ Nothing -> do
+ report $ CouldNotLoadIncludeFile f pos
+ return ""
+ getInput >>= setInput . (tokenize f (T.pack contents) ++)
+ updateState dropLatestIncludeFile
maybeAddExtension :: String -> FilePath -> FilePath
maybeAddExtension ext fp =
@@ -2394,9 +2416,7 @@ parseTableRow envname prefsufs = do
>> anyTok)
suffpos <- getPosition
option [] (count 1 amp)
- return $ map (setpos (sourceLine prefpos, sourceColumn prefpos)) pref
- ++ contents ++
- map (setpos (sourceLine suffpos, sourceColumn suffpos)) suff
+ return $ map (setpos prefpos) pref ++ contents ++ map (setpos suffpos) suff
rawcells <- sequence (map celltoks prefsufs)
oldInput <- getInput
cells <- sequence $ map (\ts -> setInput ts >> parseTableCell) rawcells
diff --git a/src/Text/Pandoc/Readers/LaTeX/Types.hs b/src/Text/Pandoc/Readers/LaTeX/Types.hs
index 2bef3cb1a..9e441714d 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Types.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Types.hs
@@ -31,17 +31,17 @@ module Text.Pandoc.Readers.LaTeX.Types ( Tok(..)
, TokType(..)
, Macro(..)
, ExpansionPoint(..)
- , Line
- , Column )
+ , SourcePos
+ )
where
import Data.Text (Text)
-import Text.Parsec.Pos (Line, Column)
+import Text.Parsec.Pos (SourcePos)
data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment |
Esc1 | Esc2 | Arg Int
deriving (Eq, Ord, Show)
-data Tok = Tok (Line, Column) TokType Text
+data Tok = Tok SourcePos TokType Text
deriving (Eq, Ord, Show)
data ExpansionPoint = ExpandWhenDefined | ExpandWhenUsed