path: root/src/Text/Pandoc
diff options
author John MacFarlane <jgm@berkeley.edu> 2021-02-28 12:52:41 -0800
committer John MacFarlane <jgm@berkeley.edu> 2021-02-28 12:52:41 -0800
commit f6cf03857b59776f4f44ea831787231f7f93da96 (patch)
tree 6acbe7cc096f04703c10107888d98f27c515ead4 /src/Text/Pandoc
parent 564c39beef36bf008fa5d2c840560ef064152e7d (diff)
LaTeX reader efficiency improvements.
In conjunction with other changes this makes the reader almost twice as fast on our benchmark as it was on Feb. 10.
Diffstat (limited to 'src/Text/Pandoc')
1 files changed, 42 insertions, 31 deletions
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 2155379db..4956b90cb 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -191,12 +191,6 @@ inlineCommand = do
word :: PandocMonad m => LP m Inlines
word = str . untoken <$> satisfyTok isWordTok
-regularSymbol :: PandocMonad m => LP m Inlines
-regularSymbol = str . untoken <$> satisfyTok isRegularSymbol
- where isRegularSymbol (Tok _ Symbol t) = not $ T.any isSpecial t
- isRegularSymbol _ = False
- isSpecial c = c `Set.member` specialChars
inlineGroup :: PandocMonad m => LP m Inlines
inlineGroup = do
ils <- grouped inline
@@ -961,31 +955,48 @@ lookupListDefault d = (fromMaybe d .) . lookupList
where lookupList l m = msum $ map (`M.lookup` m) l
inline :: PandocMonad m => LP m Inlines
-inline = (mempty <$ comment)
- <|> (space <$ whitespace)
- <|> (softbreak <$ endline)
- <|> word
- <|> macroDef (rawInline "latex")
- <|> inlineCommand'
- <|> inlineEnvironment
- <|> inlineGroup
- <|> (symbol '-' *>
- option (str "-") (symbol '-' *>
- option (str "–") (str "—" <$ symbol '-')))
- <|> doubleQuote
- <|> singleQuote
- <|> (str "”" <$ try (symbol '\'' >> symbol '\''))
- <|> (str "’" <$ symbol '\'')
- <|> (str "\160" <$ symbol '~')
- <|> dollarsMath
- <|> (guardEnabled Ext_literate_haskell *> symbol '|' *> doLHSverb)
- <|> (str . T.singleton <$> primEscape)
- <|> regularSymbol
- <|> (do res <- symbolIn "#^'`\"[]&"
- pos <- getPosition
- let s = untoken res
- report $ ParsingUnescaped s pos
- return $ str s)
+inline = do
+ Tok pos toktype t <- lookAhead anyTok
+ let symbolAsString = str . untoken <$> anySymbol
+ let unescapedSymbolAsString =
+ do s <- untoken <$> anySymbol
+ report $ ParsingUnescaped s pos
+ return $ str s
+ case toktype of
+ Comment -> mempty <$ comment
+ Spaces -> space <$ whitespace
+ Newline -> softbreak <$ endline
+ Word -> word
+ Esc1 -> str . T.singleton <$> primEscape
+ Esc2 -> str . T.singleton <$> primEscape
+ Symbol ->
+ case t of
+ "-" -> symbol '-' *>
+ option (str "-") (symbol '-' *>
+ option (str "–") (str "—" <$ symbol '-'))
+ "'" -> symbol '\'' *>
+ option (str "’") (str "”" <$ symbol '\'')
+ "~" -> str "\160" <$ symbol '~'
+ "`" -> doubleQuote <|> singleQuote <|> symbolAsString
+ "\"" -> doubleQuote <|> singleQuote <|> symbolAsString
+ "“" -> doubleQuote <|> symbolAsString
+ "‘" -> singleQuote <|> symbolAsString
+ "$" -> dollarsMath <|> unescapedSymbolAsString
+ "|" -> (guardEnabled Ext_literate_haskell *>
+ symbol '|' *> doLHSverb) <|> symbolAsString
+ "{" -> inlineGroup
+ "#" -> unescapedSymbolAsString
+ "&" -> unescapedSymbolAsString
+ "_" -> unescapedSymbolAsString
+ "^" -> unescapedSymbolAsString
+ "\\" -> mzero
+ "}" -> mzero
+ _ -> symbolAsString
+ CtrlSeq _ -> macroDef (rawInline "latex")
+ <|> inlineCommand'
+ <|> inlineEnvironment
+ <|> inlineGroup
+ _ -> mzero
inlines :: PandocMonad m => LP m Inlines
inlines = mconcat <$> many inline