diff options
| author | John MacFarlane <jgm@berkeley.edu> | 2021-02-28 12:52:41 -0800 | 
|---|---|---|
| committer | John MacFarlane <jgm@berkeley.edu> | 2021-02-28 12:52:41 -0800 | 
| commit | f6cf03857b59776f4f44ea831787231f7f93da96 (patch) | |
| tree | 6acbe7cc096f04703c10107888d98f27c515ead4 /src/Text/Pandoc | |
| parent | 564c39beef36bf008fa5d2c840560ef064152e7d (diff) | |
| download | pandoc-f6cf03857b59776f4f44ea831787231f7f93da96.tar.gz | |
LaTeX reader efficiency improvements.
In conjunction with other changes this makes the reader
almost twice as fast on our benchmark as it was on Feb. 10.
Diffstat (limited to 'src/Text/Pandoc')
| -rw-r--r-- | src/Text/Pandoc/Readers/LaTeX.hs | 73 | 
1 files changed, 42 insertions, 31 deletions
| diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 2155379db..4956b90cb 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -191,12 +191,6 @@ inlineCommand = do  word :: PandocMonad m => LP m Inlines  word = str . untoken <$> satisfyTok isWordTok -regularSymbol :: PandocMonad m => LP m Inlines -regularSymbol = str . untoken <$> satisfyTok isRegularSymbol -  where isRegularSymbol (Tok _ Symbol t) = not $ T.any isSpecial t -        isRegularSymbol _                = False -        isSpecial c = c `Set.member` specialChars -  inlineGroup :: PandocMonad m => LP m Inlines  inlineGroup = do    ils <- grouped inline @@ -961,31 +955,48 @@ lookupListDefault d = (fromMaybe d .) . lookupList    where lookupList l m = msum $ map (`M.lookup` m) l  inline :: PandocMonad m => LP m Inlines -inline = (mempty <$ comment) -     <|> (space  <$ whitespace) -     <|> (softbreak <$ endline) -     <|> word -     <|> macroDef (rawInline "latex") -     <|> inlineCommand' -     <|> inlineEnvironment -     <|> inlineGroup -     <|> (symbol '-' *> -           option (str "-") (symbol '-' *> -             option (str "–") (str "—" <$ symbol '-'))) -     <|> doubleQuote -     <|> singleQuote -     <|> (str "”" <$ try (symbol '\'' >> symbol '\'')) -     <|> (str "’" <$ symbol '\'') -     <|> (str "\160" <$ symbol '~') -     <|> dollarsMath -     <|> (guardEnabled Ext_literate_haskell *> symbol '|' *> doLHSverb) -     <|> (str . T.singleton <$> primEscape) -     <|> regularSymbol -     <|> (do res <- symbolIn "#^'`\"[]&" -             pos <- getPosition -             let s = untoken res -             report $ ParsingUnescaped s pos -             return $ str s) +inline = do +  Tok pos toktype t <- lookAhead anyTok +  let symbolAsString = str . untoken <$> anySymbol +  let unescapedSymbolAsString = +        do s <- untoken <$> anySymbol +           report $ ParsingUnescaped s pos +           return $ str s +  case toktype of +    Comment     -> mempty <$ comment +    Spaces      -> space <$ whitespace +    Newline     -> softbreak <$ endline +    Word        -> word +    Esc1        -> str . T.singleton <$> primEscape +    Esc2        -> str . T.singleton <$> primEscape +    Symbol      -> +      case t of +        "-"     -> symbol '-' *> +                    option (str "-") (symbol '-' *> +                      option (str "–") (str "—" <$ symbol '-')) +        "'"     -> symbol '\'' *> +                  option (str "’") (str  "”" <$ symbol '\'') +        "~"     -> str "\160" <$ symbol '~' +        "`"     -> doubleQuote <|> singleQuote <|> symbolAsString +        "\""    -> doubleQuote <|> singleQuote <|> symbolAsString +        "“"     -> doubleQuote <|> symbolAsString +        "‘"     -> singleQuote <|> symbolAsString +        "$"     -> dollarsMath <|> unescapedSymbolAsString +        "|"     -> (guardEnabled Ext_literate_haskell *> +                    symbol '|' *> doLHSverb) <|> symbolAsString +        "{"     -> inlineGroup +        "#"     -> unescapedSymbolAsString +        "&"     -> unescapedSymbolAsString +        "_"     -> unescapedSymbolAsString +        "^"     -> unescapedSymbolAsString +        "\\"    -> mzero +        "}"     -> mzero +        _       -> symbolAsString +    CtrlSeq _   -> macroDef (rawInline "latex") +                  <|> inlineCommand' +                  <|> inlineEnvironment +                  <|> inlineGroup +    _           -> mzero  inlines :: PandocMonad m => LP m Inlines  inlines = mconcat <$> many inline | 
