From a1a57bce4e32cc26b968bcc2847a8e8da30f725b Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 20 Mar 2021 00:02:24 -0700 Subject: T.P.Shared: remove `backslashEscapes`, `escapeStringUsing`. [API change] These are inefficient association list lookups. Replace with more efficient functions in the writers that used them (with 10-25% performance improvements in haddock, org, rtf, texinfo writers). --- src/Text/Pandoc/Shared.hs | 13 ------------- src/Text/Pandoc/Writers/AsciiDoc.hs | 11 ++++++++--- src/Text/Pandoc/Writers/ConTeXt.hs | 6 +++++- src/Text/Pandoc/Writers/Haddock.hs | 15 +++++++++++++-- src/Text/Pandoc/Writers/LaTeX.hs | 14 +++++++++++++- src/Text/Pandoc/Writers/Org.hs | 15 +++++++++------ src/Text/Pandoc/Writers/RTF.hs | 26 ++++++++++++++++---------- src/Text/Pandoc/Writers/Texinfo.hs | 24 +++++++++++++----------- 8 files changed, 77 insertions(+), 47 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 46aea9c03..23adff909 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -26,8 +26,6 @@ module Text.Pandoc.Shared ( findM, -- * Text processing tshow, - backslashEscapes, - escapeStringUsing, elemText, notElemText, stripTrailingNewlines, @@ -184,17 +182,6 @@ findM p = foldr go (pure Nothing) tshow :: Show a => a -> T.Text tshow = T.pack . show --- | Returns an association list of backslash escapes for the --- designated characters. -backslashEscapes :: [Char] -- ^ list of special characters to escape - -> [(Char, T.Text)] -backslashEscapes = map (\ch -> (ch, T.pack ['\\',ch])) - --- | Escape a string of characters, using an association list of --- characters and strings. -escapeStringUsing :: [(Char, T.Text)] -> T.Text -> T.Text -escapeStringUsing tbl = T.concatMap $ \c -> fromMaybe (T.singleton c) $ lookup c tbl - -- | @True@ exactly when the @Char@ appears in the @Text@. elemText :: Char -> T.Text -> Bool elemText c = T.any (== c) diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index 69e608ef9..ab7e5f1a9 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -105,8 +105,11 @@ pandocToAsciiDoc opts (Pandoc meta blocks) = do -- | Escape special characters for AsciiDoc. escapeString :: Text -> Text -escapeString = escapeStringUsing escs - where escs = backslashEscapes "{" +escapeString t + | T.any (== '{') t = T.concatMap escChar t + | otherwise = t + where escChar '{' = "\\{" + escChar c = T.singleton c -- | Ordered list start parser for use in Para below. olMarker :: Parser Text ParserState Char @@ -496,7 +499,9 @@ inlineToAsciiDoc opts (Quoted qt lst) = do | otherwise -> [Str "``"] ++ lst ++ [Str "''"] inlineToAsciiDoc _ (Code _ str) = do isAsciidoctor <- gets asciidoctorVariant - let contents = literal (escapeStringUsing (backslashEscapes "`") str) + let escChar '`' = "\\'" + escChar c = T.singleton c + let contents = literal (T.concatMap escChar str) return $ if isAsciidoctor then text "`+" <> contents <> "+`" diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 1c56388ed..3c9975be8 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -434,9 +434,13 @@ inlineToConTeXt (Link _ txt (src, _)) = do put $ st {stNextRef = next + 1} let ref = "url" <> tshow next contents <- inlineListToConTeXt txt + let escChar '#' = "\\#" + escChar '%' = "\\%" + escChar c = T.singleton c + let escContextURL = T.concatMap escChar return $ "\\useURL" <> brackets (literal ref) - <> brackets (literal $ escapeStringUsing [('#',"\\#"),('%',"\\%")] src) + <> brackets (literal $ escContextURL src) <> (if isAutolink then empty else brackets empty <> brackets contents) diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs index aaa19ed07..75e14714b 100644 --- a/src/Text/Pandoc/Writers/Haddock.hs +++ b/src/Text/Pandoc/Writers/Haddock.hs @@ -15,6 +15,7 @@ Haddock: -} module Text.Pandoc.Writers.Haddock (writeHaddock) where import Control.Monad.State.Strict +import Data.Char (isAlphaNum) import Data.Default import Data.Text (Text) import qualified Data.Text as T @@ -71,8 +72,18 @@ notesToHaddock opts notes = -- | Escape special characters for Haddock. escapeString :: Text -> Text -escapeString = escapeStringUsing haddockEscapes - where haddockEscapes = backslashEscapes "\\/'`\"@<" +escapeString t + | T.all isAlphaNum t = t + | otherwise = T.concatMap escChar t + where + escChar '\\' = "\\\\" + escChar '/' = "\\/" + escChar '\'' = "\\'" + escChar '`' = "\\`" + escChar '"' = "\\\"" + escChar '@' = "\\@" + escChar '<' = "\\<" + escChar c = T.singleton c -- | Convert Pandoc block element to haddock. blockToHaddock :: PandocMonad m diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 6a205a798..1c970e6ad 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -825,7 +825,19 @@ inlineToLaTeX (Code (_,classes,kvs) str) = do let chr = case "!\"'()*,-./:;?@" \\ T.unpack str of (c:_) -> c [] -> '!' - let str' = escapeStringUsing (backslashEscapes "\\{}%~_&#^") str + let isEscapable '\\' = True + isEscapable '{' = True + isEscapable '}' = True + isEscapable '%' = True + isEscapable '~' = True + isEscapable '_' = True + isEscapable '&' = True + isEscapable '#' = True + isEscapable '^' = True + isEscapable _ = False + let escChar c | isEscapable c = T.pack ['\\',c] + | otherwise = T.singleton c + let str' = T.concatMap escChar str -- we always put lstinline in a dummy 'passthrough' command -- (defined in the default template) so that we don't have -- to change the way we escape characters depending on whether diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index bb645eaf9..88a2b8314 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -84,12 +84,15 @@ noteToOrg num note = do -- | Escape special characters for Org. escapeString :: Text -> Text -escapeString = escapeStringUsing - [ ('\x2014',"---") - , ('\x2013',"--") - , ('\x2019',"'") - , ('\x2026',"...") - ] +escapeString t + | T.all (\c -> c < '\x2013' || c > '\x2026') t = t + | otherwise = T.concatMap escChar t + where + escChar '\x2013' = "--" + escChar '\x2014' = "---" + escChar '\x2019' = "'" + escChar '\x2026' = "..." + escChar c = T.singleton c isRawFormat :: Format -> Bool isRawFormat f = diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index cf27011c2..3527949b4 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -16,7 +16,7 @@ module Text.Pandoc.Writers.RTF ( writeRTF import Control.Monad.Except (catchError, throwError) import Control.Monad import qualified Data.ByteString as B -import Data.Char (chr, isDigit, ord) +import Data.Char (chr, isDigit, ord, isAlphaNum) import qualified Data.Map as M import Data.Text (Text) import qualified Data.Text as T @@ -137,15 +137,21 @@ handleUnicode = T.concatMap $ \c -> -- | Escape special characters. escapeSpecial :: Text -> Text -escapeSpecial = escapeStringUsing $ - [ ('\t',"\\tab ") - , ('\8216',"\\u8216'") - , ('\8217',"\\u8217'") - , ('\8220',"\\u8220\"") - , ('\8221',"\\u8221\"") - , ('\8211',"\\u8211-") - , ('\8212',"\\u8212-") - ] <> backslashEscapes "{\\}" +escapeSpecial t + | T.all isAlphaNum t = t + | otherwise = T.concatMap escChar t + where + escChar '\t' = "\\tab " + escChar '\8216' = "\\u8216'" + escChar '\8217' = "\\u8217'" + escChar '\8220' = "\\u8220\"" + escChar '\8221' = "\\u8221\"" + escChar '\8211' = "\\u8211-" + escChar '\8212' = "\\u8212-" + escChar '{' = "\\{" + escChar '}' = "\\}" + escChar '\\' = "\\\\" + escChar c = T.singleton c -- | Escape strings as needed for rich text format. stringToRTF :: Text -> Text diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index 0146fdfd8..6a33b4283 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -14,7 +14,7 @@ Conversion of 'Pandoc' format into Texinfo. module Text.Pandoc.Writers.Texinfo ( writeTexinfo ) where import Control.Monad.Except (throwError) import Control.Monad.State.Strict -import Data.Char (chr, ord) +import Data.Char (chr, ord, isAlphaNum) import Data.List (maximumBy, transpose, foldl') import Data.List.NonEmpty (nonEmpty) import Data.Ord (comparing) @@ -85,16 +85,18 @@ pandocToTexinfo options (Pandoc meta blocks) = do -- | Escape things as needed for Texinfo. stringToTexinfo :: Text -> Text -stringToTexinfo = escapeStringUsing texinfoEscapes - where texinfoEscapes = [ ('{', "@{") - , ('}', "@}") - , ('@', "@@") - , ('\160', "@ ") - , ('\x2014', "---") - , ('\x2013', "--") - , ('\x2026', "@dots{}") - , ('\x2019', "'") - ] +stringToTexinfo t + | T.all isAlphaNum t = t + | otherwise = T.concatMap escChar t + where escChar '{' = "@{" + escChar '}' = "@}" + escChar '@' = "@@" + escChar '\160' = "@ " + escChar '\x2014' = "---" + escChar '\x2013' = "--" + escChar '\x2026' = "@dots{}" + escChar '\x2019' = "'" + escChar c = T.singleton c escapeCommas :: PandocMonad m => TI m (Doc Text) -> TI m (Doc Text) escapeCommas parser = do -- cgit v1.2.3