aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2021-03-20 00:02:24 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2021-03-20 00:24:49 -0700
commita1a57bce4e32cc26b968bcc2847a8e8da30f725b (patch)
tree9adc14014aff9edaa5e801d259b7976817eaf683 /src/Text/Pandoc
parentceadf33246bcc42747b42c10c108bfc7d8663ab7 (diff)
downloadpandoc-a1a57bce4e32cc26b968bcc2847a8e8da30f725b.tar.gz
T.P.Shared: remove `backslashEscapes`, `escapeStringUsing`.
[API change] These are inefficient association list lookups. Replace with more efficient functions in the writers that used them (with 10-25% performance improvements in haddock, org, rtf, texinfo writers).
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r--src/Text/Pandoc/Shared.hs13
-rw-r--r--src/Text/Pandoc/Writers/AsciiDoc.hs11
-rw-r--r--src/Text/Pandoc/Writers/ConTeXt.hs6
-rw-r--r--src/Text/Pandoc/Writers/Haddock.hs15
-rw-r--r--src/Text/Pandoc/Writers/LaTeX.hs14
-rw-r--r--src/Text/Pandoc/Writers/Org.hs15
-rw-r--r--src/Text/Pandoc/Writers/RTF.hs26
-rw-r--r--src/Text/Pandoc/Writers/Texinfo.hs24
8 files changed, 77 insertions, 47 deletions
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 46aea9c03..23adff909 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -26,8 +26,6 @@ module Text.Pandoc.Shared (
findM,
-- * Text processing
tshow,
- backslashEscapes,
- escapeStringUsing,
elemText,
notElemText,
stripTrailingNewlines,
@@ -184,17 +182,6 @@ findM p = foldr go (pure Nothing)
tshow :: Show a => a -> T.Text
tshow = T.pack . show
--- | Returns an association list of backslash escapes for the
--- designated characters.
-backslashEscapes :: [Char] -- ^ list of special characters to escape
- -> [(Char, T.Text)]
-backslashEscapes = map (\ch -> (ch, T.pack ['\\',ch]))
-
--- | Escape a string of characters, using an association list of
--- characters and strings.
-escapeStringUsing :: [(Char, T.Text)] -> T.Text -> T.Text
-escapeStringUsing tbl = T.concatMap $ \c -> fromMaybe (T.singleton c) $ lookup c tbl
-
-- | @True@ exactly when the @Char@ appears in the @Text@.
elemText :: Char -> T.Text -> Bool
elemText c = T.any (== c)
diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs
index 69e608ef9..ab7e5f1a9 100644
--- a/src/Text/Pandoc/Writers/AsciiDoc.hs
+++ b/src/Text/Pandoc/Writers/AsciiDoc.hs
@@ -105,8 +105,11 @@ pandocToAsciiDoc opts (Pandoc meta blocks) = do
-- | Escape special characters for AsciiDoc.
escapeString :: Text -> Text
-escapeString = escapeStringUsing escs
- where escs = backslashEscapes "{"
+escapeString t
+ | T.any (== '{') t = T.concatMap escChar t
+ | otherwise = t
+ where escChar '{' = "\\{"
+ escChar c = T.singleton c
-- | Ordered list start parser for use in Para below.
olMarker :: Parser Text ParserState Char
@@ -496,7 +499,9 @@ inlineToAsciiDoc opts (Quoted qt lst) = do
| otherwise -> [Str "``"] ++ lst ++ [Str "''"]
inlineToAsciiDoc _ (Code _ str) = do
isAsciidoctor <- gets asciidoctorVariant
- let contents = literal (escapeStringUsing (backslashEscapes "`") str)
+ let escChar '`' = "\\'"
+ escChar c = T.singleton c
+ let contents = literal (T.concatMap escChar str)
return $
if isAsciidoctor
then text "`+" <> contents <> "+`"
diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
index 1c56388ed..3c9975be8 100644
--- a/src/Text/Pandoc/Writers/ConTeXt.hs
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@@ -434,9 +434,13 @@ inlineToConTeXt (Link _ txt (src, _)) = do
put $ st {stNextRef = next + 1}
let ref = "url" <> tshow next
contents <- inlineListToConTeXt txt
+ let escChar '#' = "\\#"
+ escChar '%' = "\\%"
+ escChar c = T.singleton c
+ let escContextURL = T.concatMap escChar
return $ "\\useURL"
<> brackets (literal ref)
- <> brackets (literal $ escapeStringUsing [('#',"\\#"),('%',"\\%")] src)
+ <> brackets (literal $ escContextURL src)
<> (if isAutolink
then empty
else brackets empty <> brackets contents)
diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs
index aaa19ed07..75e14714b 100644
--- a/src/Text/Pandoc/Writers/Haddock.hs
+++ b/src/Text/Pandoc/Writers/Haddock.hs
@@ -15,6 +15,7 @@ Haddock: <http://www.haskell.org/haddock/doc/html/>
-}
module Text.Pandoc.Writers.Haddock (writeHaddock) where
import Control.Monad.State.Strict
+import Data.Char (isAlphaNum)
import Data.Default
import Data.Text (Text)
import qualified Data.Text as T
@@ -71,8 +72,18 @@ notesToHaddock opts notes =
-- | Escape special characters for Haddock.
escapeString :: Text -> Text
-escapeString = escapeStringUsing haddockEscapes
- where haddockEscapes = backslashEscapes "\\/'`\"@<"
+escapeString t
+ | T.all isAlphaNum t = t
+ | otherwise = T.concatMap escChar t
+ where
+ escChar '\\' = "\\\\"
+ escChar '/' = "\\/"
+ escChar '\'' = "\\'"
+ escChar '`' = "\\`"
+ escChar '"' = "\\\""
+ escChar '@' = "\\@"
+ escChar '<' = "\\<"
+ escChar c = T.singleton c
-- | Convert Pandoc block element to haddock.
blockToHaddock :: PandocMonad m
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 6a205a798..1c970e6ad 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -825,7 +825,19 @@ inlineToLaTeX (Code (_,classes,kvs) str) = do
let chr = case "!\"'()*,-./:;?@" \\ T.unpack str of
(c:_) -> c
[] -> '!'
- let str' = escapeStringUsing (backslashEscapes "\\{}%~_&#^") str
+ let isEscapable '\\' = True
+ isEscapable '{' = True
+ isEscapable '}' = True
+ isEscapable '%' = True
+ isEscapable '~' = True
+ isEscapable '_' = True
+ isEscapable '&' = True
+ isEscapable '#' = True
+ isEscapable '^' = True
+ isEscapable _ = False
+ let escChar c | isEscapable c = T.pack ['\\',c]
+ | otherwise = T.singleton c
+ let str' = T.concatMap escChar str
-- we always put lstinline in a dummy 'passthrough' command
-- (defined in the default template) so that we don't have
-- to change the way we escape characters depending on whether
diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs
index bb645eaf9..88a2b8314 100644
--- a/src/Text/Pandoc/Writers/Org.hs
+++ b/src/Text/Pandoc/Writers/Org.hs
@@ -84,12 +84,15 @@ noteToOrg num note = do
-- | Escape special characters for Org.
escapeString :: Text -> Text
-escapeString = escapeStringUsing
- [ ('\x2014',"---")
- , ('\x2013',"--")
- , ('\x2019',"'")
- , ('\x2026',"...")
- ]
+escapeString t
+ | T.all (\c -> c < '\x2013' || c > '\x2026') t = t
+ | otherwise = T.concatMap escChar t
+ where
+ escChar '\x2013' = "--"
+ escChar '\x2014' = "---"
+ escChar '\x2019' = "'"
+ escChar '\x2026' = "..."
+ escChar c = T.singleton c
isRawFormat :: Format -> Bool
isRawFormat f =
diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs
index cf27011c2..3527949b4 100644
--- a/src/Text/Pandoc/Writers/RTF.hs
+++ b/src/Text/Pandoc/Writers/RTF.hs
@@ -16,7 +16,7 @@ module Text.Pandoc.Writers.RTF ( writeRTF
import Control.Monad.Except (catchError, throwError)
import Control.Monad
import qualified Data.ByteString as B
-import Data.Char (chr, isDigit, ord)
+import Data.Char (chr, isDigit, ord, isAlphaNum)
import qualified Data.Map as M
import Data.Text (Text)
import qualified Data.Text as T
@@ -137,15 +137,21 @@ handleUnicode = T.concatMap $ \c ->
-- | Escape special characters.
escapeSpecial :: Text -> Text
-escapeSpecial = escapeStringUsing $
- [ ('\t',"\\tab ")
- , ('\8216',"\\u8216'")
- , ('\8217',"\\u8217'")
- , ('\8220',"\\u8220\"")
- , ('\8221',"\\u8221\"")
- , ('\8211',"\\u8211-")
- , ('\8212',"\\u8212-")
- ] <> backslashEscapes "{\\}"
+escapeSpecial t
+ | T.all isAlphaNum t = t
+ | otherwise = T.concatMap escChar t
+ where
+ escChar '\t' = "\\tab "
+ escChar '\8216' = "\\u8216'"
+ escChar '\8217' = "\\u8217'"
+ escChar '\8220' = "\\u8220\""
+ escChar '\8221' = "\\u8221\""
+ escChar '\8211' = "\\u8211-"
+ escChar '\8212' = "\\u8212-"
+ escChar '{' = "\\{"
+ escChar '}' = "\\}"
+ escChar '\\' = "\\\\"
+ escChar c = T.singleton c
-- | Escape strings as needed for rich text format.
stringToRTF :: Text -> Text
diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs
index 0146fdfd8..6a33b4283 100644
--- a/src/Text/Pandoc/Writers/Texinfo.hs
+++ b/src/Text/Pandoc/Writers/Texinfo.hs
@@ -14,7 +14,7 @@ Conversion of 'Pandoc' format into Texinfo.
module Text.Pandoc.Writers.Texinfo ( writeTexinfo ) where
import Control.Monad.Except (throwError)
import Control.Monad.State.Strict
-import Data.Char (chr, ord)
+import Data.Char (chr, ord, isAlphaNum)
import Data.List (maximumBy, transpose, foldl')
import Data.List.NonEmpty (nonEmpty)
import Data.Ord (comparing)
@@ -85,16 +85,18 @@ pandocToTexinfo options (Pandoc meta blocks) = do
-- | Escape things as needed for Texinfo.
stringToTexinfo :: Text -> Text
-stringToTexinfo = escapeStringUsing texinfoEscapes
- where texinfoEscapes = [ ('{', "@{")
- , ('}', "@}")
- , ('@', "@@")
- , ('\160', "@ ")
- , ('\x2014', "---")
- , ('\x2013', "--")
- , ('\x2026', "@dots{}")
- , ('\x2019', "'")
- ]
+stringToTexinfo t
+ | T.all isAlphaNum t = t
+ | otherwise = T.concatMap escChar t
+ where escChar '{' = "@{"
+ escChar '}' = "@}"
+ escChar '@' = "@@"
+ escChar '\160' = "@ "
+ escChar '\x2014' = "---"
+ escChar '\x2013' = "--"
+ escChar '\x2026' = "@dots{}"
+ escChar '\x2019' = "'"
+ escChar c = T.singleton c
escapeCommas :: PandocMonad m => TI m (Doc Text) -> TI m (Doc Text)
escapeCommas parser = do