aboutsummaryrefslogtreecommitdiff
path: root/src/Text
diff options
context:
space:
mode:
author John MacFarlane <fiddlosopher@gmail.com> 2013-02-02 18:46:10 -0800
committer John MacFarlane <fiddlosopher@gmail.com> 2013-02-02 18:46:10 -0800
commit a6c167125fb423b365940401b45e5a95791b2fcf (patch)
tree 94b193e97b67573c70359bca185eacb43cfbb032 /src/Text
parent 92acb24486129912ca967260bd8bb00990bf7013 (diff)
download pandoc-a6c167125fb423b365940401b45e5a95791b2fcf.tar.gz
Optimized oneOfStringsCI.
The call to toLower in ciMatch was very expensive (and made very often), because toLower from Data.Char calls a fully Unicode-aware function. This optimization avoids the call to toLower in the most common case, ASCII characters. This dramatically reduces the speed penalty that comes from enabling the `autolink_bare_uris` extension. The penalty is still substantial (in one test, the run time goes from 0.33s to 0.44s), but it is nowhere near what it used to be.
Diffstat (limited to 'src/Text')
-rw-r--r-- src/Text/Pandoc/Parsing.hs 12
1 files changed, 9 insertions, 3 deletions
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 002bc18de..ebff8ec77 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -152,8 +152,8 @@ import Text.Pandoc.Builder (Blocks, Inlines, rawBlock)
import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
import Text.Parsec
import Text.Parsec.Pos (newPos)
-import Data.Char ( toLower, toUpper, ord, isAscii, isAlphaNum, isDigit, isHexDigit,
- isSpace )
+import Data.Char ( toLower, toUpper, ord, chr, isAscii, isAlphaNum, isDigit,
+ isHexDigit, isSpace )
import Data.List ( intercalate, transpose )
import Text.Pandoc.Shared
import qualified Data.Map as M
@@ -244,7 +244,13 @@ oneOfStrings = oneOfStrings' (==)
-- | Parses one of a list of strings (tried in order), case insensitive.
oneOfStringsCI :: [String] -> Parser [Char] st String
oneOfStringsCI = oneOfStrings' ciMatch
- where ciMatch x y = toLower x == toLower y
+ where ciMatch x y = toLower' x == toLower' y
+ -- this optimizes toLower by checking common ASCII case
+ -- first, before calling the expensive unicode-aware
+ -- function:
+ toLower' c | c >= 'A' && c <= 'Z' = chr (ord c + 32)
+ | isAscii c = c
+ | otherwise = toLower c
-- | Parses a space or tab.
spaceChar :: Parser [Char] st Char