diff options
author | John MacFarlane <jgm@berkeley.edu> | 2011-01-19 14:59:59 -0800 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2011-01-19 14:59:59 -0800 |
commit | c09518eefd0c1aadf705b13acedb73ed61d2a809 (patch) | |
tree | 49b2c42d5ee405e2659b807fd9a3d632e14b048f /src/Text/Pandoc | |
parent | 61f3db612cb0d84663499c6b9165f2a93fcc5e52 (diff) | |
download | pandoc-c09518eefd0c1aadf705b13acedb73ed61d2a809.tar.gz |
More small parser rewrites for small performance gains.
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r-- | src/Text/Pandoc/Parsing.hs | 20 |
1 files changed, 11 insertions, 9 deletions
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index e283c209d..6e7db4f8a 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -78,7 +78,7 @@ import Text.Pandoc.Generic import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn) import Text.ParserCombinators.Parsec import Text.Pandoc.CharacterReferences ( characterReference ) -import Data.Char ( toLower, toUpper, ord, isAscii, isAlphaNum ) +import Data.Char ( toLower, toUpper, ord, isAscii, isAlphaNum, isDigit ) import Data.List ( intercalate, transpose ) import Network.URI ( parseURI, URI (..), isAllowedInURI ) import Control.Monad ( join, liftM, guard ) @@ -175,7 +175,8 @@ lineClump = blanklines charsInBalanced :: Char -> Char -> GenParser Char st String charsInBalanced open close = try $ do char open - raw <- many $ (many1 (noneOf [open, close, '\n'])) + raw <- many $ (many1 (satisfy $ \c -> + c /= open && c /= close && c /= '\n')) <|> (do res <- charsInBalanced open close return $ [open] ++ res ++ [close]) <|> try (string "\n" >>~ notFollowedBy' blanklines) @@ -186,7 +187,7 @@ charsInBalanced open close = try $ do charsInBalanced' :: Char -> Char -> GenParser Char st String charsInBalanced' open close = try $ do char open - raw <- many $ (many1 (noneOf [open, close])) + raw <- many $ (many1 (satisfy $ \c -> c /= open && c /= close)) <|> (do res <- charsInBalanced' open close return $ [open] ++ res ++ [close]) char close @@ -207,7 +208,7 @@ romanNumeral upperCase = do let romanDigits = if upperCase then uppercaseRomanDigits else lowercaseRomanDigits - lookAhead $ oneOf romanDigits + lookAhead $ oneOf romanDigits let [one, five, ten, fifty, hundred, fivehundred, thousand] = map char romanDigits thousands <- many thousand >>= (return . (1000 *) . length) @@ -233,7 +234,8 @@ romanNumeral upperCase = do -- Parsers for email addresses and URIs emailChar :: GenParser Char st Char -emailChar = alphaNum <|> oneOf "-+_." +emailChar = alphaNum <|> + satisfy (\c -> c == '-' || c == '+' || c == '_' || c == '.') domainChar :: GenParser Char st Char domainChar = alphaNum <|> char '-' @@ -333,7 +335,7 @@ decimal = do exampleNum :: GenParser Char ParserState (ListNumberStyle, Int) exampleNum = do char '@' - lab <- many (alphaNum <|> oneOf "_-") + lab <- many (alphaNum <|> satisfy (\c -> c == '_' || c == '-')) st <- getState let num = stateNextExample st let newlabels = if null lab @@ -757,7 +759,7 @@ doubleQuoteStart :: GenParser Char ParserState () doubleQuoteStart = do failIfInQuoteContext InDoubleQuote try $ do charOrRef "\"\8220" - notFollowedBy (oneOf " \t\n") + notFollowedBy (satisfy (\c -> c == ' ' || c == '\t' || c == '\n')) doubleQuoteEnd :: GenParser Char st () doubleQuoteEnd = do @@ -775,12 +777,12 @@ dash = enDash <|> emDash enDash :: GenParser Char st Inline enDash = do try (charOrRef "–") <|> - try (char '-' >> notFollowedBy (noneOf "0123456789") >> return '–') + try (char '-' >> lookAhead (satisfy isDigit) >> return '–') return EnDash emDash :: GenParser Char st Inline emDash = do - try (charOrRef "—") <|> (oneOfStrings ["---", "--"] >> return '—') + try (charOrRef "—") <|> (try $ string "--" >> optional (char '-') >> return '—') return EmDash -- |