aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2014-07-12 14:11:09 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2014-07-12 14:11:09 -0700
commit8bbcff0cfcd9923cdcf5024d13bb411d085715d0 (patch)
treefbed47b6034120d029cda85c4e3497d2e96c35c3
parent6c4345aa0bbdc963e1a1c9a9ea2a55e3682a1fa5 (diff)
parent72fe742ca014f42c7e45e8046ceeea3c0ab2cd9a (diff)
downloadpandoc-8bbcff0cfcd9923cdcf5024d13bb411d085715d0.tar.gz
Merge pull request #1414 from mpickering/general
Improvements to Parsing.hs
-rw-r--r--src/Text/Pandoc/Parsing.hs321
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs6
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs2
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs8
-rw-r--r--src/Text/Pandoc/Readers/MediaWiki.hs2
-rw-r--r--src/Text/Pandoc/Readers/RST.hs8
6 files changed, 191 insertions, 156 deletions
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 391131338..d4d5295c0 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -1,5 +1,8 @@
-{-# LANGUAGE GeneralizedNewtypeDeriving, TypeSynonymInstances,
- FlexibleInstances#-}
+{-# LANGUAGE
+ FlexibleContexts
+, GeneralizedNewtypeDeriving
+, TypeSynonymInstances
+, FlexibleInstances #-}
{-
Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
@@ -29,8 +32,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
A utility library with parsers used in pandoc readers.
-}
-module Text.Pandoc.Parsing ( (>>~),
- anyLine,
+module Text.Pandoc.Parsing ( anyLine,
many1Till,
notFollowedBy',
oneOfStrings,
@@ -98,6 +100,7 @@ module Text.Pandoc.Parsing ( (>>~),
macro,
applyMacros',
Parser,
+ ParserT,
F(..),
runF,
askF,
@@ -177,12 +180,15 @@ import Text.Pandoc.Asciify (toAsciiChar)
import Data.Default
import qualified Data.Set as Set
import Control.Monad.Reader
-import Control.Applicative ((*>), (<*), (<$), liftA2, Applicative)
+import Control.Monad.Identity
+import Control.Applicative ((<$>), (<*>), (*>), (<*), (<$), Applicative)
import Data.Monoid
import Data.Maybe (catMaybes)
type Parser t s = Parsec t s
+type ParserT = ParsecT
+
newtype F a = F { unF :: Reader ParserState a } deriving (Monad, Applicative, Functor)
runF :: F a -> ParserState -> a
@@ -199,13 +205,8 @@ instance Monoid a => Monoid (F a) where
mappend = liftM2 mappend
mconcat = liftM mconcat . sequence
--- | Like >>, but returns the operation on the left.
--- (Suggested by Tillmann Rendel on Haskell-cafe list.)
-(>>~) :: (Monad m) => m a -> m b -> m a
-a >>~ b = a >>= \x -> b >> return x
-
-- | Parse any line of text
-anyLine :: Parser [Char] st [Char]
+anyLine :: Stream [Char] m Char => ParserT [Char] st m [Char]
anyLine = do
-- This is much faster than:
-- manyTill anyChar newline
@@ -221,9 +222,10 @@ anyLine = do
_ -> mzero
-- | Like @manyTill@, but reads at least one item.
-many1Till :: Parser [tok] st a
- -> Parser [tok] st end
- -> Parser [tok] st [a]
+many1Till :: Stream s m t
+ => ParserT s st m a
+ -> ParserT s st m end
+ -> ParserT s st m [a]
many1Till p end = do
first <- p
rest <- manyTill p end
@@ -232,21 +234,21 @@ many1Till p end = do
-- | A more general form of @notFollowedBy@. This one allows any
-- type of parser to be specified, and succeeds only if that parser fails.
-- It does not consume any input.
-notFollowedBy' :: Show b => Parser [a] st b -> Parser [a] st ()
+notFollowedBy' :: (Show b, Stream s m a) => ParserT s st m b -> ParserT s st m ()
notFollowedBy' p = try $ join $ do a <- try p
return (unexpected (show a))
<|>
return (return ())
-- (This version due to Andrew Pimlott on the Haskell mailing list.)
-oneOfStrings' :: (Char -> Char -> Bool) -> [String] -> Parser [Char] st String
+oneOfStrings' :: Stream s m Char => (Char -> Char -> Bool) -> [String] -> ParserT s st m String
oneOfStrings' _ [] = fail "no strings"
oneOfStrings' matches strs = try $ do
c <- anyChar
let strs' = [xs | (x:xs) <- strs, x `matches` c]
case strs' of
[] -> fail "not found"
- _ -> (c:) `fmap` oneOfStrings' matches strs'
+ _ -> (c:) <$> oneOfStrings' matches strs'
<|> if "" `elem` strs'
then return [c]
else fail "not found"
@@ -254,11 +256,11 @@ oneOfStrings' matches strs = try $ do
-- | Parses one of a list of strings. If the list contains
-- two strings one of which is a prefix of the other, the longer
-- string will be matched if possible.
-oneOfStrings :: [String] -> Parser [Char] st String
+oneOfStrings :: Stream s m Char => [String] -> ParserT s st m String
oneOfStrings = oneOfStrings' (==)
-- | Parses one of a list of strings (tried in order), case insensitive.
-oneOfStringsCI :: [String] -> Parser [Char] st String
+oneOfStringsCI :: Stream s m Char => [String] -> ParserT s st m String
oneOfStringsCI = oneOfStrings' ciMatch
where ciMatch x y = toLower' x == toLower' y
-- this optimizes toLower by checking common ASCII case
@@ -269,35 +271,35 @@ oneOfStringsCI = oneOfStrings' ciMatch
| otherwise = toLower c
-- | Parses a space or tab.
-spaceChar :: Parser [Char] st Char
+spaceChar :: Stream s m Char => ParserT s st m Char
spaceChar = satisfy $ \c -> c == ' ' || c == '\t'
-- | Parses a nonspace, nonnewline character.
-nonspaceChar :: Parser [Char] st Char
+nonspaceChar :: Stream s m Char => ParserT s st m Char
nonspaceChar = satisfy $ flip notElem ['\t', '\n', ' ', '\r']
-- | Skips zero or more spaces or tabs.
-skipSpaces :: Parser [Char] st ()
+skipSpaces :: Stream s m Char => ParserT s st m ()
skipSpaces = skipMany spaceChar
-- | Skips zero or more spaces or tabs, then reads a newline.
-blankline :: Parser [Char] st Char
+blankline :: Stream s m Char => ParserT s st m Char
blankline = try $ skipSpaces >> newline
-- | Parses one or more blank lines and returns a string of newlines.
-blanklines :: Parser [Char] st [Char]
+blanklines :: Stream s m Char => ParserT s st m [Char]
blanklines = many1 blankline
-- | Parses material enclosed between start and end parsers.
-enclosed :: Parser [Char] st t -- ^ start parser
- -> Parser [Char] st end -- ^ end parser
- -> Parser [Char] st a -- ^ content parser (to be used repeatedly)
- -> Parser [Char] st [a]
+enclosed :: Stream s m Char => ParserT s st m t -- ^ start parser
+ -> ParserT s st m end -- ^ end parser
+ -> ParserT s st m a -- ^ content parser (to be used repeatedly)
+ -> ParserT s st m [a]
enclosed start end parser = try $
start >> notFollowedBy space >> many1Till parser end
-- | Parse string, case insensitive.
-stringAnyCase :: [Char] -> Parser [Char] st String
+stringAnyCase :: Stream s m Char => [Char] -> ParserT s st m String
stringAnyCase [] = string ""
stringAnyCase (x:xs) = do
firstChar <- char (toUpper x) <|> char (toLower x)
@@ -305,7 +307,7 @@ stringAnyCase (x:xs) = do
return (firstChar:rest)
-- | Parse contents of 'str' using 'parser' and return result.
-parseFromString :: Parser [tok] st a -> [tok] -> Parser [tok] st a
+parseFromString :: Stream s m t => ParserT s st m a -> s -> ParserT s st m a
parseFromString parser str = do
oldPos <- getPosition
oldInput <- getInput
@@ -316,7 +318,7 @@ parseFromString parser str = do
return result
-- | Parse raw line block up to and including blank lines.
-lineClump :: Parser [Char] st String
+lineClump :: Stream [Char] m Char => ParserT [Char] st m String
lineClump = blanklines
<|> (many1 (notFollowedBy blankline >> anyLine) >>= return . unlines)
@@ -325,8 +327,8 @@ lineClump = blanklines
-- pairs of open and close, which must be different. For example,
-- @charsInBalanced '(' ')' anyChar@ will parse "(hello (there))"
-- and return "hello (there)".
-charsInBalanced :: Char -> Char -> Parser [Char] st Char
- -> Parser [Char] st String
+charsInBalanced :: Stream s m Char => Char -> Char -> ParserT s st m Char
+ -> ParserT s st m String
charsInBalanced open close parser = try $ do
char open
let isDelim c = c == open || c == close
@@ -350,8 +352,8 @@ uppercaseRomanDigits :: [Char]
uppercaseRomanDigits = map toUpper lowercaseRomanDigits
-- | Parses a roman numeral (uppercase or lowercase), returns number.
-romanNumeral :: Bool -- ^ Uppercase if true
- -> Parser [Char] st Int
+romanNumeral :: Stream s m Char => Bool -- ^ Uppercase if true
+ -> ParserT s st m Int
romanNumeral upperCase = do
let romanDigits = if upperCase
then uppercaseRomanDigits
@@ -383,12 +385,12 @@ romanNumeral upperCase = do
-- | Parses an email address; returns original and corresponding
-- escaped mailto: URI.
-emailAddress :: Parser [Char] st (String, String)
-emailAddress = try $ liftA2 toResult mailbox (char '@' *> domain)
+emailAddress :: Stream s m Char => ParserT s st m (String, String)
+emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain)
where toResult mbox dom = let full = fromEntities $ mbox ++ '@':dom
in (full, escapeURI $ "mailto:" ++ full)
- mailbox = intercalate "." `fmap` (emailWord `sepby1` dot)
- domain = intercalate "." `fmap` (subdomain `sepby1` dot)
+ mailbox = intercalate "." <$> (emailWord `sepby1` dot)
+ domain = intercalate "." <$> (subdomain `sepby1` dot)
dot = char '.'
subdomain = many1 $ alphaNum <|> innerPunct
innerPunct = try (satisfy (\c -> isEmailPunct c || c == '@') <*
@@ -398,7 +400,7 @@ emailAddress = try $ liftA2 toResult mailbox (char '@' *> domain)
isEmailPunct c = c `elem` "!\"#$%&'*+-/=?^_{|}~;"
-- note: sepBy1 from parsec consumes input when sep
-- succeeds and p fails, so we use this variant here.
- sepby1 p sep = liftA2 (:) p (many (try $ sep >> p))
+ sepby1 p sep = (:) <$> p <*> (many (try $ sep >> p))
-- Schemes from http://www.iana.org/assignments/uri-schemes.html plus
@@ -426,11 +428,11 @@ schemes = ["coap","doi","javascript","aaa","aaas","about","acap","cap","cid",
"ventrilo","view-source","webcal","wtai","wyciwyg","xfire","xri",
"ymsgr"]
-uriScheme :: Parser [Char] st String
+uriScheme :: Stream s m Char => ParserT s st m String
uriScheme = oneOfStringsCI schemes
-- | Parses a URI. Returns pair of original and URI-escaped version.
-uri :: Parser [Char] st (String, String)
+uri :: Stream [Char] m Char => ParserT [Char] st m (String, String)
uri = try $ do
scheme <- uriScheme
char ':'
@@ -451,7 +453,7 @@ uri = try $ do
<|> entity
<|> (try $ punct >>
lookAhead (void (satisfy isWordChar) <|> percentEscaped))
- str <- snd `fmap` withRaw (skipMany1 ( () <$
+ str <- snd <$> withRaw (skipMany1 ( () <$
(enclosed (char '(') (char ')') uriChunk
<|> enclosed (char '{') (char '}') uriChunk
<|> enclosed (char '[') (char ']') uriChunk)
@@ -460,7 +462,7 @@ uri = try $ do
let uri' = scheme ++ ":" ++ fromEntities str'
return (uri', escapeURI uri')
-mathInlineWith :: String -> String -> Parser [Char] st String
+mathInlineWith :: Stream s m Char => String -> String -> ParserT s st m String
mathInlineWith op cl = try $ do
string op
notFollowedBy space
@@ -474,12 +476,12 @@ mathInlineWith op cl = try $ do
notFollowedBy digit -- to prevent capture of $5
return $ concat words'
-mathDisplayWith :: String -> String -> Parser [Char] st String
+mathDisplayWith :: Stream s m Char => String -> String -> ParserT s st m String
mathDisplayWith op cl = try $ do
string op
- many1Till (noneOf "\n" <|> (newline >>~ notFollowedBy' blankline)) (try $ string cl)
+ many1Till (noneOf "\n" <|> (newline <* notFollowedBy' blankline)) (try $ string cl)
-mathDisplay :: Parser [Char] ParserState String
+mathDisplay :: Stream s m Char => ParserT s ParserState m String
mathDisplay =
(guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$")
<|> (guardEnabled Ext_tex_math_single_backslash >>
@@ -487,7 +489,7 @@ mathDisplay =
<|> (guardEnabled Ext_tex_math_double_backslash >>
mathDisplayWith "\\\\[" "\\\\]")
-mathInline :: Parser [Char] ParserState String
+mathInline :: Stream s m Char => ParserT s ParserState m String
mathInline =
(guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$")
<|> (guardEnabled Ext_tex_math_single_backslash >>
@@ -499,8 +501,9 @@ mathInline =
-- displacement (the difference between the source column at the end
-- and the source column at the beginning). Vertical displacement
-- (source row) is ignored.
-withHorizDisplacement :: Parser [Char] st a -- ^ Parser to apply
- -> Parser [Char] st (a, Int) -- ^ (result, displacement)
+withHorizDisplacement :: Stream s m Char
+ => ParserT s st m a -- ^ Parser to apply
+ -> ParserT s st m (a, Int) -- ^ (result, displacement)
withHorizDisplacement parser = do
pos1 <- getPosition
result <- parser
@@ -509,7 +512,7 @@ withHorizDisplacement parser = do
-- | Applies a parser and returns the raw string that was parsed,
-- along with the value produced by the parser.
-withRaw :: Monad m => ParsecT [Char] st m a -> ParsecT [Char] st m (a, [Char])
+withRaw :: Stream [Char] m Char => ParsecT [Char] st m a -> ParsecT [Char] st m (a, [Char])
withRaw parser = do
pos1 <- getPosition
inp <- getInput
@@ -525,12 +528,13 @@ withRaw parser = do
return (result, raw)
-- | Parses backslash, then applies character parser.
-escaped :: Parser [Char] st Char -- ^ Parser for character to escape
- -> Parser [Char] st Char
+escaped :: Stream s m Char
+ => ParserT s st m Char -- ^ Parser for character to escape
+ -> ParserT s st m Char
escaped parser = try $ char '\\' >> parser
-- | Parse character entity.
-characterReference :: Parser [Char] st Char
+characterReference :: Stream s m Char => ParserT s st m Char
characterReference = try $ do
char '&'
ent <- many1Till nonspaceChar (char ';')
@@ -539,19 +543,19 @@ characterReference = try $ do
Nothing -> fail "entity not found"
-- | Parses an uppercase roman numeral and returns (UpperRoman, number).
-upperRoman :: Parser [Char] st (ListNumberStyle, Int)
+upperRoman :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
upperRoman = do
num <- romanNumeral True
return (UpperRoman, num)
-- | Parses a lowercase roman numeral and returns (LowerRoman, number).
-lowerRoman :: Parser [Char] st (ListNumberStyle, Int)
+lowerRoman :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
lowerRoman = do
num <- romanNumeral False
return (LowerRoman, num)
-- | Parses a decimal numeral and returns (Decimal, number).
-decimal :: Parser [Char] st (ListNumberStyle, Int)
+decimal :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
decimal = do
num <- many1 digit
return (Decimal, read num)
@@ -560,7 +564,8 @@ decimal = do
-- returns (DefaultStyle, [next example number]). The next
-- example number is incremented in parser state, and the label
-- (if present) is added to the label table.
-exampleNum :: Parser [Char] ParserState (ListNumberStyle, Int)
+exampleNum :: Stream s m Char
+ => ParserT s ParserState m (ListNumberStyle, Int)
exampleNum = do
char '@'
lab <- many (alphaNum <|> satisfy (\c -> c == '_' || c == '-'))
@@ -574,38 +579,39 @@ exampleNum = do
return (Example, num)
-- | Parses a '#' returns (DefaultStyle, 1).
-defaultNum :: Parser [Char] st (ListNumberStyle, Int)
+defaultNum :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
defaultNum = do
char '#'
return (DefaultStyle, 1)
-- | Parses a lowercase letter and returns (LowerAlpha, number).
-lowerAlpha :: Parser [Char] st (ListNumberStyle, Int)
+lowerAlpha :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
lowerAlpha = do
ch <- oneOf ['a'..'z']
return (LowerAlpha, ord ch - ord 'a' + 1)
-- | Parses an uppercase letter and returns (UpperAlpha, number).
-upperAlpha :: Parser [Char] st (ListNumberStyle, Int)
+upperAlpha :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
upperAlpha = do
ch <- oneOf ['A'..'Z']
return (UpperAlpha, ord ch - ord 'A' + 1)
-- | Parses a roman numeral i or I
-romanOne :: Parser [Char] st (ListNumberStyle, Int)
+romanOne :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
romanOne = (char 'i' >> return (LowerRoman, 1)) <|>
(char 'I' >> return (UpperRoman, 1))
-- | Parses an ordered list marker and returns list attributes.
-anyOrderedListMarker :: Parser [Char] ParserState ListAttributes
+anyOrderedListMarker :: Stream s m Char => ParserT s ParserState m ListAttributes
anyOrderedListMarker = choice $
[delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens],
numParser <- [decimal, exampleNum, defaultNum, romanOne,
lowerAlpha, lowerRoman, upperAlpha, upperRoman]]
-- | Parses a list number (num) followed by a period, returns list attributes.
-inPeriod :: Parser [Char] st (ListNumberStyle, Int)
- -> Parser [Char] st ListAttributes
+inPeriod :: Stream s m Char
+ => ParserT s st m (ListNumberStyle, Int)
+ -> ParserT s st m ListAttributes
inPeriod num = try $ do
(style, start) <- num
char '.'
@@ -615,16 +621,18 @@ inPeriod num = try $ do
return (start, style, delim)
-- | Parses a list number (num) followed by a paren, returns list attributes.
-inOneParen :: Parser [Char] st (ListNumberStyle, Int)
- -> Parser [Char] st ListAttributes
+inOneParen :: Stream s m Char
+ => ParserT s st m (ListNumberStyle, Int)
+ -> ParserT s st m ListAttributes
inOneParen num = try $ do
(style, start) <- num
char ')'
return (start, style, OneParen)
-- | Parses a list number (num) enclosed in parens, returns list attributes.
-inTwoParens :: Parser [Char] st (ListNumberStyle, Int)
- -> Parser [Char] st ListAttributes
+inTwoParens :: Stream s m Char
+ => ParserT s st m (ListNumberStyle, Int)
+ -> ParserT s st m ListAttributes
inTwoParens num = try $ do
char '('
(style, start) <- num
@@ -633,9 +641,10 @@ inTwoParens num = try $ do
-- | Parses an ordered list marker with a given style and delimiter,
-- returns number.
-orderedListMarker :: ListNumberStyle
+orderedListMarker :: Stream s m Char
+ => ListNumberStyle
-> ListNumberDelim
- -> Parser [Char] ParserState Int
+ -> ParserT s ParserState m Int
orderedListMarker style delim = do
let num = defaultNum <|> -- # can continue any kind of list
case style of
@@ -655,12 +664,12 @@ orderedListMarker style delim = do
return start
-- | Parses a character reference and returns a Str element.
-charRef :: Parser [Char] st Inline
+charRef :: Stream s m Char => ParserT s st m Inline
charRef = do
c <- characterReference
return $ Str [c]
-lineBlockLine :: Parser [Char] st String
+lineBlockLine :: Stream [Char] m Char => ParserT [Char] st m String
lineBlockLine = try $ do
char '|'
char ' '
@@ -671,7 +680,7 @@ lineBlockLine = try $ do
return $ white ++ unwords (line : continuations)
-- | Parses an RST-style line block and returns a list of strings.
-lineBlockLines :: Parser [Char] st [String]
+lineBlockLines :: Stream [Char] m Char => ParserT [Char] st m [String]
lineBlockLines = try $ do
lines' <- many1 lineBlockLine
skipMany1 $ blankline <|> try (char '|' >> blankline)
@@ -679,11 +688,12 @@ lineBlockLines = try $ do
-- | Parse a table using 'headerParser', 'rowParser',
-- 'lineParser', and 'footerParser'.
-tableWith :: Parser [Char] ParserState ([[Block]], [Alignment], [Int])
- -> ([Int] -> Parser [Char] ParserState [[Block]])
- -> Parser [Char] ParserState sep
- -> Parser [Char] ParserState end
- -> Parser [Char] ParserState Block
+tableWith :: Stream s m Char
+ => ParserT s ParserState m ([[Block]], [Alignment], [Int])
+ -> ([Int] -> ParserT s ParserState m [[Block]])
+ -> ParserT s ParserState m sep
+ -> ParserT s ParserState m end
+ -> ParserT s ParserState m Block
tableWith headerParser rowParser lineParser footerParser = try $ do
(heads, aligns, indices) <- headerParser
lines' <- rowParser indices `sepEndBy1` lineParser
@@ -725,9 +735,10 @@ widthsFromIndices numColumns' indices =
-- (which may be grid), then the rows,
-- which may be grid, separated by blank lines, and
-- ending with a footer (dashed line followed by blank line).
-gridTableWith :: Parser [Char] ParserState [Block] -- ^ Block list parser
+gridTableWith :: Stream [Char] m Char
+ => ParserT [Char] ParserState m [Block] -- ^ Block list parser
-> Bool -- ^ Headerless table
- -> Parser [Char] ParserState Block
+ -> ParserT [Char] ParserState m Block
gridTableWith blocks headless =
tableWith (gridTableHeader headless blocks) (gridTableRow blocks)
(gridTableSep '-') gridTableFooter
@@ -736,27 +747,28 @@ gridTableSplitLine :: [Int] -> String -> [String]
gridTableSplitLine indices line = map removeFinalBar $ tail $
splitStringByIndices (init indices) $ trimr line
-gridPart :: Char -> Parser [Char] st (Int, Int)
+gridPart :: Stream s m Char => Char -> ParserT s st m (Int, Int)
gridPart ch = do
dashes <- many1 (char ch)
char '+'
return (length dashes, length dashes + 1)
-gridDashedLines :: Char -> Parser [Char] st [(Int,Int)]
-gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) >>~ blankline
+gridDashedLines :: Stream s m Char => Char -> ParserT s st m [(Int,Int)]
+gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline
removeFinalBar :: String -> String
removeFinalBar =
reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse
-- | Separator between rows of grid table.
-gridTableSep :: Char -> Parser [Char] ParserState Char
+gridTableSep :: Stream s m Char => Char -> ParserT s ParserState m Char
gridTableSep ch = try $ gridDashedLines ch >> return '\n'
-- | Parse header for a grid table.
-gridTableHeader :: Bool -- ^ Headerless table
- -> Parser [Char] ParserState [Block]
- -> Parser [Char] ParserState ([[Block]], [Alignment], [Int])
+gridTableHeader :: Stream [Char] m Char
+ => Bool -- ^ Headerless table
+ -> ParserT [Char] ParserState m [Block]
+ -> ParserT [Char] ParserState m ([[Block]], [Alignment], [Int])
gridTableHeader headless blocks = try $ do
optional blanklines
dashes <- gridDashedLines '-'
@@ -779,16 +791,17 @@ gridTableHeader headless blocks = try $ do
heads <- mapM (parseFromString blocks) $ map trim rawHeads
return (heads, aligns, indices)
-gridTableRawLine :: [Int] -> Parser [Char] ParserState [String]
+gridTableRawLine :: Stream s m Char => [Int] -> ParserT s ParserState m [String]
gridTableRawLine indices = do
char '|'
line <- many1Till anyChar newline
return (gridTableSplitLine indices line)
-- | Parse row of grid table.
-gridTableRow :: Parser [Char] ParserState [Block]
+gridTableRow :: Stream [Char] m Char
+ => ParserT [Char] ParserState m [Block]
-> [Int]
- -> Parser [Char] ParserState [[Block]]
+ -> ParserT [Char] ParserState m [[Block]]
gridTableRow blocks indices = do
colLines <- many1 (gridTableRawLine indices)
let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $
@@ -807,15 +820,16 @@ compactifyCell :: [Block] -> [Block]
compactifyCell bs = head $ compactify [bs]
-- | Parse footer for a grid table.
-gridTableFooter :: Parser [Char] ParserState [Char]
+gridTableFooter :: Stream s m Char => ParserT s ParserState m [Char]
gridTableFooter = blanklines
---
-- | Parse a string with a given parser and state.
-readWith :: Parser [Char] st a -- ^ parser
+readWith :: (Show s, Stream s Identity Char)
+ => ParserT s st Identity a -- ^ parser
-> st -- ^ initial state
- -> [Char] -- ^ input
+ -> s -- ^ input
-> a
readWith parser state input =
case runParser parser state "source" input of
@@ -823,15 +837,16 @@ readWith parser state input =
let errPos = errorPos err'
errLine = sourceLine errPos
errColumn = sourceColumn errPos
- theline = (lines input ++ [""]) !! (errLine - 1)
+ theline = (lines (show input) ++ [""]) !! (errLine - 1)
in error $ "\nError at " ++ show err' ++ "\n" ++
theline ++ "\n" ++ replicate (errColumn - 1) ' ' ++
"^"
Right result -> result
-- | Parse a string with @parser@ (for testing).
-testStringWith :: (Show a) => Parser [Char] ParserState a
- -> String
+testStringWith :: (Show s, Show a, Stream s Identity Char)
+ => ParserT s ParserState Identity a
+ -> s
-> IO ()
testStringWith parser str = UTF8.putStrLn $ show $
readWith parser defaultParserState str
@@ -878,9 +893,9 @@ instance HasMeta ParserState where
class HasReaderOptions st where
extractReaderOptions :: st -> ReaderOptions
- getOption :: (ReaderOptions -> b) -> Parser s st b
+ getOption :: (Stream s m t) => (ReaderOptions -> b) -> ParserT s st m b
-- default
- getOption f = (f . extractReaderOptions) `fmap` getState
+ getOption f = (f . extractReaderOptions) <$> getState
instance HasReaderOptions ParserState where
extractReaderOptions = stateOptions
@@ -946,19 +961,19 @@ defaultParserState =
stateWarnings = []}
-- | Succeed only if the extension is enabled.
-guardEnabled :: HasReaderOptions st => Extension -> Parser s st ()
+guardEnabled :: (Stream s m a, HasReaderOptions st) => Extension -> ParserT s st m ()
guardEnabled ext = getOption readerExtensions >>= guard . Set.member ext
-- | Succeed only if the extension is disabled.
-guardDisabled :: HasReaderOptions st => Extension -> Parser s st ()
+guardDisabled :: (Stream s m a, HasReaderOptions st) => Extension -> ParserT s st m ()
guardDisabled ext = getOption readerExtensions >>= guard . not . Set.member ext
-- | Update the position on which the last string ended.
-updateLastStrPos :: HasLastStrPosition st => Parser s st ()
+updateLastStrPos :: (Stream s m a, HasLastStrPosition st) => ParserT s st m ()
updateLastStrPos = getPosition >>= updateState . setLastStrPos
-- | Whether we are right after the end of a string.
-notAfterString :: HasLastStrPosition st => Parser s st Bool
+notAfterString :: (Stream s m a, HasLastStrPosition st) => ParserT s st m Bool
notAfterString = do
pos <- getPosition
st <- getState
@@ -998,10 +1013,10 @@ type SubstTable = M.Map Key Inlines
-- and the auto_identifers extension is set, generate a new
-- unique identifier, and update the list of identifiers
-- in state.
-registerHeader :: (HasReaderOptions st, HasHeaderMap st, HasIdentifierList st)
- => Attr -> Inlines -> Parser s st Attr
+registerHeader :: (Stream s m a, HasReaderOptions st, HasHeaderMap st, HasIdentifierList st)
+ => Attr -> Inlines -> ParserT s st m Attr
registerHeader (ident,classes,kvs) header' = do
- ids <- extractIdentifierList `fmap` getState
+ ids <- extractIdentifierList <$> getState
exts <- getOption readerExtensions
let insert' = M.insertWith (\_new old -> old)
if null ident && Ext_auto_identifiers `Set.member` exts
@@ -1020,25 +1035,28 @@ registerHeader (ident,classes,kvs) header' = do
return (ident,classes,kvs)
-- | Fail unless we're in "smart typography" mode.
-failUnlessSmart :: HasReaderOptions st => Parser s st ()
+failUnlessSmart :: (Stream s m a, HasReaderOptions st) => ParserT s st m ()
failUnlessSmart = getOption readerSmart >>= guard
-smartPunctuation :: Parser [Char] ParserState Inlines
- -> Parser [Char] ParserState Inlines
+smartPunctuation :: Stream s m Char
+ => ParserT s ParserState m Inlines
+ -> ParserT s ParserState m Inlines
smartPunctuation inlineParser = do
failUnlessSmart
choice [ quoted inlineParser, apostrophe, dash, ellipses ]
-apostrophe :: Parser [Char] ParserState Inlines
+apostrophe :: Stream s m Char => ParserT s st m Inlines
apostrophe = (char '\'' <|> char '\8217') >> return (B.str "\x2019")
-quoted :: Parser [Char] ParserState Inlines
- -> Parser [Char] ParserState Inlines
+quoted :: Stream s m Char
+ => ParserT s ParserState m Inlines
+ -> ParserT s ParserState m Inlines
quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser
-withQuoteContext :: QuoteContext
- -> Parser [tok] ParserState a
- -> Parser [tok] ParserState a
+withQuoteContext :: Stream s m t
+ => QuoteContext
+ -> ParserT s ParserState m a
+ -> ParserT s ParserState m a
withQuoteContext context parser = do
oldState <- getState
let oldQuoteContext = stateQuoteContext oldState
@@ -1048,108 +1066,122 @@ withQuoteContext context parser = do
setState newState { stateQuoteContext = oldQuoteContext }
return result
-singleQuoted :: Parser [Char] ParserState Inlines
- -> Parser [Char] ParserState Inlines
+singleQuoted :: Stream s m Char
+ => ParserT s ParserState m Inlines
+ -> ParserT s ParserState m Inlines
singleQuoted inlineParser = try $ do
singleQuoteStart
withQuoteContext InSingleQuote $ many1Till inlineParser singleQuoteEnd >>=
return . B.singleQuoted . mconcat
-doubleQuoted :: Parser [Char] ParserState Inlines
- -> Parser [Char] ParserState Inlines
+doubleQuoted :: Stream s m Char
+ => ParserT s ParserState m Inlines
+ -> ParserT s ParserState m Inlines
doubleQuoted inlineParser = try $ do
doubleQuoteStart
withQuoteContext InDoubleQuote $ manyTill inlineParser doubleQuoteEnd >>=
return . B.doubleQuoted . mconcat
-failIfInQuoteContext :: QuoteContext -> Parser [tok] ParserState ()
+failIfInQuoteContext :: Stream s m t
+ => QuoteContext
+ -> ParserT s ParserState m ()
failIfInQuoteContext context = do
st <- getState
if stateQuoteContext st == context
then fail "already inside quotes"
else return ()
-charOrRef :: [Char] -> Parser [Char] st Char
+charOrRef :: Stream s m Char => String -> ParserT s st m Char
charOrRef cs =
oneOf cs <|> try (do c <- characterReference
guard (c `elem` cs)
return c)
-singleQuoteStart :: Parser [Char] ParserState ()
+singleQuoteStart :: Stream s m Char
+ => ParserT s ParserState m ()
singleQuoteStart = do
failIfInQuoteContext InSingleQuote
-- single quote start can't be right after str
guard =<< notAfterString
() <$ charOrRef "'\8216\145"
-singleQuoteEnd :: Parser [Char] st ()
+singleQuoteEnd :: Stream s m Char
+ => ParserT s st m ()
singleQuoteEnd = try $ do
charOrRef "'\8217\146"
notFollowedBy alphaNum
-doubleQuoteStart :: Parser [Char] ParserState ()
+doubleQuoteStart :: Stream s m Char
+ => ParserT s ParserState m ()
doubleQuoteStart = do
failIfInQuoteContext InDoubleQuote
try $ do charOrRef "\"\8220\147"
notFollowedBy . satisfy $ flip elem [' ', '\t', '\n']
-doubleQuoteEnd :: Parser [Char] st ()
-doubleQuoteEnd = do
- charOrRef "\"\8221\148"
- return ()
+doubleQuoteEnd :: Stream s m Char
+ => ParserT s st m ()
+doubleQuoteEnd = void (charOrRef "\"\8221\148")
-ellipses :: Parser [Char] st Inlines
+ellipses :: Stream s m Char
+ => ParserT s st m Inlines
ellipses = do
try (charOrRef "\8230\133") <|> try (string "..." >> return '…')
return (B.str "\8230")
-dash :: Parser [Char] ParserState Inlines
+dash :: Stream s m Char => ParserT s ParserState m Inlines
dash = do
oldDashes <- getOption readerOldDashes
if oldDashes
then emDashOld <|> enDashOld
- else B.str `fmap` (hyphenDash <|> emDash <|> enDash)
+ else B.str <$> (hyphenDash <|> emDash <|> enDash)
-- Two hyphens = en-dash, three = em-dash
-hyphenDash :: Parser [Char] st String
+hyphenDash :: Stream s m Char
+ => ParserT s st m String
hyphenDash = do
try $ string "--"
option "\8211" (char '-' >> return "\8212")
-emDash :: Parser [Char] st String
+emDash :: Stream s m Char
+ => ParserT s st m String
emDash = do
try (charOrRef "\8212\151")
return "\8212"
-enDash :: Parser [Char] st String
+enDash :: Stream s m Char
+ => ParserT s st m String
enDash = do
try (charOrRef "\8212\151")
return "\8211"
-enDashOld :: Parser [Char] st Inlines
+enDashOld :: Stream s m Char
+ => ParserT s st m Inlines
enDashOld = do
try (charOrRef "\8211\150") <|>
try (char '-' >> lookAhead (satisfy isDigit) >> return '–')
return (B.str "\8211")
-emDashOld :: Parser [Char] st Inlines
+emDashOld :: Stream s m Char
+ => ParserT s st m Inlines
emDashOld = do
try (charOrRef "\8212\151") <|> (try $ string "--" >> optional (char '-') >> return '-')
return (B.str "\8212")
-- This is used to prevent exponential blowups for things like:
-- a**a*a**a*a**a*a**a*a**a*a**a*a**
-nested :: Parser s ParserState a
- -> Parser s ParserState a
+nested :: Stream s m a
+ => ParserT s ParserState m a
+ -> ParserT s ParserState m a
nested p = do
- nestlevel <- stateMaxNestingLevel `fmap` getState
+ nestlevel <- stateMaxNestingLevel <$> getState
guard $ nestlevel > 0
updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 }
res <- p
updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
return res
-citeKey :: HasLastStrPosition st => Parser [Char] st (Bool, String)
+citeKey :: (Stream s m Char, HasLastStrPosition st)
+ => ParserT s st m (Bool, String)
citeKey = try $ do
guard =<< notAfterString
suppress_author <- option False (char '-' *> return True)
@@ -1166,7 +1198,8 @@ citeKey = try $ do
--
-- | Parse a \newcommand or \renewcommand macro definition.
-macro :: (HasMacros st, HasReaderOptions st) => Parser [Char] st Blocks
+macro :: (Stream [Char] m Char, HasMacros st, HasReaderOptions st)
+ => ParserT [Char] st m Blocks
macro = do
apply <- getOption readerApplyMacros
inp <- getInput
@@ -1181,10 +1214,12 @@ macro = do
else return $ rawBlock "latex" def'
-- | Apply current macros to string.
-applyMacros' :: String -> Parser [Char] ParserState String
+applyMacros' :: Stream [Char] m Char
+ => String
+ -> ParserT [Char] ParserState m String
applyMacros' target = do
apply <- getOption readerApplyMacros
if apply
- then do macros <- extractMacros `fmap` getState
+ then do macros <- extractMacros <$> getState
return $ applyMacros macros target
else return target
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 381b67e18..cedbb8c9e 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -128,7 +128,7 @@ pBulletList = try $ do
-- note: if they have an <ol> or <ul> not in scope of a <li>,
-- treat it as a list item, though it's not valid xhtml...
skipMany nonItem
- items <- manyTill (pInTags "li" block >>~ skipMany nonItem) (pCloses "ul")
+ items <- manyTill (pInTags "li" block <* skipMany nonItem) (pCloses "ul")
return $ B.bulletList $ map (fixPlains True) items
pOrderedList :: TagParser Blocks
@@ -156,7 +156,7 @@ pOrderedList = try $ do
-- note: if they have an <ol> or <ul> not in scope of a <li>,
-- treat it as a list item, though it's not valid xhtml...
skipMany nonItem
- items <- manyTill (pInTags "li" block >>~ skipMany nonItem) (pCloses "ol")
+ items <- manyTill (pInTags "li" block <* skipMany nonItem) (pCloses "ol")
return $ B.orderedListWith (start, style, DefaultDelim) $ map (fixPlains True) items
pDefinitionList :: TagParser Blocks
@@ -244,7 +244,7 @@ pTable :: TagParser Blocks
pTable = try $ do
TagOpen _ _ <- pSatisfy (~== TagOpen "table" [])
skipMany pBlank
- caption <- option mempty $ pInTags "caption" inline >>~ skipMany pBlank
+ caption <- option mempty $ pInTags "caption" inline <* skipMany pBlank
-- TODO actually read these and take width information from them
widths' <- pColgroup <|> many pCol
head' <- option [] $ pOptInTag "thead" $ pInTags "tr" (pCell "th")
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 97bfaa455..339f8e3c9 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -104,7 +104,7 @@ dimenarg = try $ do
sp :: LP ()
sp = skipMany1 $ satisfy (\c -> c == ' ' || c == '\t')
- <|> (try $ newline >>~ lookAhead anyChar >>~ notFollowedBy blankline)
+ <|> (try $ newline <* lookAhead anyChar <* notFollowedBy blankline)
isLowerHex :: Char -> Bool
isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f'
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 5361158cc..7d19ee1e6 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -571,7 +571,7 @@ attributes :: MarkdownParser Attr
attributes = try $ do
char '{'
spnl
- attrs <- many (attribute >>~ spnl)
+ attrs <- many (attribute <* spnl)
char '}'
return $ foldl (\x f -> f x) nullAttr attrs
@@ -688,7 +688,7 @@ birdTrackLine c = try $ do
--
emailBlockQuoteStart :: MarkdownParser Char
-emailBlockQuoteStart = try $ skipNonindentSpaces >> char '>' >>~ optional (char ' ')
+emailBlockQuoteStart = try $ skipNonindentSpaces >> char '>' <* optional (char ' ')
emailBlockQuote :: MarkdownParser [String]
emailBlockQuote = try $ do
@@ -1165,7 +1165,7 @@ gridPart ch = do
return (length dashes, length dashes + 1)
gridDashedLines :: Char -> Parser [Char] st [(Int,Int)]
-gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) >>~ blankline
+gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline
removeFinalBar :: String -> String
removeFinalBar =
@@ -1499,7 +1499,7 @@ inlinesBetween :: (Show b)
inlinesBetween start end =
(trimInlinesF . mconcat) <$> try (start >> many1Till inner end)
where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline)
- innerSpace = try $ whitespace >>~ notFollowedBy' end
+ innerSpace = try $ whitespace <* notFollowedBy' end
strikeout :: MarkdownParser (F Inlines)
strikeout = fmap B.strikeout <$>
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index f1dcce8f7..719bde160 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -634,7 +634,7 @@ inlinesBetween :: (Show b) => MWParser a -> MWParser b -> MWParser Inlines
inlinesBetween start end =
(trimInlines . mconcat) <$> try (start >> many1Till inner end)
where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline)
- innerSpace = try $ whitespace >>~ notFollowedBy' end
+ innerSpace = try $ whitespace <* notFollowedBy' end
emph :: MWParser Inlines
emph = B.emph <$> nested (inlinesBetween start end)
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index fa8438e70..b7bc83e86 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -460,7 +460,7 @@ listItem :: RSTParser Int
listItem start = try $ do
(markerLength, first) <- rawListItem start
rest <- many (listContinuation markerLength)
- blanks <- choice [ try (many blankline >>~ lookAhead start),
+ blanks <- choice [ try (many blankline <* lookAhead start),
many1 blankline ] -- whole list must end with blank.
-- parsing with ListItemState forces markers at beginning of lines to
-- count as list item markers, even if not separated by blank space.
@@ -480,7 +480,7 @@ listItem start = try $ do
orderedList :: RSTParser Blocks
orderedList = try $ do
- (start, style, delim) <- lookAhead (anyOrderedListMarker >>~ spaceChar)
+ (start, style, delim) <- lookAhead (anyOrderedListMarker <* spaceChar)
items <- many1 (listItem (orderedListStart style delim))
let items' = compactify' items
return $ B.orderedListWith (start, style, delim) items'
@@ -747,7 +747,7 @@ simpleReferenceName = do
referenceName :: RSTParser Inlines
referenceName = quotedReferenceName <|>
- (try $ simpleReferenceName >>~ lookAhead (char ':')) <|>
+ (try $ simpleReferenceName <* lookAhead (char ':')) <|>
unquotedReferenceName
referenceKey :: RSTParser [Char]
@@ -1076,7 +1076,7 @@ explicitLink = try $ do
referenceLink :: RSTParser Inlines
referenceLink = try $ do
- (label',ref) <- withRaw (quotedReferenceName <|> simpleReferenceName) >>~
+ (label',ref) <- withRaw (quotedReferenceName <|> simpleReferenceName) <*
char '_'
state <- getState
let keyTable = stateKeys state