diff options
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r-- | src/Text/Pandoc/Biblio.hs | 12 | ||||
-rw-r--r-- | src/Text/Pandoc/Parsing.hs | 206 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 24 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/LaTeX.hs | 8 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 274 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/RST.hs | 190 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Textile.hs | 138 | ||||
-rw-r--r-- | src/Text/Pandoc/Templates.hs | 24 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/AsciiDoc.hs | 4 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Markdown.hs | 4 |
10 files changed, 442 insertions, 442 deletions
diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs index cece13fba..13569a4d9 100644 --- a/src/Text/Pandoc/Biblio.hs +++ b/src/Text/Pandoc/Biblio.hs @@ -38,7 +38,7 @@ import qualified Text.CSL as CSL ( Cite(..) ) import Text.Pandoc.Definition import Text.Pandoc.Generic import Text.Pandoc.Shared (stringify) -import Text.ParserCombinators.Parsec +import Text.Parsec import Control.Monad -- | Process a 'Pandoc' document by adding citations formatted @@ -165,7 +165,7 @@ locatorWords inp = breakup (x : xs) = x : breakup xs splitup = groupBy (\x y -> x /= '\160' && y /= '\160') -pLocatorWords :: GenParser Inline st (String, [Inline]) +pLocatorWords :: Parsec [Inline] st (String, [Inline]) pLocatorWords = do l <- pLocator s <- getInput -- rest is suffix @@ -173,16 +173,16 @@ pLocatorWords = do then return (init l, Str "," : s) else return (l, s) -pMatch :: (Inline -> Bool) -> GenParser Inline st Inline +pMatch :: (Inline -> Bool) -> Parsec [Inline] st Inline pMatch condition = try $ do t <- anyToken guard $ condition t return t -pSpace :: GenParser Inline st Inline +pSpace :: Parsec [Inline] st Inline pSpace = pMatch (\t -> t == Space || t == Str "\160") -pLocator :: GenParser Inline st String +pLocator :: Parsec [Inline] st String pLocator = try $ do optional $ pMatch (== Str ",") optional pSpace @@ -190,7 +190,7 @@ pLocator = try $ do gs <- many1 pWordWithDigits return $ stringify f ++ (' ' : unwords gs) -pWordWithDigits :: GenParser Inline st String +pWordWithDigits :: Parsec [Inline] st String pWordWithDigits = try $ do pSpace r <- many1 (notFollowedBy pSpace >> anyToken) diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 70a8586db..74e57acbd 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -79,11 +79,11 @@ where import Text.Pandoc.Definition import Text.Pandoc.Generic import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn) -import Text.ParserCombinators.Parsec +import Text.Parsec import Data.Char ( toLower, toUpper, ord, isAscii, isAlphaNum, isDigit, isPunctuation ) import Data.List ( intercalate, transpose ) import Network.URI ( parseURI, URI (..), isAllowedInURI ) -import Control.Monad ( join, liftM, guard ) +import Control.Monad ( join, liftM, guard, mzero ) import Text.Pandoc.Shared import qualified Data.Map as M import Text.TeXMath.Macros (applyMacros, Macro, parseMacroDefinitions) @@ -96,13 +96,13 @@ import Data.Default a >>~ b = a >>= \x -> b >> return x -- | Parse any line of text -anyLine :: GenParser Char st [Char] +anyLine :: Parsec [Char] st [Char] anyLine = manyTill anyChar newline -- | Like @manyTill@, but reads at least one item. -many1Till :: GenParser tok st a - -> GenParser tok st end - -> GenParser tok st [a] +many1Till :: Parsec [tok] st a + -> Parsec [tok] st end + -> Parsec [tok] st [a] many1Till p end = do first <- p rest <- manyTill p end @@ -111,7 +111,7 @@ many1Till p end = do -- | A more general form of @notFollowedBy@. This one allows any -- type of parser to be specified, and succeeds only if that parser fails. -- It does not consume any input. -notFollowedBy' :: Show b => GenParser a st b -> GenParser a st () +notFollowedBy' :: Show b => Parsec [a] st b -> Parsec [a] st () notFollowedBy' p = try $ join $ do a <- try p return (unexpected (show a)) <|> @@ -119,39 +119,39 @@ notFollowedBy' p = try $ join $ do a <- try p -- (This version due to Andrew Pimlott on the Haskell mailing list.) -- | Parses one of a list of strings (tried in order). -oneOfStrings :: [String] -> GenParser Char st String +oneOfStrings :: [String] -> Parsec [Char] st String oneOfStrings listOfStrings = choice $ map (try . string) listOfStrings -- | Parses a space or tab. -spaceChar :: CharParser st Char +spaceChar :: Parsec [Char] st Char spaceChar = satisfy $ \c -> c == ' ' || c == '\t' -- | Parses a nonspace, nonnewline character. -nonspaceChar :: CharParser st Char +nonspaceChar :: Parsec [Char] st Char nonspaceChar = satisfy $ \x -> x /= '\t' && x /= '\n' && x /= ' ' && x /= '\r' -- | Skips zero or more spaces or tabs. -skipSpaces :: GenParser Char st () +skipSpaces :: Parsec [Char] st () skipSpaces = skipMany spaceChar -- | Skips zero or more spaces or tabs, then reads a newline. -blankline :: GenParser Char st Char +blankline :: Parsec [Char] st Char blankline = try $ skipSpaces >> newline -- | Parses one or more blank lines and returns a string of newlines. -blanklines :: GenParser Char st [Char] +blanklines :: Parsec [Char] st [Char] blanklines = many1 blankline -- | Parses material enclosed between start and end parsers. -enclosed :: GenParser Char st t -- ^ start parser - -> GenParser Char st end -- ^ end parser - -> GenParser Char st a -- ^ content parser (to be used repeatedly) - -> GenParser Char st [a] +enclosed :: Parsec [Char] st t -- ^ start parser + -> Parsec [Char] st end -- ^ end parser + -> Parsec [Char] st a -- ^ content parser (to be used repeatedly) + -> Parsec [Char] st [a] enclosed start end parser = try $ start >> notFollowedBy space >> many1Till parser end -- | Parse string, case insensitive. -stringAnyCase :: [Char] -> CharParser st String +stringAnyCase :: [Char] -> Parsec [Char] st String stringAnyCase [] = string "" stringAnyCase (x:xs) = do firstChar <- char (toUpper x) <|> char (toLower x) @@ -159,7 +159,7 @@ stringAnyCase (x:xs) = do return (firstChar:rest) -- | Parse contents of 'str' using 'parser' and return result. -parseFromString :: GenParser tok st a -> [tok] -> GenParser tok st a +parseFromString :: Parsec [tok] st a -> [tok] -> Parsec [tok] st a parseFromString parser str = do oldPos <- getPosition oldInput <- getInput @@ -170,7 +170,7 @@ parseFromString parser str = do return result -- | Parse raw line block up to and including blank lines. -lineClump :: GenParser Char st String +lineClump :: Parsec [Char] st String lineClump = blanklines <|> (many1 (notFollowedBy blankline >> anyLine) >>= return . unlines) @@ -179,8 +179,8 @@ lineClump = blanklines -- pairs of open and close, which must be different. For example, -- @charsInBalanced '(' ')' anyChar@ will parse "(hello (there))" -- and return "hello (there)". -charsInBalanced :: Char -> Char -> GenParser Char st Char - -> GenParser Char st String +charsInBalanced :: Char -> Char -> Parsec [Char] st Char + -> Parsec [Char] st String charsInBalanced open close parser = try $ do char open let isDelim c = c == open || c == close @@ -205,7 +205,7 @@ uppercaseRomanDigits = map toUpper lowercaseRomanDigits -- | Parses a roman numeral (uppercase or lowercase), returns number. romanNumeral :: Bool -- ^ Uppercase if true - -> GenParser Char st Int + -> Parsec [Char] st Int romanNumeral upperCase = do let romanDigits = if upperCase then uppercaseRomanDigits @@ -235,14 +235,14 @@ romanNumeral upperCase = do -- Parsers for email addresses and URIs -emailChar :: GenParser Char st Char +emailChar :: Parsec [Char] st Char emailChar = alphaNum <|> satisfy (\c -> c == '-' || c == '+' || c == '_' || c == '.') -domainChar :: GenParser Char st Char +domainChar :: Parsec [Char] st Char domainChar = alphaNum <|> char '-' -domain :: GenParser Char st [Char] +domain :: Parsec [Char] st [Char] domain = do first <- many1 domainChar dom <- many1 $ try (char '.' >> many1 domainChar ) @@ -250,7 +250,7 @@ domain = do -- | Parses an email address; returns original and corresponding -- escaped mailto: URI. -emailAddress :: GenParser Char st (String, String) +emailAddress :: Parsec [Char] st (String, String) emailAddress = try $ do firstLetter <- alphaNum restAddr <- many emailChar @@ -261,7 +261,7 @@ emailAddress = try $ do return (full, escapeURI $ "mailto:" ++ full) -- | Parses a URI. Returns pair of original and URI-escaped version. -uri :: GenParser Char st (String, String) +uri :: Parsec [Char] st (String, String) uri = try $ do let protocols = [ "http:", "https:", "ftp:", "file:", "mailto:", "news:", "telnet:" ] @@ -295,8 +295,8 @@ uri = try $ do -- displacement (the difference between the source column at the end -- and the source column at the beginning). Vertical displacement -- (source row) is ignored. -withHorizDisplacement :: GenParser Char st a -- ^ Parser to apply - -> GenParser Char st (a, Int) -- ^ (result, displacement) +withHorizDisplacement :: Parsec [Char] st a -- ^ Parser to apply + -> Parsec [Char] st (a, Int) -- ^ (result, displacement) withHorizDisplacement parser = do pos1 <- getPosition result <- parser @@ -305,7 +305,7 @@ withHorizDisplacement parser = do -- | Applies a parser and returns the raw string that was parsed, -- along with the value produced by the parser. -withRaw :: GenParser Char st a -> GenParser Char st (a, [Char]) +withRaw :: Parsec [Char] st a -> Parsec [Char] st (a, [Char]) withRaw parser = do pos1 <- getPosition inp <- getInput @@ -322,26 +322,26 @@ withRaw parser = do -- | Parses a character and returns 'Null' (so that the parser can move on -- if it gets stuck). -nullBlock :: GenParser Char st Block +nullBlock :: Parsec [Char] st Block nullBlock = anyChar >> return Null -- | Fail if reader is in strict markdown syntax mode. -failIfStrict :: GenParser a ParserState () +failIfStrict :: Parsec [a] ParserState () failIfStrict = do state <- getState if stateStrict state then fail "strict mode" else return () -- | Fail unless we're in literate haskell mode. -failUnlessLHS :: GenParser tok ParserState () +failUnlessLHS :: Parsec [tok] ParserState () failUnlessLHS = getState >>= guard . stateLiterateHaskell -- | Parses backslash, then applies character parser. -escaped :: GenParser Char st Char -- ^ Parser for character to escape - -> GenParser Char st Char +escaped :: Parsec [Char] st Char -- ^ Parser for character to escape + -> Parsec [Char] st Char escaped parser = try $ char '\\' >> parser -- | Parse character entity. -characterReference :: GenParser Char st Char +characterReference :: Parsec [Char] st Char characterReference = try $ do char '&' ent <- many1Till nonspaceChar (char ';') @@ -350,19 +350,19 @@ characterReference = try $ do Nothing -> fail "entity not found" -- | Parses an uppercase roman numeral and returns (UpperRoman, number). -upperRoman :: GenParser Char st (ListNumberStyle, Int) +upperRoman :: Parsec [Char] st (ListNumberStyle, Int) upperRoman = do num <- romanNumeral True return (UpperRoman, num) -- | Parses a lowercase roman numeral and returns (LowerRoman, number). -lowerRoman :: GenParser Char st (ListNumberStyle, Int) +lowerRoman :: Parsec [Char] st (ListNumberStyle, Int) lowerRoman = do num <- romanNumeral False return (LowerRoman, num) -- | Parses a decimal numeral and returns (Decimal, number). -decimal :: GenParser Char st (ListNumberStyle, Int) +decimal :: Parsec [Char] st (ListNumberStyle, Int) decimal = do num <- many1 digit return (Decimal, read num) @@ -371,7 +371,7 @@ decimal = do -- returns (DefaultStyle, [next example number]). The next -- example number is incremented in parser state, and the label -- (if present) is added to the label table. -exampleNum :: GenParser Char ParserState (ListNumberStyle, Int) +exampleNum :: Parsec [Char] ParserState (ListNumberStyle, Int) exampleNum = do char '@' lab <- many (alphaNum <|> satisfy (\c -> c == '_' || c == '-')) @@ -385,38 +385,38 @@ exampleNum = do return (Example, num) -- | Parses a '#' returns (DefaultStyle, 1). -defaultNum :: GenParser Char st (ListNumberStyle, Int) +defaultNum :: Parsec [Char] st (ListNumberStyle, Int) defaultNum = do char '#' return (DefaultStyle, 1) -- | Parses a lowercase letter and returns (LowerAlpha, number). -lowerAlpha :: GenParser Char st (ListNumberStyle, Int) +lowerAlpha :: Parsec [Char] st (ListNumberStyle, Int) lowerAlpha = do ch <- oneOf ['a'..'z'] return (LowerAlpha, ord ch - ord 'a' + 1) -- | Parses an uppercase letter and returns (UpperAlpha, number). -upperAlpha :: GenParser Char st (ListNumberStyle, Int) +upperAlpha :: Parsec [Char] st (ListNumberStyle, Int) upperAlpha = do ch <- oneOf ['A'..'Z'] return (UpperAlpha, ord ch - ord 'A' + 1) -- | Parses a roman numeral i or I -romanOne :: GenParser Char st (ListNumberStyle, Int) +romanOne :: Parsec [Char] st (ListNumberStyle, Int) romanOne = (char 'i' >> return (LowerRoman, 1)) <|> (char 'I' >> return (UpperRoman, 1)) -- | Parses an ordered list marker and returns list attributes. -anyOrderedListMarker :: GenParser Char ParserState ListAttributes +anyOrderedListMarker :: Parsec [Char] ParserState ListAttributes anyOrderedListMarker = choice $ [delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens], numParser <- [decimal, exampleNum, defaultNum, romanOne, lowerAlpha, lowerRoman, upperAlpha, upperRoman]] -- | Parses a list number (num) followed by a period, returns list attributes. -inPeriod :: GenParser Char st (ListNumberStyle, Int) - -> GenParser Char st ListAttributes +inPeriod :: Parsec [Char] st (ListNumberStyle, Int) + -> Parsec [Char] st ListAttributes inPeriod num = try $ do (style, start) <- num char '.' @@ -426,16 +426,16 @@ inPeriod num = try $ do return (start, style, delim) -- | Parses a list number (num) followed by a paren, returns list attributes. -inOneParen :: GenParser Char st (ListNumberStyle, Int) - -> GenParser Char st ListAttributes +inOneParen :: Parsec [Char] st (ListNumberStyle, Int) + -> Parsec [Char] st ListAttributes inOneParen num = try $ do (style, start) <- num char ')' return (start, style, OneParen) -- | Parses a list number (num) enclosed in parens, returns list attributes. -inTwoParens :: GenParser Char st (ListNumberStyle, Int) - -> GenParser Char st ListAttributes +inTwoParens :: Parsec [Char] st (ListNumberStyle, Int) + -> Parsec [Char] st ListAttributes inTwoParens num = try $ do char '(' (style, start) <- num @@ -446,7 +446,7 @@ inTwoParens num = try $ do -- returns number. orderedListMarker :: ListNumberStyle -> ListNumberDelim - -> GenParser Char ParserState Int + -> Parsec [Char] ParserState Int orderedListMarker style delim = do let num = defaultNum <|> -- # can continue any kind of list case style of @@ -466,19 +466,19 @@ orderedListMarker style delim = do return start -- | Parses a character reference and returns a Str element. -charRef :: GenParser Char st Inline +charRef :: Parsec [Char] st Inline charRef = do c <- characterReference return $ Str [c] -- | Parse a table using 'headerParser', 'rowParser', -- 'lineParser', and 'footerParser'. -tableWith :: GenParser Char ParserState ([[Block]], [Alignment], [Int]) - -> ([Int] -> GenParser Char ParserState [[Block]]) - -> GenParser Char ParserState sep - -> GenParser Char ParserState end - -> GenParser Char ParserState [Inline] - -> GenParser Char ParserState Block +tableWith :: Parsec [Char] ParserState ([[Block]], [Alignment], [Int]) + -> ([Int] -> Parsec [Char] ParserState [[Block]]) + -> Parsec [Char] ParserState sep + -> Parsec [Char] ParserState end + -> Parsec [Char] ParserState [Inline] + -> Parsec [Char] ParserState Block tableWith headerParser rowParser lineParser footerParser captionParser = try $ do caption' <- option [] captionParser (heads, aligns, indices) <- headerParser @@ -521,10 +521,10 @@ widthsFromIndices numColumns' indices = -- (which may be grid), then the rows, -- which may be grid, separated by blank lines, and -- ending with a footer (dashed line followed by blank line). -gridTableWith :: GenParser Char ParserState Block -- ^ Block parser - -> GenParser Char ParserState [Inline] -- ^ Caption parser +gridTableWith :: Parsec [Char] ParserState Block -- ^ Block parser + -> Parsec [Char] ParserState [Inline] -- ^ Caption parser -> Bool -- ^ Headerless table - -> GenParser Char ParserState Block + -> Parsec [Char] ParserState Block gridTableWith block tableCaption headless = tableWith (gridTableHeader headless block) (gridTableRow block) (gridTableSep '-') gridTableFooter tableCaption @@ -532,13 +532,13 @@ gridTableSplitLine :: [Int] -> String -> [String] gridTableSplitLine indices line = map removeFinalBar $ tail $ splitStringByIndices (init indices) $ removeTrailingSpace line -gridPart :: Char -> GenParser Char st (Int, Int) +gridPart :: Char -> Parsec [Char] st (Int, Int) gridPart ch = do dashes <- many1 (char ch) char '+' return (length dashes, length dashes + 1) -gridDashedLines :: Char -> GenParser Char st [(Int,Int)] +gridDashedLines :: Char -> Parsec [Char] st [(Int,Int)] gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) >>~ blankline removeFinalBar :: String -> String @@ -546,13 +546,13 @@ removeFinalBar = reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse -- | Separator between rows of grid table. -gridTableSep :: Char -> GenParser Char ParserState Char +gridTableSep :: Char -> Parsec [Char] ParserState Char gridTableSep ch = try $ gridDashedLines ch >> return '\n' -- | Parse header for a grid table. gridTableHeader :: Bool -- ^ Headerless table - -> GenParser Char ParserState Block - -> GenParser Char ParserState ([[Block]], [Alignment], [Int]) + -> Parsec [Char] ParserState Block + -> Parsec [Char] ParserState ([[Block]], [Alignment], [Int]) gridTableHeader headless block = try $ do optional blanklines dashes <- gridDashedLines '-' @@ -576,16 +576,16 @@ gridTableHeader headless block = try $ do map removeLeadingTrailingSpace rawHeads return (heads, aligns, indices) -gridTableRawLine :: [Int] -> GenParser Char ParserState [String] +gridTableRawLine :: [Int] -> Parsec [Char] ParserState [String] gridTableRawLine indices = do char '|' line <- many1Till anyChar newline return (gridTableSplitLine indices line) -- | Parse row of grid table. -gridTableRow :: GenParser Char ParserState Block +gridTableRow :: Parsec [Char] ParserState Block -> [Int] - -> GenParser Char ParserState [[Block]] + -> Parsec [Char] ParserState [[Block]] gridTableRow block indices = do colLines <- many1 (gridTableRawLine indices) let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $ @@ -604,13 +604,13 @@ compactifyCell :: [Block] -> [Block] compactifyCell bs = head $ compactify [bs] -- | Parse footer for a grid table. -gridTableFooter :: GenParser Char ParserState [Char] +gridTableFooter :: Parsec [Char] ParserState [Char] gridTableFooter = blanklines --- -- | Parse a string with a given parser and state. -readWith :: GenParser t ParserState a -- ^ parser +readWith :: Parsec [t] ParserState a -- ^ parser -> ParserState -- ^ initial state -> [t] -- ^ input -> a @@ -620,7 +620,7 @@ readWith parser state input = Right result -> result -- | Parse a string with @parser@ (for testing). -testStringWith :: (Show a) => GenParser Char ParserState a +testStringWith :: (Show a) => Parsec [Char] ParserState a -> String -> IO () testStringWith parser str = UTF8.putStrLn $ show $ @@ -735,25 +735,25 @@ lookupKeySrc table key = case M.lookup key table of Just src -> Just src -- | Fail unless we're in "smart typography" mode. -failUnlessSmart :: GenParser tok ParserState () +failUnlessSmart :: Parsec [tok] ParserState () failUnlessSmart = getState >>= guard . stateSmart -smartPunctuation :: GenParser Char ParserState Inline - -> GenParser Char ParserState Inline +smartPunctuation :: Parsec [Char] ParserState Inline + -> Parsec [Char] ParserState Inline smartPunctuation inlineParser = do failUnlessSmart choice [ quoted inlineParser, apostrophe, dash, ellipses ] -apostrophe :: GenParser Char ParserState Inline +apostrophe :: Parsec [Char] ParserState Inline apostrophe = (char '\'' <|> char '\8217') >> return (Str "\x2019") -quoted :: GenParser Char ParserState Inline - -> GenParser Char ParserState Inline +quoted :: Parsec [Char] ParserState Inline + -> Parsec [Char] ParserState Inline quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser withQuoteContext :: QuoteContext - -> (GenParser Char ParserState Inline) - -> GenParser Char ParserState Inline + -> (Parsec [Char] ParserState Inline) + -> Parsec [Char] ParserState Inline withQuoteContext context parser = do oldState <- getState let oldQuoteContext = stateQuoteContext oldState @@ -763,39 +763,39 @@ withQuoteContext context parser = do setState newState { stateQuoteContext = oldQuoteContext } return result -singleQuoted :: GenParser Char ParserState Inline - -> GenParser Char ParserState Inline +singleQuoted :: Parsec [Char] ParserState Inline + -> Parsec [Char] ParserState Inline singleQuoted inlineParser = try $ do singleQuoteStart withQuoteContext InSingleQuote $ many1Till inlineParser singleQuoteEnd >>= return . Quoted SingleQuote . normalizeSpaces -doubleQuoted :: GenParser Char ParserState Inline - -> GenParser Char ParserState Inline +doubleQuoted :: Parsec [Char] ParserState Inline + -> Parsec [Char] ParserState Inline doubleQuoted inlineParser = try $ do doubleQuoteStart withQuoteContext InDoubleQuote $ do contents <- manyTill inlineParser doubleQuoteEnd return . Quoted DoubleQuote . normalizeSpaces $ contents -failIfInQuoteContext :: QuoteContext -> GenParser tok ParserState () +failIfInQuoteContext :: QuoteContext -> Parsec [tok] ParserState () failIfInQuoteContext context = do st <- getState if stateQuoteContext st == context then fail "already inside quotes" else return () -charOrRef :: [Char] -> GenParser Char st Char +charOrRef :: [Char] -> Parsec [Char] st Char charOrRef cs = oneOf cs <|> try (do c <- characterReference guard (c `elem` cs) return c) -updateLastStrPos :: GenParser Char ParserState () +updateLastStrPos :: Parsec [Char] ParserState () updateLastStrPos = getPosition >>= \p -> updateState $ \s -> s{ stateLastStrPos = Just p } -singleQuoteStart :: GenParser Char ParserState () +singleQuoteStart :: Parsec [Char] ParserState () singleQuoteStart = do failIfInQuoteContext InSingleQuote pos <- getPosition @@ -810,28 +810,28 @@ singleQuoteStart = do -- possess/contraction return () -singleQuoteEnd :: GenParser Char st () +singleQuoteEnd :: Parsec [Char] st () singleQuoteEnd = try $ do charOrRef "'\8217\146" notFollowedBy alphaNum -doubleQuoteStart :: GenParser Char ParserState () +doubleQuoteStart :: Parsec [Char] ParserState () doubleQuoteStart = do failIfInQuoteContext InDoubleQuote try $ do charOrRef "\"\8220\147" notFollowedBy (satisfy (\c -> c == ' ' || c == '\t' || c == '\n')) -doubleQuoteEnd :: GenParser Char st () +doubleQuoteEnd :: Parsec [Char] st () doubleQuoteEnd = do charOrRef "\"\8221\148" return () -ellipses :: GenParser Char st Inline +ellipses :: Parsec [Char] st Inline ellipses = do try (charOrRef "\8230\133") <|> try (string "..." >> return '…') return (Str "\8230") -dash :: GenParser Char ParserState Inline +dash :: Parsec [Char] ParserState Inline dash = do oldDashes <- stateOldDashes `fmap` getState if oldDashes @@ -839,28 +839,28 @@ dash = do else Str `fmap` (hyphenDash <|> emDash <|> enDash) -- Two hyphens = en-dash, three = em-dash -hyphenDash :: GenParser Char st String +hyphenDash :: Parsec [Char] st String hyphenDash = do try $ string "--" option "\8211" (char '-' >> return "\8212") -emDash :: GenParser Char st String +emDash :: Parsec [Char] st String emDash = do try (charOrRef "\8212\151") return "\8212" -enDash :: GenParser Char st String +enDash :: Parsec [Char] st String enDash = do try (charOrRef "\8212\151") return "\8211" -enDashOld :: GenParser Char st Inline +enDashOld :: Parsec [Char] st Inline enDashOld = do try (charOrRef "\8211\150") <|> try (char '-' >> lookAhead (satisfy isDigit) >> return '–') return (Str "\8211") -emDashOld :: GenParser Char st Inline +emDashOld :: Parsec [Char] st Inline emDashOld = do try (charOrRef "\8212\151") <|> (try $ string "--" >> optional (char '-') >> return '-') return (Str "\8212") @@ -870,12 +870,12 @@ emDashOld = do -- -- | Parse a \newcommand or \renewcommand macro definition. -macro :: GenParser Char ParserState Block +macro :: Parsec [Char] ParserState Block macro = do apply <- stateApplyMacros `fmap` getState inp <- getInput case parseMacroDefinitions inp of - ([], _) -> pzero + ([], _) -> mzero (ms, rest) -> do def' <- count (length inp - length rest) anyChar if apply then do @@ -885,7 +885,7 @@ macro = do else return $ RawBlock "latex" def' -- | Apply current macros to string. -applyMacros' :: String -> GenParser Char ParserState String +applyMacros' :: String -> Parsec [Char] ParserState String applyMacros' target = do apply <- liftM stateApplyMacros getState if apply diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 536bddd39..3b8095f84 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -36,8 +36,8 @@ module Text.Pandoc.Readers.HTML ( readHtml , isCommentTag ) where -import Text.ParserCombinators.Parsec -import Text.ParserCombinators.Parsec.Pos +import Text.Parsec +import Text.Parsec.Pos import Text.HTML.TagSoup import Text.HTML.TagSoup.Match import Text.Pandoc.Definition @@ -47,7 +47,7 @@ import Text.Pandoc.Parsing import Data.Maybe ( fromMaybe, isJust ) import Data.List ( intercalate ) import Data.Char ( isDigit, toLower ) -import Control.Monad ( liftM, guard, when ) +import Control.Monad ( liftM, guard, when, mzero ) isSpace :: Char -> Bool isSpace ' ' = True @@ -68,7 +68,7 @@ readHtml st inp = Pandoc meta blocks then parseHeader tags else (Meta [] [] [], tags) -type TagParser = GenParser (Tag String) ParserState +type TagParser = Parsec [Tag String] ParserState -- TODO - fix this - not every header has a title tag parseHeader :: [Tag String] -> (Meta, [Tag String]) @@ -417,7 +417,7 @@ pCloses tagtype = try $ do (TagClose "ul") | tagtype == "li" -> return () (TagClose "ol") | tagtype == "li" -> return () (TagClose "dl") | tagtype == "li" -> return () - _ -> pzero + _ -> mzero pTagText :: TagParser [Inline] pTagText = try $ do @@ -432,11 +432,11 @@ pBlank = try $ do (TagText str) <- pSatisfy isTagText guard $ all isSpace str -pTagContents :: GenParser Char ParserState Inline +pTagContents :: Parsec [Char] ParserState Inline pTagContents = pStr <|> pSpace <|> smartPunctuation pTagContents <|> pSymbol <|> pBad -pStr :: GenParser Char ParserState Inline +pStr :: Parsec [Char] ParserState Inline pStr = do result <- many1 $ satisfy $ \c -> not (isSpace c) && not (isSpecial c) && not (isBad c) @@ -455,13 +455,13 @@ isSpecial '\8220' = True isSpecial '\8221' = True isSpecial _ = False -pSymbol :: GenParser Char ParserState Inline +pSymbol :: Parsec [Char] ParserState Inline pSymbol = satisfy isSpecial >>= return . Str . (:[]) isBad :: Char -> Bool isBad c = c >= '\128' && c <= '\159' -- not allowed in HTML -pBad :: GenParser Char ParserState Inline +pBad :: Parsec [Char] ParserState Inline pBad = do c <- satisfy isBad let c' = case c of @@ -495,7 +495,7 @@ pBad = do _ -> '?' return $ Str [c'] -pSpace :: GenParser Char ParserState Inline +pSpace :: Parsec [Char] ParserState Inline pSpace = many1 (satisfy isSpace) >> return Space -- @@ -593,7 +593,7 @@ _ `closes` _ = False --- parsers for use in markdown, textile readers -- | Matches a stretch of HTML in balanced tags. -htmlInBalanced :: (Tag String -> Bool) -> GenParser Char ParserState String +htmlInBalanced :: (Tag String -> Bool) -> Parsec [Char] ParserState String htmlInBalanced f = try $ do (TagOpen t _, tag) <- htmlTag f guard $ '/' `notElem` tag -- not a self-closing tag @@ -606,7 +606,7 @@ htmlInBalanced f = try $ do return $ tag ++ concat contents ++ endtag -- | Matches a tag meeting a certain condition. -htmlTag :: (Tag String -> Bool) -> GenParser Char ParserState (Tag String, String) +htmlTag :: (Tag String -> Bool) -> Parsec [Char] ParserState (Tag String, String) htmlTag f = try $ do lookAhead (char '<') (next : _) <- getInput >>= return . canonicalizeTags . parseTags diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 3178945e4..6043d79b1 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -33,7 +33,7 @@ module Text.Pandoc.Readers.LaTeX ( readLaTeX, handleIncludes ) where -import Text.ParserCombinators.Parsec hiding ((<|>), space, many, optional) +import Text.Parsec hiding ((<|>), space, many, optional) import Text.Pandoc.Definition import Text.Pandoc.Shared import Text.Pandoc.Parsing @@ -64,7 +64,7 @@ parseLaTeX = do let date' = stateDate st return $ Pandoc (Meta title' authors' date') $ toList bs -type LP = GenParser Char ParserState +type LP = Parsec [Char] ParserState anyControlSeq :: LP String anyControlSeq = do @@ -713,10 +713,10 @@ verbatimEnv = do rest <- getInput return (r,rest) -rawLaTeXBlock :: GenParser Char ParserState String +rawLaTeXBlock :: Parsec [Char] ParserState String rawLaTeXBlock = snd <$> withRaw (environment <|> blockCommand) -rawLaTeXInline :: GenParser Char ParserState Inline +rawLaTeXInline :: Parsec [Char] ParserState Inline rawLaTeXInline = do (res, raw) <- withRaw inlineCommand if res == mempty diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 51a727996..356566724 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -43,7 +43,7 @@ import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag, isTextTag, isCommentTag ) import Text.Pandoc.XML ( fromEntities ) -import Text.ParserCombinators.Parsec +import Text.Parsec import Control.Monad (when, liftM, guard, mzero) import Text.HTML.TagSoup import Text.HTML.TagSoup.Match (tagOpen) @@ -83,14 +83,14 @@ isBlank _ = False -- auxiliary functions -- -indentSpaces :: GenParser Char ParserState [Char] +indentSpaces :: Parsec [Char] ParserState [Char] indentSpaces = try $ do state <- getState let tabStop = stateTabStop state count tabStop (char ' ') <|> string "\t" <?> "indentation" -nonindentSpaces :: GenParser Char ParserState [Char] +nonindentSpaces :: Parsec [Char] ParserState [Char] nonindentSpaces = do state <- getState let tabStop = stateTabStop state @@ -99,30 +99,30 @@ nonindentSpaces = do then return sps else unexpected "indented line" -skipNonindentSpaces :: GenParser Char ParserState () +skipNonindentSpaces :: Parsec [Char] ParserState () skipNonindentSpaces = do state <- getState atMostSpaces (stateTabStop state - 1) -atMostSpaces :: Int -> GenParser Char ParserState () +atMostSpaces :: Int -> Parsec [Char] ParserState () atMostSpaces 0 = notFollowedBy (char ' ') atMostSpaces n = (char ' ' >> atMostSpaces (n-1)) <|> return () -litChar :: GenParser Char ParserState Char +litChar :: Parsec [Char] ParserState Char litChar = escapedChar' <|> noneOf "\n" <|> (newline >> notFollowedBy blankline >> return ' ') -- | Fail unless we're at beginning of a line. -failUnlessBeginningOfLine :: GenParser tok st () +failUnlessBeginningOfLine :: Parsec [tok] st () failUnlessBeginningOfLine = do pos <- getPosition if sourceColumn pos == 1 then return () else fail "not beginning of line" -- | Parse a sequence of inline elements between square brackets, -- including inlines between balanced pairs of square brackets. -inlinesInBalancedBrackets :: GenParser Char ParserState Inline - -> GenParser Char ParserState [Inline] +inlinesInBalancedBrackets :: Parsec [Char] ParserState Inline + -> Parsec [Char] ParserState [Inline] inlinesInBalancedBrackets parser = try $ do char '[' result <- manyTill ( (do lookAhead $ try $ do (Str res) <- parser @@ -137,7 +137,7 @@ inlinesInBalancedBrackets parser = try $ do -- document structure -- -titleLine :: GenParser Char ParserState [Inline] +titleLine :: Parsec [Char] ParserState [Inline] titleLine = try $ do char '%' skipSpaces @@ -146,7 +146,7 @@ titleLine = try $ do newline return $ normalizeSpaces res -authorsLine :: GenParser Char ParserState [[Inline]] +authorsLine :: Parsec [Char] ParserState [[Inline]] authorsLine = try $ do char '%' skipSpaces @@ -157,14 +157,14 @@ authorsLine = try $ do newline return $ filter (not . null) $ map normalizeSpaces authors -dateLine :: GenParser Char ParserState [Inline] +dateLine :: Parsec [Char] ParserState [Inline] dateLine = try $ do char '%' skipSpaces date <- manyTill inline newline return $ normalizeSpaces date -titleBlock :: GenParser Char ParserState ([Inline], [[Inline]], [Inline]) +titleBlock :: Parsec [Char] ParserState ([Inline], [[Inline]], [Inline]) titleBlock = try $ do failIfStrict title <- option [] titleLine @@ -173,7 +173,7 @@ titleBlock = try $ do optional blanklines return (title, author, date) -parseMarkdown :: GenParser Char ParserState Pandoc +parseMarkdown :: Parsec [Char] ParserState Pandoc parseMarkdown = do -- markdown allows raw HTML updateState (\state -> state { stateParseRaw = True }) @@ -182,7 +182,7 @@ parseMarkdown = do -- docMinusKeys is the raw document with blanks where the keys/notes were... st <- getState let firstPassParser = referenceKey - <|> (if stateStrict st then pzero else noteBlock) + <|> (if stateStrict st then mzero else noteBlock) <|> liftM snd (withRaw codeBlockDelimited) <|> lineClump docMinusKeys <- liftM concat $ manyTill firstPassParser eof @@ -211,7 +211,7 @@ parseMarkdown = do -- initial pass for references and notes -- -referenceKey :: GenParser Char ParserState [Char] +referenceKey :: Parsec [Char] ParserState [Char] referenceKey = try $ do startPos <- getPosition skipNonindentSpaces @@ -238,7 +238,7 @@ referenceKey = try $ do -- return blanks so line count isn't affected return $ replicate (sourceLine endPos - sourceLine startPos) '\n' -referenceTitle :: GenParser Char ParserState String +referenceTitle :: Parsec [Char] ParserState String referenceTitle = try $ do skipSpaces >> optional newline >> skipSpaces tit <- (charsInBalanced '(' ')' litChar >>= return . unwords . words) @@ -247,23 +247,23 @@ referenceTitle = try $ do notFollowedBy (noneOf ")\n"))) return $ fromEntities tit -noteMarker :: GenParser Char ParserState [Char] +noteMarker :: Parsec [Char] ParserState [Char] noteMarker = string "[^" >> many1Till (satisfy $ not . isBlank) (char ']') -rawLine :: GenParser Char ParserState [Char] +rawLine :: Parsec [Char] ParserState [Char] rawLine = try $ do notFollowedBy blankline notFollowedBy' $ try $ skipNonindentSpaces >> noteMarker optional indentSpaces anyLine -rawLines :: GenParser Char ParserState [Char] +rawLines :: Parsec [Char] ParserState [Char] rawLines = do first <- anyLine rest <- many rawLine return $ unlines (first:rest) -noteBlock :: GenParser Char ParserState [Char] +noteBlock :: Parsec [Char] ParserState [Char] noteBlock = try $ do startPos <- getPosition skipNonindentSpaces @@ -287,10 +287,10 @@ noteBlock = try $ do -- parsing blocks -- -parseBlocks :: GenParser Char ParserState [Block] +parseBlocks :: Parsec [Char] ParserState [Block] parseBlocks = manyTill block eof -block :: GenParser Char ParserState Block +block :: Parsec [Char] ParserState Block block = do st <- getState choice (if stateStrict st @@ -325,10 +325,10 @@ block = do -- header blocks -- -header :: GenParser Char ParserState Block +header :: Parsec [Char] ParserState Block header = setextHeader <|> atxHeader <?> "header" -atxHeader :: GenParser Char ParserState Block +atxHeader :: Parsec [Char] ParserState Block atxHeader = try $ do level <- many1 (char '#') >>= return . length notFollowedBy (char '.' <|> char ')') -- this would be a list @@ -336,10 +336,10 @@ atxHeader = try $ do text <- manyTill inline atxClosing >>= return . normalizeSpaces return $ Header level text -atxClosing :: GenParser Char st [Char] +atxClosing :: Parsec [Char] st [Char] atxClosing = try $ skipMany (char '#') >> blanklines -setextHeader :: GenParser Char ParserState Block +setextHeader :: Parsec [Char] ParserState Block setextHeader = try $ do -- This lookahead prevents us from wasting time parsing Inlines -- unless necessary -- it gives a significant performance boost. @@ -355,7 +355,7 @@ setextHeader = try $ do -- hrule block -- -hrule :: GenParser Char st Block +hrule :: Parsec [Char] st Block hrule = try $ do skipSpaces start <- satisfy isHruleChar @@ -369,12 +369,12 @@ hrule = try $ do -- code blocks -- -indentedLine :: GenParser Char ParserState [Char] +indentedLine :: Parsec [Char] ParserState [Char] indentedLine = indentSpaces >> manyTill anyChar newline >>= return . (++ "\n") blockDelimiter :: (Char -> Bool) -> Maybe Int - -> GenParser Char st (Int, (String, [String], [(String, String)]), Char) + -> Parsec [Char] st (Int, (String, [String], [(String, String)]), Char) blockDelimiter f len = try $ do c <- lookAhead (satisfy f) size <- case len of @@ -388,7 +388,7 @@ blockDelimiter f len = try $ do blankline return (size, attr, c) -attributes :: GenParser Char st ([Char], [[Char]], [([Char], [Char])]) +attributes :: Parsec [Char] st ([Char], [[Char]], [([Char], [Char])]) attributes = try $ do char '{' spnl @@ -400,28 +400,28 @@ attributes = try $ do | otherwise = firstNonNull xs return (firstNonNull $ reverse ids, concat classes, concat keyvals) -attribute :: GenParser Char st ([Char], [[Char]], [([Char], [Char])]) +attribute :: Parsec [Char] st ([Char], [[Char]], [([Char], [Char])]) attribute = identifierAttr <|> classAttr <|> keyValAttr -identifier :: GenParser Char st [Char] +identifier :: Parsec [Char] st [Char] identifier = do first <- letter rest <- many $ alphaNum <|> oneOf "-_:." return (first:rest) -identifierAttr :: GenParser Char st ([Char], [a], [a1]) +identifierAttr :: Parsec [Char] st ([Char], [a], [a1]) identifierAttr = try $ do char '#' result <- identifier return (result,[],[]) -classAttr :: GenParser Char st ([Char], [[Char]], [a]) +classAttr :: Parsec [Char] st ([Char], [[Char]], [a]) classAttr = try $ do char '.' result <- identifier return ("",[result],[]) -keyValAttr :: GenParser Char st ([Char], [a], [([Char], [Char])]) +keyValAttr :: Parsec [Char] st ([Char], [a], [([Char], [Char])]) keyValAttr = try $ do key <- identifier char '=' @@ -430,14 +430,14 @@ keyValAttr = try $ do <|> many nonspaceChar return ("",[],[(key,val)]) -codeBlockDelimited :: GenParser Char st Block +codeBlockDelimited :: Parsec [Char] st Block codeBlockDelimited = try $ do (size, attr, c) <- blockDelimiter (\c -> c == '~' || c == '`') Nothing contents <- manyTill anyLine (blockDelimiter (== c) (Just size)) blanklines return $ CodeBlock attr $ intercalate "\n" contents -codeBlockIndented :: GenParser Char ParserState Block +codeBlockIndented :: Parsec [Char] ParserState Block codeBlockIndented = do contents <- many1 (indentedLine <|> try (do b <- blanklines @@ -448,7 +448,7 @@ codeBlockIndented = do return $ CodeBlock ("", stateIndentedCodeClasses st, []) $ stripTrailingNewlines $ concat contents -lhsCodeBlock :: GenParser Char ParserState Block +lhsCodeBlock :: Parsec [Char] ParserState Block lhsCodeBlock = do failUnlessLHS liftM (CodeBlock ("",["sourceCode","literate","haskell"],[])) @@ -456,7 +456,7 @@ lhsCodeBlock = do <|> liftM (CodeBlock ("",["sourceCode","haskell"],[])) lhsCodeBlockInverseBird -lhsCodeBlockLaTeX :: GenParser Char ParserState String +lhsCodeBlockLaTeX :: Parsec [Char] ParserState String lhsCodeBlockLaTeX = try $ do string "\\begin{code}" manyTill spaceChar newline @@ -464,13 +464,13 @@ lhsCodeBlockLaTeX = try $ do blanklines return $ stripTrailingNewlines contents -lhsCodeBlockBird :: GenParser Char ParserState String +lhsCodeBlockBird :: Parsec [Char] ParserState String lhsCodeBlockBird = lhsCodeBlockBirdWith '>' -lhsCodeBlockInverseBird :: GenParser Char ParserState String +lhsCodeBlockInverseBird :: Parsec [Char] ParserState String lhsCodeBlockInverseBird = lhsCodeBlockBirdWith '<' -lhsCodeBlockBirdWith :: Char -> GenParser Char ParserState String +lhsCodeBlockBirdWith :: Char -> Parsec [Char] ParserState String lhsCodeBlockBirdWith c = try $ do pos <- getPosition when (sourceColumn pos /= 1) $ fail "Not in first column" @@ -482,7 +482,7 @@ lhsCodeBlockBirdWith c = try $ do blanklines return $ intercalate "\n" lns' -birdTrackLine :: Char -> GenParser Char st [Char] +birdTrackLine :: Char -> Parsec [Char] st [Char] birdTrackLine c = try $ do char c -- allow html tags on left margin: @@ -494,10 +494,10 @@ birdTrackLine c = try $ do -- block quotes -- -emailBlockQuoteStart :: GenParser Char ParserState Char +emailBlockQuoteStart :: Parsec [Char] ParserState Char emailBlockQuoteStart = try $ skipNonindentSpaces >> char '>' >>~ optional (char ' ') -emailBlockQuote :: GenParser Char ParserState [[Char]] +emailBlockQuote :: Parsec [Char] ParserState [[Char]] emailBlockQuote = try $ do emailBlockQuoteStart raw <- sepBy (many (nonEndline <|> @@ -508,7 +508,7 @@ emailBlockQuote = try $ do optional blanklines return raw -blockQuote :: GenParser Char ParserState Block +blockQuote :: Parsec [Char] ParserState Block blockQuote = do raw <- emailBlockQuote -- parse the extracted block, which may contain various block elements: @@ -519,7 +519,7 @@ blockQuote = do -- list blocks -- -bulletListStart :: GenParser Char ParserState () +bulletListStart :: Parsec [Char] ParserState () bulletListStart = try $ do optional newline -- if preceded by a Plain block in a list context skipNonindentSpaces @@ -528,7 +528,7 @@ bulletListStart = try $ do spaceChar skipSpaces -anyOrderedListStart :: GenParser Char ParserState (Int, ListNumberStyle, ListNumberDelim) +anyOrderedListStart :: Parsec [Char] ParserState (Int, ListNumberStyle, ListNumberDelim) anyOrderedListStart = try $ do optional newline -- if preceded by a Plain block in a list context skipNonindentSpaces @@ -548,11 +548,11 @@ anyOrderedListStart = try $ do skipSpaces return (num, style, delim) -listStart :: GenParser Char ParserState () +listStart :: Parsec [Char] ParserState () listStart = bulletListStart <|> (anyOrderedListStart >> return ()) -- parse a line of a list item (start = parser for beginning of list item) -listLine :: GenParser Char ParserState [Char] +listLine :: Parsec [Char] ParserState [Char] listLine = try $ do notFollowedBy blankline notFollowedBy' (do indentSpaces @@ -562,8 +562,8 @@ listLine = try $ do return $ concat chunks ++ "\n" -- parse raw text for one list item, excluding start marker and continuations -rawListItem :: GenParser Char ParserState a - -> GenParser Char ParserState [Char] +rawListItem :: Parsec [Char] ParserState a + -> Parsec [Char] ParserState [Char] rawListItem start = try $ do start first <- listLine @@ -574,14 +574,14 @@ rawListItem start = try $ do -- continuation of a list item - indented and separated by blankline -- or (in compact lists) endline. -- note: nested lists are parsed as continuations -listContinuation :: GenParser Char ParserState [Char] +listContinuation :: Parsec [Char] ParserState [Char] listContinuation = try $ do lookAhead indentSpaces result <- many1 listContinuationLine blanks <- many blankline return $ concat result ++ blanks -listContinuationLine :: GenParser Char ParserState [Char] +listContinuationLine :: Parsec [Char] ParserState [Char] listContinuationLine = try $ do notFollowedBy blankline notFollowedBy' listStart @@ -589,8 +589,8 @@ listContinuationLine = try $ do result <- manyTill anyChar newline return $ result ++ "\n" -listItem :: GenParser Char ParserState a - -> GenParser Char ParserState [Block] +listItem :: Parsec [Char] ParserState a + -> Parsec [Char] ParserState [Block] listItem start = try $ do first <- rawListItem start continuations <- many listContinuation @@ -606,7 +606,7 @@ listItem start = try $ do updateState (\st -> st {stateParserContext = oldContext}) return contents -orderedList :: GenParser Char ParserState Block +orderedList :: Parsec [Char] ParserState Block orderedList = try $ do (start, style, delim) <- lookAhead anyOrderedListStart items <- many1 $ listItem $ try $ @@ -615,13 +615,13 @@ orderedList = try $ do orderedListMarker style delim return $ OrderedList (start, style, delim) $ compactify items -bulletList :: GenParser Char ParserState Block +bulletList :: Parsec [Char] ParserState Block bulletList = many1 (listItem bulletListStart) >>= return . BulletList . compactify -- definition lists -defListMarker :: GenParser Char ParserState () +defListMarker :: Parsec [Char] ParserState () defListMarker = do sps <- nonindentSpaces char ':' <|> char '~' @@ -630,10 +630,10 @@ defListMarker = do let remaining = tabStop - (length sps + 1) if remaining > 0 then count remaining (char ' ') <|> string "\t" - else pzero + else mzero return () -definitionListItem :: GenParser Char ParserState ([Inline], [[Block]]) +definitionListItem :: Parsec [Char] ParserState ([Inline], [[Block]]) definitionListItem = try $ do -- first, see if this has any chance of being a definition list: lookAhead (anyLine >> optional blankline >> defListMarker) @@ -647,7 +647,7 @@ definitionListItem = try $ do updateState (\st -> st {stateParserContext = oldContext}) return ((normalizeSpaces term), contents) -defRawBlock :: GenParser Char ParserState [Char] +defRawBlock :: Parsec [Char] ParserState [Char] defRawBlock = try $ do defListMarker firstline <- anyLine @@ -659,7 +659,7 @@ defRawBlock = try $ do return $ unlines lns ++ trl return $ firstline ++ "\n" ++ unlines rawlines ++ trailing ++ cont -definitionList :: GenParser Char ParserState Block +definitionList :: Parsec [Char] ParserState Block definitionList = do items <- many1 definitionListItem -- "compactify" the definition list: @@ -688,7 +688,7 @@ isHtmlOrBlank (Space) = True isHtmlOrBlank (LineBreak) = True isHtmlOrBlank _ = False -para :: GenParser Char ParserState Block +para :: Parsec [Char] ParserState Block para = try $ do result <- liftM normalizeSpaces $ many1 inline guard $ not . all isHtmlOrBlank $ result @@ -699,17 +699,17 @@ para = try $ do lookAhead (blockQuote <|> header) >> return "") return $ Para result -plain :: GenParser Char ParserState Block +plain :: Parsec [Char] ParserState Block plain = many1 inline >>~ spaces >>= return . Plain . normalizeSpaces -- -- raw html -- -htmlElement :: GenParser Char ParserState [Char] +htmlElement :: Parsec [Char] ParserState [Char] htmlElement = strictHtmlBlock <|> liftM snd (htmlTag isBlockTag) -htmlBlock :: GenParser Char ParserState Block +htmlBlock :: Parsec [Char] ParserState Block htmlBlock = try $ do failUnlessBeginningOfLine first <- htmlElement @@ -717,12 +717,12 @@ htmlBlock = try $ do finalNewlines <- many newline return $ RawBlock "html" $ first ++ finalSpace ++ finalNewlines -strictHtmlBlock :: GenParser Char ParserState [Char] +strictHtmlBlock :: Parsec [Char] ParserState [Char] strictHtmlBlock = do failUnlessBeginningOfLine htmlInBalanced (not . isInlineTag) -rawVerbatimBlock :: GenParser Char ParserState String +rawVerbatimBlock :: Parsec [Char] ParserState String rawVerbatimBlock = try $ do (TagOpen tag _, open) <- htmlTag (tagOpen (\t -> t == "pre" || t == "style" || t == "script") @@ -730,7 +730,7 @@ rawVerbatimBlock = try $ do contents <- manyTill anyChar (htmlTag (~== TagClose tag)) return $ open ++ contents ++ renderTags [TagClose tag] -rawTeXBlock :: GenParser Char ParserState Block +rawTeXBlock :: Parsec [Char] ParserState Block rawTeXBlock = do failIfStrict result <- liftM (RawBlock "latex") rawLaTeXBlock @@ -738,7 +738,7 @@ rawTeXBlock = do spaces return result -rawHtmlBlocks :: GenParser Char ParserState Block +rawHtmlBlocks :: Parsec [Char] ParserState Block rawHtmlBlocks = do htmlBlocks <- many1 $ do blk <- rawVerbatimBlock <|> liftM snd (htmlTag isBlockTag) @@ -762,7 +762,7 @@ rawHtmlBlocks = do -- Parse a dashed line with optional trailing spaces; return its length -- and the length including trailing space. dashedLine :: Char - -> GenParser Char st (Int, Int) + -> Parsec [Char] st (Int, Int) dashedLine ch = do dashes <- many1 (char ch) sp <- many spaceChar @@ -771,7 +771,7 @@ dashedLine ch = do -- Parse a table header with dashed lines of '-' preceded by -- one (or zero) line of text. simpleTableHeader :: Bool -- ^ Headerless table - -> GenParser Char ParserState ([[Block]], [Alignment], [Int]) + -> Parsec [Char] ParserState ([[Block]], [Alignment], [Int]) simpleTableHeader headless = try $ do rawContent <- if headless then return "" @@ -795,16 +795,16 @@ simpleTableHeader headless = try $ do return (heads, aligns, indices) -- Parse a table footer - dashed lines followed by blank line. -tableFooter :: GenParser Char ParserState [Char] +tableFooter :: Parsec [Char] ParserState [Char] tableFooter = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> blanklines -- Parse a table separator - dashed line. -tableSep :: GenParser Char ParserState Char +tableSep :: Parsec [Char] ParserState Char tableSep = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> char '\n' -- Parse a raw line and split it into chunks by indices. rawTableLine :: [Int] - -> GenParser Char ParserState [String] + -> Parsec [Char] ParserState [String] rawTableLine indices = do notFollowedBy' (blanklines <|> tableFooter) line <- many1Till anyChar newline @@ -813,12 +813,12 @@ rawTableLine indices = do -- Parse a table line and return a list of lists of blocks (columns). tableLine :: [Int] - -> GenParser Char ParserState [[Block]] + -> Parsec [Char] ParserState [[Block]] tableLine indices = rawTableLine indices >>= mapM (parseFromString (many plain)) -- Parse a multiline table row and return a list of blocks (columns). multilineRow :: [Int] - -> GenParser Char ParserState [[Block]] + -> Parsec [Char] ParserState [[Block]] multilineRow indices = do colLines <- many1 (rawTableLine indices) let cols = map unlines $ transpose colLines @@ -826,7 +826,7 @@ multilineRow indices = do -- Parses a table caption: inlines beginning with 'Table:' -- and followed by blank lines. -tableCaption :: GenParser Char ParserState [Inline] +tableCaption :: Parsec [Char] ParserState [Inline] tableCaption = try $ do skipNonindentSpaces string ":" <|> string "Table:" @@ -836,7 +836,7 @@ tableCaption = try $ do -- Parse a simple table with '---' header and one line per row. simpleTable :: Bool -- ^ Headerless table - -> GenParser Char ParserState Block + -> Parsec [Char] ParserState Block simpleTable headless = do Table c a _w h l <- tableWith (simpleTableHeader headless) tableLine (return ()) @@ -850,12 +850,12 @@ simpleTable headless = do -- which may be multiline, separated by blank lines, and -- ending with a footer (dashed line followed by blank line). multilineTable :: Bool -- ^ Headerless table - -> GenParser Char ParserState Block + -> Parsec [Char] ParserState Block multilineTable headless = tableWith (multilineTableHeader headless) multilineRow blanklines tableFooter tableCaption multilineTableHeader :: Bool -- ^ Headerless table - -> GenParser Char ParserState ([[Block]], [Alignment], [Int]) + -> Parsec [Char] ParserState ([[Block]], [Alignment], [Int]) multilineTableHeader headless = try $ do if headless then return '\n' @@ -903,10 +903,10 @@ alignType strLst len = (False, False) -> AlignDefault gridTable :: Bool -- ^ Headerless table - -> GenParser Char ParserState Block + -> Parsec [Char] ParserState Block gridTable = gridTableWith block tableCaption -table :: GenParser Char ParserState Block +table :: Parsec [Char] ParserState Block table = multilineTable False <|> simpleTable True <|> simpleTable False <|> multilineTable True <|> gridTable False <|> gridTable True <?> "table" @@ -915,10 +915,10 @@ table = multilineTable False <|> simpleTable True <|> -- inline -- -inline :: GenParser Char ParserState Inline +inline :: Parsec [Char] ParserState Inline inline = choice inlineParsers <?> "inline" -inlineParsers :: [GenParser Char ParserState Inline] +inlineParsers :: [Parsec [Char] ParserState Inline] inlineParsers = [ whitespace , str , endline @@ -945,7 +945,7 @@ inlineParsers = [ whitespace , symbol , ltSign ] -escapedChar' :: GenParser Char ParserState Char +escapedChar' :: Parsec [Char] ParserState Char escapedChar' = try $ do char '\\' state <- getState @@ -953,7 +953,7 @@ escapedChar' = try $ do then oneOf "\\`*_{}[]()>#+-.!~" else satisfy (not . isAlphaNum) -escapedChar :: GenParser Char ParserState Inline +escapedChar :: Parsec [Char] ParserState Inline escapedChar = do result <- escapedChar' return $ case result of @@ -961,7 +961,7 @@ escapedChar = do '\n' -> LineBreak -- "\[newline]" is a linebreak _ -> Str [result] -ltSign :: GenParser Char ParserState Inline +ltSign :: Parsec [Char] ParserState Inline ltSign = do st <- getState if stateStrict st @@ -969,7 +969,7 @@ ltSign = do else notFollowedBy' rawHtmlBlocks >> char '<' -- unless it starts html return $ Str ['<'] -exampleRef :: GenParser Char ParserState Inline +exampleRef :: Parsec [Char] ParserState Inline exampleRef = try $ do char '@' lab <- many1 (alphaNum <|> oneOf "-_") @@ -977,7 +977,7 @@ exampleRef = try $ do -- later. See the end of parseMarkdown. return $ Str $ '@' : lab -symbol :: GenParser Char ParserState Inline +symbol :: Parsec [Char] ParserState Inline symbol = do result <- noneOf "<\\\n\t " <|> try (do lookAhead $ char '\\' @@ -986,7 +986,7 @@ symbol = do return $ Str [result] -- parses inline code, between n `s and n `s -code :: GenParser Char ParserState Inline +code :: Parsec [Char] ParserState Inline code = try $ do starts <- many1 (char '`') skipSpaces @@ -997,26 +997,26 @@ code = try $ do attr <- option ([],[],[]) (try $ optional whitespace >> attributes) return $ Code attr $ removeLeadingTrailingSpace $ concat result -mathWord :: GenParser Char st [Char] +mathWord :: Parsec [Char] st [Char] mathWord = liftM concat $ many1 mathChunk -mathChunk :: GenParser Char st [Char] +mathChunk :: Parsec [Char] st [Char] mathChunk = do char '\\' c <- anyChar return ['\\',c] <|> many1 (satisfy $ \c -> not (isBlank c || c == '\\' || c == '$')) -math :: GenParser Char ParserState Inline +math :: Parsec [Char] ParserState Inline math = (mathDisplay >>= applyMacros' >>= return . Math DisplayMath) <|> (mathInline >>= applyMacros' >>= return . Math InlineMath) -mathDisplay :: GenParser Char ParserState String +mathDisplay :: Parsec [Char] ParserState String mathDisplay = try $ do failIfStrict string "$$" many1Till (noneOf "\n" <|> (newline >>~ notFollowedBy' blankline)) (try $ string "$$") -mathInline :: GenParser Char ParserState String +mathInline :: Parsec [Char] ParserState String mathInline = try $ do failIfStrict char '$' @@ -1028,7 +1028,7 @@ mathInline = try $ do -- to avoid performance problems, treat 4 or more _ or * or ~ or ^ in a row -- as a literal rather than attempting to parse for emph/strong/strikeout/super/sub -fours :: GenParser Char st Inline +fours :: Parsec [Char] st Inline fours = try $ do x <- char '*' <|> char '_' <|> char '~' <|> char '^' count 2 $ satisfy (==x) @@ -1037,9 +1037,9 @@ fours = try $ do -- | Parses a list of inlines between start and end delimiters. inlinesBetween :: (Show b) - => GenParser Char ParserState a - -> GenParser Char ParserState b - -> GenParser Char ParserState [Inline] + => Parsec [Char] ParserState a + -> Parsec [Char] ParserState b + -> Parsec [Char] ParserState [Inline] inlinesBetween start end = normalizeSpaces `liftM` try (start >> many1Till inner end) where inner = innerSpace <|> (notFollowedBy' whitespace >> inline) @@ -1047,8 +1047,8 @@ inlinesBetween start end = -- This is used to prevent exponential blowups for things like: -- a**a*a**a*a**a*a**a*a**a*a**a*a** -nested :: GenParser Char ParserState a - -> GenParser Char ParserState a +nested :: Parsec [Char] ParserState a + -> Parsec [Char] ParserState a nested p = do nestlevel <- stateMaxNestingLevel `fmap` getState guard $ nestlevel > 0 @@ -1057,7 +1057,7 @@ nested p = do updateState $ \st -> st{ stateMaxNestingLevel = nestlevel } return res -emph :: GenParser Char ParserState Inline +emph :: Parsec [Char] ParserState Inline emph = Emph `fmap` nested (inlinesBetween starStart starEnd <|> inlinesBetween ulStart ulEnd) where starStart = char '*' >> lookAhead nonspaceChar @@ -1065,7 +1065,7 @@ emph = Emph `fmap` nested ulStart = char '_' >> lookAhead nonspaceChar ulEnd = notFollowedBy' strong >> char '_' -strong :: GenParser Char ParserState Inline +strong :: Parsec [Char] ParserState Inline strong = Strong `liftM` nested (inlinesBetween starStart starEnd <|> inlinesBetween ulStart ulEnd) where starStart = string "**" >> lookAhead nonspaceChar @@ -1073,32 +1073,32 @@ strong = Strong `liftM` nested ulStart = string "__" >> lookAhead nonspaceChar ulEnd = try $ string "__" -strikeout :: GenParser Char ParserState Inline +strikeout :: Parsec [Char] ParserState Inline strikeout = Strikeout `liftM` (failIfStrict >> inlinesBetween strikeStart strikeEnd) where strikeStart = string "~~" >> lookAhead nonspaceChar >> notFollowedBy (char '~') strikeEnd = try $ string "~~" -superscript :: GenParser Char ParserState Inline +superscript :: Parsec [Char] ParserState Inline superscript = failIfStrict >> enclosed (char '^') (char '^') (notFollowedBy spaceChar >> inline) >>= -- may not contain Space return . Superscript -subscript :: GenParser Char ParserState Inline +subscript :: Parsec [Char] ParserState Inline subscript = failIfStrict >> enclosed (char '~') (char '~') (notFollowedBy spaceChar >> inline) >>= -- may not contain Space return . Subscript -whitespace :: GenParser Char ParserState Inline +whitespace :: Parsec [Char] ParserState Inline whitespace = spaceChar >> ( (spaceChar >> skipMany spaceChar >> option Space (endline >> return LineBreak)) <|> (skipMany spaceChar >> return Space) ) <?> "whitespace" -nonEndline :: GenParser Char st Char +nonEndline :: Parsec [Char] st Char nonEndline = satisfy (/='\n') -str :: GenParser Char ParserState Inline +str :: Parsec [Char] ParserState Inline str = do smart <- stateSmart `fmap` getState a <- alphaNum @@ -1136,7 +1136,7 @@ likelyAbbrev x = in map snd $ filter (\(y,_) -> y == x) abbrPairs -- an endline character that can be treated as a space, not a structural break -endline :: GenParser Char ParserState Inline +endline :: Parsec [Char] ParserState Inline endline = try $ do newline notFollowedBy blankline @@ -1155,20 +1155,20 @@ endline = try $ do -- -- a reference label for a link -reference :: GenParser Char ParserState [Inline] +reference :: Parsec [Char] ParserState [Inline] reference = do notFollowedBy' (string "[^") -- footnote reference result <- inlinesInBalancedBrackets inline return $ normalizeSpaces result -- source for a link, with optional title -source :: GenParser Char ParserState (String, [Char]) +source :: Parsec [Char] ParserState (String, [Char]) source = (try $ charsInBalanced '(' ')' litChar >>= parseFromString source') <|> -- the following is needed for cases like: [ref](/url(a). (enclosed (char '(') (char ')') litChar >>= parseFromString source') -- auxiliary function for source -source' :: GenParser Char ParserState (String, [Char]) +source' :: Parsec [Char] ParserState (String, [Char]) source' = do skipSpaces let nl = char '\n' >>~ notFollowedBy blankline @@ -1186,7 +1186,7 @@ source' = do eof return (escapeURI $ removeTrailingSpace src, tit) -linkTitle :: GenParser Char ParserState String +linkTitle :: Parsec [Char] ParserState String linkTitle = try $ do (many1 spaceChar >> option '\n' newline) <|> newline skipSpaces @@ -1194,7 +1194,7 @@ linkTitle = try $ do tit <- manyTill litChar (try (char delim >> skipSpaces >> eof)) return $ fromEntities tit -link :: GenParser Char ParserState Inline +link :: Parsec [Char] ParserState Inline link = try $ do lab <- reference (src, tit) <- source <|> referenceLink lab @@ -1207,7 +1207,7 @@ delinkify = bottomUp $ concatMap go -- a link like [this][ref] or [this][] or [this] referenceLink :: [Inline] - -> GenParser Char ParserState (String, [Char]) + -> Parsec [Char] ParserState (String, [Char]) referenceLink lab = do ref <- option [] (try (optional (char ' ') >> optional (newline >> skipSpaces) >> reference)) @@ -1217,7 +1217,7 @@ referenceLink lab = do Nothing -> fail "no corresponding key" Just target -> return target -autoLink :: GenParser Char ParserState Inline +autoLink :: Parsec [Char] ParserState Inline autoLink = try $ do char '<' (orig, src) <- uri <|> emailAddress @@ -1227,14 +1227,14 @@ autoLink = try $ do then Link [Str orig] (src, "") else Link [Code ("",["url"],[]) orig] (src, "") -image :: GenParser Char ParserState Inline +image :: Parsec [Char] ParserState Inline image = try $ do char '!' lab <- reference (src, tit) <- source <|> referenceLink lab return $ Image lab (src,tit) -note :: GenParser Char ParserState Inline +note :: Parsec [Char] ParserState Inline note = try $ do failIfStrict ref <- noteMarker @@ -1251,21 +1251,21 @@ note = try $ do updateState $ \st -> st{ stateNotes = notes } return $ Note contents -inlineNote :: GenParser Char ParserState Inline +inlineNote :: Parsec [Char] ParserState Inline inlineNote = try $ do failIfStrict char '^' contents <- inlinesInBalancedBrackets inline return $ Note [Para contents] -rawLaTeXInline' :: GenParser Char ParserState Inline +rawLaTeXInline' :: Parsec [Char] ParserState Inline rawLaTeXInline' = try $ do failIfStrict lookAhead $ char '\\' >> notFollowedBy' (string "start") -- context env RawInline _ s <- rawLaTeXInline return $ RawInline "tex" s -- "tex" because it might be context or latex -rawConTeXtEnvironment :: GenParser Char st String +rawConTeXtEnvironment :: Parsec [Char] st String rawConTeXtEnvironment = try $ do string "\\start" completion <- inBrackets (letter <|> digit <|> spaceChar) @@ -1274,14 +1274,14 @@ rawConTeXtEnvironment = try $ do (try $ string "\\stop" >> string completion) return $ "\\start" ++ completion ++ concat contents ++ "\\stop" ++ completion -inBrackets :: (GenParser Char st Char) -> GenParser Char st String +inBrackets :: (Parsec [Char] st Char) -> Parsec [Char] st String inBrackets parser = do char '[' contents <- many parser char ']' return $ "[" ++ contents ++ "]" -rawHtmlInline :: GenParser Char ParserState Inline +rawHtmlInline :: Parsec [Char] ParserState Inline rawHtmlInline = do st <- getState (_,result) <- if stateStrict st @@ -1291,20 +1291,20 @@ rawHtmlInline = do -- Citations -cite :: GenParser Char ParserState Inline +cite :: Parsec [Char] ParserState Inline cite = do failIfStrict citations <- textualCite <|> normalCite return $ Cite citations [] -spnl :: GenParser Char st () +spnl :: Parsec [Char] st () spnl = try $ do skipSpaces optional newline skipSpaces notFollowedBy (char '\n') -textualCite :: GenParser Char ParserState [Citation] +textualCite :: Parsec [Char] ParserState [Citation] textualCite = try $ do (_, key) <- citeKey let first = Citation{ citationId = key @@ -1319,7 +1319,7 @@ textualCite = try $ do then option [first] $ bareloc first else return $ first : rest -bareloc :: Citation -> GenParser Char ParserState [Citation] +bareloc :: Citation -> Parsec [Char] ParserState [Citation] bareloc c = try $ do spnl char '[' @@ -1329,7 +1329,7 @@ bareloc c = try $ do char ']' return $ c{ citationSuffix = suff } : rest -normalCite :: GenParser Char ParserState [Citation] +normalCite :: Parsec [Char] ParserState [Citation] normalCite = try $ do char '[' spnl @@ -1338,7 +1338,7 @@ normalCite = try $ do char ']' return citations -citeKey :: GenParser Char ParserState (Bool, String) +citeKey :: Parsec [Char] ParserState (Bool, String) citeKey = try $ do suppress_author <- option False (char '-' >> return True) char '@' @@ -1350,7 +1350,7 @@ citeKey = try $ do guard $ key `elem` stateCitations st return (suppress_author, key) -suffix :: GenParser Char ParserState [Inline] +suffix :: Parsec [Char] ParserState [Inline] suffix = try $ do hasSpace <- option False (notFollowedBy nonspaceChar >> return True) spnl @@ -1359,14 +1359,14 @@ suffix = try $ do then Space : rest else rest -prefix :: GenParser Char ParserState [Inline] +prefix :: Parsec [Char] ParserState [Inline] prefix = liftM normalizeSpaces $ manyTill inline (char ']' <|> liftM (const ']') (lookAhead citeKey)) -citeList :: GenParser Char ParserState [Citation] +citeList :: Parsec [Char] ParserState [Citation] citeList = sepBy1 citation (try $ char ';' >> spnl) -citation :: GenParser Char ParserState Citation +citation :: Parsec [Char] ParserState Citation citation = try $ do pref <- prefix (suppress_author, key) <- citeKey diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index df063ffd5..c5969f145 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -33,8 +33,8 @@ module Text.Pandoc.Readers.RST ( import Text.Pandoc.Definition import Text.Pandoc.Shared import Text.Pandoc.Parsing -import Text.ParserCombinators.Parsec -import Control.Monad ( when, liftM, guard ) +import Text.Parsec +import Control.Monad ( when, liftM, guard, mzero ) import Data.List ( findIndex, intercalate, transpose, sort, deleteFirstsBy ) import qualified Data.Map as M import Text.Printf ( printf ) @@ -89,7 +89,7 @@ titleTransform ((Header 1 head1):rest) | (promoteHeaders 1 rest, head1) titleTransform blocks = (blocks, []) -parseRST :: GenParser Char ParserState Pandoc +parseRST :: Parsec [Char] ParserState Pandoc parseRST = do optional blanklines -- skip blank lines at beginning of file startPos <- getPosition @@ -118,10 +118,10 @@ parseRST = do -- parsing blocks -- -parseBlocks :: GenParser Char ParserState [Block] +parseBlocks :: Parsec [Char] ParserState [Block] parseBlocks = manyTill block eof -block :: GenParser Char ParserState Block +block :: Parsec [Char] ParserState Block block = choice [ codeBlock , rawBlock , blockQuote @@ -146,7 +146,7 @@ block = choice [ codeBlock -- field list -- -rawFieldListItem :: String -> GenParser Char ParserState (String, String) +rawFieldListItem :: String -> Parsec [Char] ParserState (String, String) rawFieldListItem indent = try $ do string indent char ':' @@ -160,7 +160,7 @@ rawFieldListItem indent = try $ do return (name, raw) fieldListItem :: String - -> GenParser Char ParserState (Maybe ([Inline], [[Block]])) + -> Parsec [Char] ParserState (Maybe ([Inline], [[Block]])) fieldListItem indent = try $ do (name, raw) <- rawFieldListItem indent let term = [Str name] @@ -187,7 +187,7 @@ extractContents [Plain auth] = auth extractContents [Para auth] = auth extractContents _ = [] -fieldList :: GenParser Char ParserState Block +fieldList :: Parsec [Char] ParserState Block fieldList = try $ do indent <- lookAhead $ many spaceChar items <- many1 $ fieldListItem indent @@ -199,7 +199,7 @@ fieldList = try $ do -- line block -- -lineBlockLine :: GenParser Char ParserState [Inline] +lineBlockLine :: Parsec [Char] ParserState [Inline] lineBlockLine = try $ do char '|' char ' ' <|> lookAhead (char '\n') @@ -210,7 +210,7 @@ lineBlockLine = try $ do then normalizeSpaces line else Str white : normalizeSpaces line -lineBlock :: GenParser Char ParserState Block +lineBlock :: Parsec [Char] ParserState Block lineBlock = try $ do lines' <- many1 lineBlockLine blanklines @@ -220,14 +220,14 @@ lineBlock = try $ do -- paragraph block -- -para :: GenParser Char ParserState Block +para :: Parsec [Char] ParserState Block para = paraBeforeCodeBlock <|> paraNormal <?> "paragraph" -codeBlockStart :: GenParser Char st Char +codeBlockStart :: Parsec [Char] st Char codeBlockStart = string "::" >> blankline >> blankline -- paragraph that ends in a :: starting a code block -paraBeforeCodeBlock :: GenParser Char ParserState Block +paraBeforeCodeBlock :: Parsec [Char] ParserState Block paraBeforeCodeBlock = try $ do result <- many1 (notFollowedBy' codeBlockStart >> inline) lookAhead (string "::") @@ -236,21 +236,21 @@ paraBeforeCodeBlock = try $ do else (normalizeSpaces result) ++ [Str ":"] -- regular paragraph -paraNormal :: GenParser Char ParserState Block +paraNormal :: Parsec [Char] ParserState Block paraNormal = try $ do result <- many1 inline newline blanklines return $ Para $ normalizeSpaces result -plain :: GenParser Char ParserState Block +plain :: Parsec [Char] ParserState Block plain = many1 inline >>= return . Plain . normalizeSpaces -- -- image block -- -imageBlock :: GenParser Char ParserState Block +imageBlock :: Parsec [Char] ParserState Block imageBlock = try $ do string ".. image:: " src <- manyTill anyChar newline @@ -265,11 +265,11 @@ imageBlock = try $ do -- header blocks -- -header :: GenParser Char ParserState Block +header :: Parsec [Char] ParserState Block header = doubleHeader <|> singleHeader <?> "header" -- a header with lines on top and bottom -doubleHeader :: GenParser Char ParserState Block +doubleHeader :: Parsec [Char] ParserState Block doubleHeader = try $ do c <- oneOf underlineChars rest <- many (char c) -- the top line @@ -294,7 +294,7 @@ doubleHeader = try $ do return $ Header level (normalizeSpaces txt) -- a header with line on the bottom only -singleHeader :: GenParser Char ParserState Block +singleHeader :: Parsec [Char] ParserState Block singleHeader = try $ do notFollowedBy' whitespace txt <- many1 (do {notFollowedBy blankline; inline}) @@ -317,7 +317,7 @@ singleHeader = try $ do -- hrule block -- -hrule :: GenParser Char st Block +hrule :: Parsec [Char] st Block hrule = try $ do chr <- oneOf underlineChars count 3 (char chr) @@ -331,14 +331,14 @@ hrule = try $ do -- -- read a line indented by a given string -indentedLine :: String -> GenParser Char st [Char] +indentedLine :: String -> Parsec [Char] st [Char] indentedLine indents = try $ do string indents manyTill anyChar newline -- one or more indented lines, possibly separated by blank lines. -- any amount of indentation will work. -indentedBlock :: GenParser Char st [Char] +indentedBlock :: Parsec [Char] st [Char] indentedBlock = try $ do indents <- lookAhead $ many1 spaceChar lns <- many1 $ try $ do b <- option "" blanklines @@ -347,7 +347,7 @@ indentedBlock = try $ do optional blanklines return $ unlines lns -codeBlock :: GenParser Char st Block +codeBlock :: Parsec [Char] st Block codeBlock = try $ do codeBlockStart result <- indentedBlock @@ -355,7 +355,7 @@ codeBlock = try $ do -- | The 'code-block' directive (from Sphinx) that allows a language to be -- specified. -customCodeBlock :: GenParser Char st Block +customCodeBlock :: Parsec [Char] st Block customCodeBlock = try $ do string ".. code-block:: " language <- manyTill anyChar newline @@ -364,7 +364,7 @@ customCodeBlock = try $ do return $ CodeBlock ("", ["sourceCode", language], []) $ stripTrailingNewlines result -figureBlock :: GenParser Char ParserState Block +figureBlock :: Parsec [Char] ParserState Block figureBlock = try $ do string ".. figure::" src <- removeLeadingTrailingSpace `fmap` manyTill anyChar newline @@ -372,24 +372,24 @@ figureBlock = try $ do caption <- parseFromString extractCaption body return $ Para [Image caption (src,"")] -extractCaption :: GenParser Char ParserState [Inline] +extractCaption :: Parsec [Char] ParserState [Inline] extractCaption = try $ do manyTill anyLine blanklines many inline -- | The 'math' directive (from Sphinx) for display math. -mathBlock :: GenParser Char st Block +mathBlock :: Parsec [Char] st Block mathBlock = try $ do string ".. math::" mathBlockMultiline <|> mathBlockOneLine -mathBlockOneLine :: GenParser Char st Block +mathBlockOneLine :: Parsec [Char] st Block mathBlockOneLine = try $ do result <- manyTill anyChar newline blanklines return $ Para [Math DisplayMath $ removeLeadingTrailingSpace result] -mathBlockMultiline :: GenParser Char st Block +mathBlockMultiline :: Parsec [Char] st Block mathBlockMultiline = try $ do blanklines result <- indentedBlock @@ -404,7 +404,7 @@ mathBlockMultiline = try $ do $ filter (not . null) $ splitBy null lns' return $ Para $ map (Math DisplayMath) eqs -lhsCodeBlock :: GenParser Char ParserState Block +lhsCodeBlock :: Parsec [Char] ParserState Block lhsCodeBlock = try $ do failUnlessLHS optional codeBlockStart @@ -418,7 +418,7 @@ lhsCodeBlock = try $ do blanklines return $ CodeBlock ("", ["sourceCode", "literate", "haskell"], []) $ intercalate "\n" lns' -birdTrackLine :: GenParser Char st [Char] +birdTrackLine :: Parsec [Char] st [Char] birdTrackLine = do char '>' manyTill anyChar newline @@ -427,7 +427,7 @@ birdTrackLine = do -- raw html/latex/etc -- -rawBlock :: GenParser Char st Block +rawBlock :: Parsec [Char] st Block rawBlock = try $ do string ".. raw:: " lang <- many1 (letter <|> digit) @@ -439,7 +439,7 @@ rawBlock = try $ do -- block quotes -- -blockQuote :: GenParser Char ParserState Block +blockQuote :: Parsec [Char] ParserState Block blockQuote = do raw <- indentedBlock -- parse the extracted block, which may contain various block elements: @@ -450,10 +450,10 @@ blockQuote = do -- list blocks -- -list :: GenParser Char ParserState Block +list :: Parsec [Char] ParserState Block list = choice [ bulletList, orderedList, definitionList ] <?> "list" -definitionListItem :: GenParser Char ParserState ([Inline], [[Block]]) +definitionListItem :: Parsec [Char] ParserState ([Inline], [[Block]]) definitionListItem = try $ do -- avoid capturing a directive or comment notFollowedBy (try $ char '.' >> char '.') @@ -463,11 +463,11 @@ definitionListItem = try $ do contents <- parseFromString parseBlocks $ raw ++ "\n" return (normalizeSpaces term, [contents]) -definitionList :: GenParser Char ParserState Block +definitionList :: Parsec [Char] ParserState Block definitionList = many1 definitionListItem >>= return . DefinitionList -- parses bullet list start and returns its length (inc. following whitespace) -bulletListStart :: GenParser Char st Int +bulletListStart :: Parsec [Char] st Int bulletListStart = try $ do notFollowedBy' hrule -- because hrules start out just like lists marker <- oneOf bulletListMarkers @@ -477,14 +477,14 @@ bulletListStart = try $ do -- parses ordered list start and returns its length (inc following whitespace) orderedListStart :: ListNumberStyle -> ListNumberDelim - -> GenParser Char ParserState Int + -> Parsec [Char] ParserState Int orderedListStart style delim = try $ do (_, markerLen) <- withHorizDisplacement (orderedListMarker style delim) white <- many1 spaceChar return $ markerLen + length white -- parse a line of a list item -listLine :: Int -> GenParser Char ParserState [Char] +listLine :: Int -> Parsec [Char] ParserState [Char] listLine markerLength = try $ do notFollowedBy blankline indentWith markerLength @@ -492,7 +492,7 @@ listLine markerLength = try $ do return $ line ++ "\n" -- indent by specified number of spaces (or equiv. tabs) -indentWith :: Int -> GenParser Char ParserState [Char] +indentWith :: Int -> Parsec [Char] ParserState [Char] indentWith num = do state <- getState let tabStop = stateTabStop state @@ -502,8 +502,8 @@ indentWith num = do (try (char '\t' >> count (num - tabStop) (char ' '))) ] -- parse raw text for one list item, excluding start marker and continuations -rawListItem :: GenParser Char ParserState Int - -> GenParser Char ParserState (Int, [Char]) +rawListItem :: Parsec [Char] ParserState Int + -> Parsec [Char] ParserState (Int, [Char]) rawListItem start = try $ do markerLength <- start firstLine <- manyTill anyChar newline @@ -513,14 +513,14 @@ rawListItem start = try $ do -- continuation of a list item - indented and separated by blankline or -- (in compact lists) endline. -- Note: nested lists are parsed as continuations. -listContinuation :: Int -> GenParser Char ParserState [Char] +listContinuation :: Int -> Parsec [Char] ParserState [Char] listContinuation markerLength = try $ do blanks <- many1 blankline result <- many1 (listLine markerLength) return $ blanks ++ concat result -listItem :: GenParser Char ParserState Int - -> GenParser Char ParserState [Block] +listItem :: Parsec [Char] ParserState Int + -> Parsec [Char] ParserState [Block] listItem start = try $ do (markerLength, first) <- rawListItem start rest <- many (listContinuation markerLength) @@ -537,14 +537,14 @@ listItem start = try $ do updateState (\st -> st {stateParserContext = oldContext}) return parsed -orderedList :: GenParser Char ParserState Block +orderedList :: Parsec [Char] ParserState Block orderedList = try $ do (start, style, delim) <- lookAhead (anyOrderedListMarker >>~ spaceChar) items <- many1 (listItem (orderedListStart style delim)) let items' = compactify items return $ OrderedList (start, style, delim) items' -bulletList :: GenParser Char ParserState Block +bulletList :: Parsec [Char] ParserState Block bulletList = many1 (listItem bulletListStart) >>= return . BulletList . compactify @@ -552,7 +552,7 @@ bulletList = many1 (listItem bulletListStart) >>= -- default-role block -- -defaultRoleBlock :: GenParser Char ParserState Block +defaultRoleBlock :: Parsec [Char] ParserState Block defaultRoleBlock = try $ do string ".. default-role::" -- doesn't enforce any restrictions on the role name; embedded spaces shouldn't be allowed, for one @@ -570,7 +570,7 @@ defaultRoleBlock = try $ do -- unknown directive (e.g. comment) -- -unknownDirective :: GenParser Char st Block +unknownDirective :: Parsec [Char] st Block unknownDirective = try $ do string ".." notFollowedBy (noneOf " \t\n") @@ -582,7 +582,7 @@ unknownDirective = try $ do --- note block --- -noteBlock :: GenParser Char ParserState [Char] +noteBlock :: Parsec [Char] ParserState [Char] noteBlock = try $ do startPos <- getPosition string ".." @@ -601,7 +601,7 @@ noteBlock = try $ do -- return blanks so line count isn't affected return $ replicate (sourceLine endPos - sourceLine startPos) '\n' -noteMarker :: GenParser Char ParserState [Char] +noteMarker :: Parsec [Char] ParserState [Char] noteMarker = do char '[' res <- many1 digit @@ -614,13 +614,13 @@ noteMarker = do -- reference key -- -quotedReferenceName :: GenParser Char ParserState [Inline] +quotedReferenceName :: Parsec [Char] ParserState [Inline] quotedReferenceName = try $ do char '`' >> notFollowedBy (char '`') -- `` means inline code! label' <- many1Till inline (char '`') return label' -unquotedReferenceName :: GenParser Char ParserState [Inline] +unquotedReferenceName :: Parsec [Char] ParserState [Inline] unquotedReferenceName = try $ do label' <- many1Till inline (lookAhead $ char ':') return label' @@ -629,24 +629,24 @@ unquotedReferenceName = try $ do -- plus isolated (no two adjacent) internal hyphens, underscores, -- periods, colons and plus signs; no whitespace or other characters -- are allowed. -simpleReferenceName' :: GenParser Char st String +simpleReferenceName' :: Parsec [Char] st String simpleReferenceName' = do x <- alphaNum xs <- many $ alphaNum <|> (try $ oneOf "-_:+." >> lookAhead alphaNum) return (x:xs) -simpleReferenceName :: GenParser Char st [Inline] +simpleReferenceName :: Parsec [Char] st [Inline] simpleReferenceName = do raw <- simpleReferenceName' return [Str raw] -referenceName :: GenParser Char ParserState [Inline] +referenceName :: Parsec [Char] ParserState [Inline] referenceName = quotedReferenceName <|> (try $ simpleReferenceName >>~ lookAhead (char ':')) <|> unquotedReferenceName -referenceKey :: GenParser Char ParserState [Char] +referenceKey :: Parsec [Char] ParserState [Char] referenceKey = do startPos <- getPosition (key, target) <- choice [imageKey, anonymousKey, regularKey] @@ -658,7 +658,7 @@ referenceKey = do -- return enough blanks to replace key return $ replicate (sourceLine endPos - sourceLine startPos) '\n' -targetURI :: GenParser Char st [Char] +targetURI :: Parsec [Char] st [Char] targetURI = do skipSpaces optional newline @@ -667,7 +667,7 @@ targetURI = do blanklines return $ escapeURI $ removeLeadingTrailingSpace $ contents -imageKey :: GenParser Char ParserState (Key, Target) +imageKey :: Parsec [Char] ParserState (Key, Target) imageKey = try $ do string ".. |" ref <- manyTill inline (char '|') @@ -676,14 +676,14 @@ imageKey = try $ do src <- targetURI return (toKey (normalizeSpaces ref), (src, "")) -anonymousKey :: GenParser Char st (Key, Target) +anonymousKey :: Parsec [Char] st (Key, Target) anonymousKey = try $ do oneOfStrings [".. __:", "__"] src <- targetURI pos <- getPosition return (toKey [Str $ "_" ++ printf "%09d" (sourceLine pos)], (src, "")) -regularKey :: GenParser Char ParserState (Key, Target) +regularKey :: Parsec [Char] ParserState (Key, Target) regularKey = try $ do string ".. _" ref <- referenceName @@ -708,31 +708,31 @@ regularKey = try $ do -- Grid tables TODO: -- - column spans -dashedLine :: Char -> GenParser Char st (Int, Int) +dashedLine :: Char -> Parsec [Char] st (Int, Int) dashedLine ch = do dashes <- many1 (char ch) sp <- many (char ' ') return (length dashes, length $ dashes ++ sp) -simpleDashedLines :: Char -> GenParser Char st [(Int,Int)] +simpleDashedLines :: Char -> Parsec [Char] st [(Int,Int)] simpleDashedLines ch = try $ many1 (dashedLine ch) -- Parse a table row separator -simpleTableSep :: Char -> GenParser Char ParserState Char +simpleTableSep :: Char -> Parsec [Char] ParserState Char simpleTableSep ch = try $ simpleDashedLines ch >> newline -- Parse a table footer -simpleTableFooter :: GenParser Char ParserState [Char] +simpleTableFooter :: Parsec [Char] ParserState [Char] simpleTableFooter = try $ simpleTableSep '=' >> blanklines -- Parse a raw line and split it into chunks by indices. -simpleTableRawLine :: [Int] -> GenParser Char ParserState [String] +simpleTableRawLine :: [Int] -> Parsec [Char] ParserState [String] simpleTableRawLine indices = do line <- many1Till anyChar newline return (simpleTableSplitLine indices line) -- Parse a table row and return a list of blocks (columns). -simpleTableRow :: [Int] -> GenParser Char ParserState [[Block]] +simpleTableRow :: [Int] -> Parsec [Char] ParserState [[Block]] simpleTableRow indices = do notFollowedBy' simpleTableFooter firstLine <- simpleTableRawLine indices @@ -746,7 +746,7 @@ simpleTableSplitLine indices line = $ tail $ splitByIndices (init indices) line simpleTableHeader :: Bool -- ^ Headerless table - -> GenParser Char ParserState ([[Block]], [Alignment], [Int]) + -> Parsec [Char] ParserState ([[Block]], [Alignment], [Int]) simpleTableHeader headless = try $ do optional blanklines rawContent <- if headless @@ -766,7 +766,7 @@ simpleTableHeader headless = try $ do -- Parse a simple table. simpleTable :: Bool -- ^ Headerless table - -> GenParser Char ParserState Block + -> Parsec [Char] ParserState Block simpleTable headless = do Table c a _w h l <- tableWith (simpleTableHeader headless) simpleTableRow sep simpleTableFooter (return []) -- Simple tables get 0s for relative column widths (i.e., use default) @@ -775,10 +775,10 @@ simpleTable headless = do sep = return () -- optional (simpleTableSep '-') gridTable :: Bool -- ^ Headerless table - -> GenParser Char ParserState Block + -> Parsec [Char] ParserState Block gridTable = gridTableWith block (return []) -table :: GenParser Char ParserState Block +table :: Parsec [Char] ParserState Block table = gridTable False <|> simpleTable False <|> gridTable True <|> simpleTable True <?> "table" @@ -787,7 +787,7 @@ table = gridTable False <|> simpleTable False <|> -- inline -- -inline :: GenParser Char ParserState Inline +inline :: Parsec [Char] ParserState Inline inline = choice [ whitespace , link , str @@ -805,26 +805,26 @@ inline = choice [ whitespace , escapedChar , symbol ] <?> "inline" -hyphens :: GenParser Char ParserState Inline +hyphens :: Parsec [Char] ParserState Inline hyphens = do result <- many1 (char '-') option Space endline -- don't want to treat endline after hyphen or dash as a space return $ Str result -escapedChar :: GenParser Char st Inline +escapedChar :: Parsec [Char] st Inline escapedChar = do c <- escaped anyChar return $ if c == ' ' -- '\ ' is null in RST then Str "" else Str [c] -symbol :: GenParser Char ParserState Inline +symbol :: Parsec [Char] ParserState Inline symbol = do result <- oneOf specialChars return $ Str [result] -- parses inline code, between codeStart and codeEnd -code :: GenParser Char ParserState Inline +code :: Parsec [Char] ParserState Inline code = try $ do string "``" result <- manyTill anyChar (try (string "``")) @@ -832,7 +832,7 @@ code = try $ do $ removeLeadingTrailingSpace $ intercalate " " $ lines result -- succeeds only if we're not right after a str (ie. in middle of word) -atStart :: GenParser Char ParserState a -> GenParser Char ParserState a +atStart :: Parsec [Char] ParserState a -> Parsec [Char] ParserState a atStart p = do pos <- getPosition st <- getState @@ -840,18 +840,18 @@ atStart p = do guard $ stateLastStrPos st /= Just pos p -emph :: GenParser Char ParserState Inline +emph :: Parsec [Char] ParserState Inline emph = enclosed (atStart $ char '*') (char '*') inline >>= return . Emph . normalizeSpaces -strong :: GenParser Char ParserState Inline +strong :: Parsec [Char] ParserState Inline strong = enclosed (atStart $ string "**") (try $ string "**") inline >>= return . Strong . normalizeSpaces -- Parses inline interpreted text which is required to have the given role. -- This decision is based on the role marker (if present), -- and the current default interpreted text role. -interpreted :: [Char] -> GenParser Char ParserState [Char] +interpreted :: [Char] -> Parsec [Char] ParserState [Char] interpreted role = try $ do state <- getState if role == stateRstDefaultRole state @@ -868,19 +868,19 @@ interpreted role = try $ do result <- enclosed (atStart $ char '`') (char '`') anyChar return result -superscript :: GenParser Char ParserState Inline +superscript :: Parsec [Char] ParserState Inline superscript = interpreted "sup" >>= \x -> return (Superscript [Str x]) -subscript :: GenParser Char ParserState Inline +subscript :: Parsec [Char] ParserState Inline subscript = interpreted "sub" >>= \x -> return (Subscript [Str x]) -math :: GenParser Char ParserState Inline +math :: Parsec [Char] ParserState Inline math = interpreted "math" >>= \x -> return (Math InlineMath x) -whitespace :: GenParser Char ParserState Inline +whitespace :: Parsec [Char] ParserState Inline whitespace = many1 spaceChar >> return Space <?> "whitespace" -str :: GenParser Char ParserState Inline +str :: Parsec [Char] ParserState Inline str = do let strChar = noneOf ("\t\n " ++ specialChars) result <- many1 strChar @@ -888,7 +888,7 @@ str = do return $ Str result -- an endline character that can be treated as a space, not a structural break -endline :: GenParser Char ParserState Inline +endline :: Parsec [Char] ParserState Inline endline = try $ do newline notFollowedBy blankline @@ -904,10 +904,10 @@ endline = try $ do -- links -- -link :: GenParser Char ParserState Inline +link :: Parsec [Char] ParserState Inline link = choice [explicitLink, referenceLink, autoLink] <?> "link" -explicitLink :: GenParser Char ParserState Inline +explicitLink :: Parsec [Char] ParserState Inline explicitLink = try $ do char '`' notFollowedBy (char '`') -- `` marks start of inline code @@ -919,7 +919,7 @@ explicitLink = try $ do return $ Link (normalizeSpaces label') (escapeURI $ removeLeadingTrailingSpace src, "") -referenceLink :: GenParser Char ParserState Inline +referenceLink :: Parsec [Char] ParserState Inline referenceLink = try $ do label' <- (quotedReferenceName <|> simpleReferenceName) >>~ char '_' state <- getState @@ -931,7 +931,7 @@ referenceLink = try $ do do char '_' let anonKeys = sort $ filter isAnonKey $ M.keys keyTable if null anonKeys - then pzero + then mzero else return (head anonKeys) (src,tit) <- case lookupKeySrc keyTable key of Nothing -> fail "no corresponding key" @@ -940,21 +940,21 @@ referenceLink = try $ do when (isAnonKey key) $ updateState $ \s -> s{ stateKeys = M.delete key keyTable } return $ Link (normalizeSpaces label') (src, tit) -autoURI :: GenParser Char ParserState Inline +autoURI :: Parsec [Char] ParserState Inline autoURI = do (orig, src) <- uri return $ Link [Str orig] (src, "") -autoEmail :: GenParser Char ParserState Inline +autoEmail :: Parsec [Char] ParserState Inline autoEmail = do (orig, src) <- emailAddress return $ Link [Str orig] (src, "") -autoLink :: GenParser Char ParserState Inline +autoLink :: Parsec [Char] ParserState Inline autoLink = autoURI <|> autoEmail -- For now, we assume that all substitution references are for images. -image :: GenParser Char ParserState Inline +image :: Parsec [Char] ParserState Inline image = try $ do char '|' ref <- manyTill inline (char '|') @@ -965,7 +965,7 @@ image = try $ do Just target -> return target return $ Image (normalizeSpaces ref) (src, tit) -note :: GenParser Char ParserState Inline +note :: Parsec [Char] ParserState Inline note = try $ do ref <- noteMarker char '_' diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 9f6289289..77347ba4e 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -60,7 +60,7 @@ import Text.Pandoc.Shared import Text.Pandoc.Parsing import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag, isBlockTag ) import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) -import Text.ParserCombinators.Parsec +import Text.Parsec import Text.HTML.TagSoup.Match import Data.Char ( digitToInt, isUpper ) import Control.Monad ( guard, liftM ) @@ -75,7 +75,7 @@ readTextile state s = -- | Generate a Pandoc ADT from a textile document -parseTextile :: GenParser Char ParserState Pandoc +parseTextile :: Parsec [Char] ParserState Pandoc parseTextile = do -- textile allows raw HTML and does smart punctuation by default updateState (\state -> state { stateParseRaw = True, stateSmart = True }) @@ -93,10 +93,10 @@ parseTextile = do blocks <- parseBlocks return $ Pandoc (Meta [] [] []) blocks -- FIXME -noteMarker :: GenParser Char ParserState [Char] +noteMarker :: Parsec [Char] ParserState [Char] noteMarker = skipMany spaceChar >> string "fn" >> manyTill digit (char '.') -noteBlock :: GenParser Char ParserState [Char] +noteBlock :: Parsec [Char] ParserState [Char] noteBlock = try $ do startPos <- getPosition ref <- noteMarker @@ -111,11 +111,11 @@ noteBlock = try $ do return $ replicate (sourceLine endPos - sourceLine startPos) '\n' -- | Parse document blocks -parseBlocks :: GenParser Char ParserState [Block] +parseBlocks :: Parsec [Char] ParserState [Block] parseBlocks = manyTill block eof -- | Block parsers list tried in definition order -blockParsers :: [GenParser Char ParserState Block] +blockParsers :: [Parsec [Char] ParserState Block] blockParsers = [ codeBlock , header , blockQuote @@ -128,20 +128,20 @@ blockParsers = [ codeBlock , nullBlock ] -- | Any block in the order of definition of blockParsers -block :: GenParser Char ParserState Block +block :: Parsec [Char] ParserState Block block = choice blockParsers <?> "block" -codeBlock :: GenParser Char ParserState Block +codeBlock :: Parsec [Char] ParserState Block codeBlock = codeBlockBc <|> codeBlockPre -codeBlockBc :: GenParser Char ParserState Block +codeBlockBc :: Parsec [Char] ParserState Block codeBlockBc = try $ do string "bc. " contents <- manyTill anyLine blanklines return $ CodeBlock ("",[],[]) $ unlines contents -- | Code Blocks in Textile are between <pre> and </pre> -codeBlockPre :: GenParser Char ParserState Block +codeBlockPre :: Parsec [Char] ParserState Block codeBlockPre = try $ do htmlTag (tagOpen (=="pre") null) result' <- manyTill anyChar (try $ htmlTag (tagClose (=="pre")) >> blockBreak) @@ -156,7 +156,7 @@ codeBlockPre = try $ do return $ CodeBlock ("",[],[]) result''' -- | Header of the form "hN. content" with N in 1..6 -header :: GenParser Char ParserState Block +header :: Parsec [Char] ParserState Block header = try $ do char 'h' level <- digitToInt <$> oneOf "123456" @@ -165,14 +165,14 @@ header = try $ do return $ Header level name -- | Blockquote of the form "bq. content" -blockQuote :: GenParser Char ParserState Block +blockQuote :: Parsec [Char] ParserState Block blockQuote = try $ do string "bq" >> optional attributes >> char '.' >> whitespace BlockQuote . singleton <$> para -- Horizontal rule -hrule :: GenParser Char st Block +hrule :: Parsec [Char] st Block hrule = try $ do skipSpaces start <- oneOf "-*" @@ -187,39 +187,39 @@ hrule = try $ do -- | Can be a bullet list or an ordered list. This implementation is -- strict in the nesting, sublist must start at exactly "parent depth -- plus one" -anyList :: GenParser Char ParserState Block +anyList :: Parsec [Char] ParserState Block anyList = try $ ( (anyListAtDepth 1) <* blanklines ) -- | This allow one type of list to be nested into an other type, -- provided correct nesting -anyListAtDepth :: Int -> GenParser Char ParserState Block +anyListAtDepth :: Int -> Parsec [Char] ParserState Block anyListAtDepth depth = choice [ bulletListAtDepth depth, orderedListAtDepth depth, definitionList ] -- | Bullet List of given depth, depth being the number of leading '*' -bulletListAtDepth :: Int -> GenParser Char ParserState Block +bulletListAtDepth :: Int -> Parsec [Char] ParserState Block bulletListAtDepth depth = try $ BulletList <$> many1 (bulletListItemAtDepth depth) -- | Bullet List Item of given depth, depth being the number of -- leading '*' -bulletListItemAtDepth :: Int -> GenParser Char ParserState [Block] +bulletListItemAtDepth :: Int -> Parsec [Char] ParserState [Block] bulletListItemAtDepth = genericListItemAtDepth '*' -- | Ordered List of given depth, depth being the number of -- leading '#' -orderedListAtDepth :: Int -> GenParser Char ParserState Block +orderedListAtDepth :: Int -> Parsec [Char] ParserState Block orderedListAtDepth depth = try $ do items <- many1 (orderedListItemAtDepth depth) return (OrderedList (1, DefaultStyle, DefaultDelim) items) -- | Ordered List Item of given depth, depth being the number of -- leading '#' -orderedListItemAtDepth :: Int -> GenParser Char ParserState [Block] +orderedListItemAtDepth :: Int -> Parsec [Char] ParserState [Block] orderedListItemAtDepth = genericListItemAtDepth '#' -- | Common implementation of list items -genericListItemAtDepth :: Char -> Int -> GenParser Char ParserState [Block] +genericListItemAtDepth :: Char -> Int -> Parsec [Char] ParserState [Block] genericListItemAtDepth c depth = try $ do count depth (char c) >> optional attributes >> whitespace p <- inlines @@ -227,22 +227,22 @@ genericListItemAtDepth c depth = try $ do return ((Plain p):sublist) -- | A definition list is a set of consecutive definition items -definitionList :: GenParser Char ParserState Block +definitionList :: Parsec [Char] ParserState Block definitionList = try $ DefinitionList <$> many1 definitionListItem -- | A definition list item in textile begins with '- ', followed by -- the term defined, then spaces and ":=". The definition follows, on -- the same single line, or spaned on multiple line, after a line -- break. -definitionListItem :: GenParser Char ParserState ([Inline], [[Block]]) +definitionListItem :: Parsec [Char] ParserState ([Inline], [[Block]]) definitionListItem = try $ do string "- " term <- many1Till inline (try (whitespace >> string ":=")) def <- inlineDef <|> multilineDef return (term, def) - where inlineDef :: GenParser Char ParserState [[Block]] + where inlineDef :: Parsec [Char] ParserState [[Block]] inlineDef = liftM (\d -> [[Plain d]]) $ try (whitespace >> inlines) - multilineDef :: GenParser Char ParserState [[Block]] + multilineDef :: Parsec [Char] ParserState [[Block]] multilineDef = try $ do optional whitespace >> newline s <- many1Till anyChar (try (string "=:" >> newline)) @@ -252,57 +252,57 @@ definitionListItem = try $ do -- | This terminates a block such as a paragraph. Because of raw html -- blocks support, we have to lookAhead for a rawHtmlBlock. -blockBreak :: GenParser Char ParserState () +blockBreak :: Parsec [Char] ParserState () blockBreak = try (newline >> blanklines >> return ()) <|> (lookAhead rawHtmlBlock >> return ()) -- raw content -- | A raw Html Block, optionally followed by blanklines -rawHtmlBlock :: GenParser Char ParserState Block +rawHtmlBlock :: Parsec [Char] ParserState Block rawHtmlBlock = try $ do (_,b) <- htmlTag isBlockTag optional blanklines return $ RawBlock "html" b -- | Raw block of LaTeX content -rawLaTeXBlock' :: GenParser Char ParserState Block +rawLaTeXBlock' :: Parsec [Char] ParserState Block rawLaTeXBlock' = do failIfStrict RawBlock "latex" <$> (rawLaTeXBlock <* spaces) -- | In textile, paragraphs are separated by blank lines. -para :: GenParser Char ParserState Block +para :: Parsec [Char] ParserState Block para = try $ Para . normalizeSpaces <$> manyTill inline blockBreak -- Tables -- | A table cell spans until a pipe | -tableCell :: GenParser Char ParserState TableCell +tableCell :: Parsec [Char] ParserState TableCell tableCell = do c <- many1 (noneOf "|\n") content <- parseFromString (many1 inline) c return $ [ Plain $ normalizeSpaces content ] -- | A table row is made of many table cells -tableRow :: GenParser Char ParserState [TableCell] +tableRow :: Parsec [Char] ParserState [TableCell] tableRow = try $ ( char '|' *> (endBy1 tableCell (char '|')) <* newline) -- | Many table rows -tableRows :: GenParser Char ParserState [[TableCell]] +tableRows :: Parsec [Char] ParserState [[TableCell]] tableRows = many1 tableRow -- | Table headers are made of cells separated by a tag "|_." -tableHeaders :: GenParser Char ParserState [TableCell] +tableHeaders :: Parsec [Char] ParserState [TableCell] tableHeaders = let separator = (try $ string "|_.") in try $ ( separator *> (sepBy1 tableCell separator) <* char '|' <* newline ) -- | A table with an optional header. Current implementation can -- handle tables with and without header, but will parse cells -- alignment attributes as content. -table :: GenParser Char ParserState Block +table :: Parsec [Char] ParserState Block table = try $ do headers <- option [] tableHeaders rows <- tableRows @@ -318,8 +318,8 @@ table = try $ do -- | Blocks like 'p' and 'table' do not need explicit block tag. -- However, they can be used to set HTML/CSS attributes when needed. maybeExplicitBlock :: String -- ^ block tag name - -> GenParser Char ParserState Block -- ^ implicit block - -> GenParser Char ParserState Block + -> Parsec [Char] ParserState Block -- ^ implicit block + -> Parsec [Char] ParserState Block maybeExplicitBlock name blk = try $ do optional $ try $ string name >> optional attributes >> char '.' >> ((try whitespace) <|> endline) @@ -333,15 +333,15 @@ maybeExplicitBlock name blk = try $ do -- | Any inline element -inline :: GenParser Char ParserState Inline +inline :: Parsec [Char] ParserState Inline inline = choice inlineParsers <?> "inline" -- | List of consecutive inlines before a newline -inlines :: GenParser Char ParserState [Inline] +inlines :: Parsec [Char] ParserState [Inline] inlines = manyTill inline newline -- | Inline parsers tried in order -inlineParsers :: [GenParser Char ParserState Inline] +inlineParsers :: [Parsec [Char] ParserState Inline] inlineParsers = [ autoLink , str , whitespace @@ -362,7 +362,7 @@ inlineParsers = [ autoLink ] -- | Inline markups -inlineMarkup :: GenParser Char ParserState Inline +inlineMarkup :: Parsec [Char] ParserState Inline inlineMarkup = choice [ simpleInline (string "??") (Cite []) , simpleInline (string "**") Strong , simpleInline (string "__") Emph @@ -375,29 +375,29 @@ inlineMarkup = choice [ simpleInline (string "??") (Cite []) ] -- | Trademark, registered, copyright -mark :: GenParser Char st Inline +mark :: Parsec [Char] st Inline mark = try $ char '(' >> (try tm <|> try reg <|> copy) -reg :: GenParser Char st Inline +reg :: Parsec [Char] st Inline reg = do oneOf "Rr" char ')' return $ Str "\174" -tm :: GenParser Char st Inline +tm :: Parsec [Char] st Inline tm = do oneOf "Tt" oneOf "Mm" char ')' return $ Str "\8482" -copy :: GenParser Char st Inline +copy :: Parsec [Char] st Inline copy = do oneOf "Cc" char ')' return $ Str "\169" -note :: GenParser Char ParserState Inline +note :: Parsec [Char] ParserState Inline note = try $ do ref <- (char '[' *> many1 digit <* char ']') notes <- stateNotes <$> getState @@ -421,7 +421,7 @@ wordBoundaries :: [Char] wordBoundaries = markupChars ++ stringBreakers -- | Parse a hyphened sequence of words -hyphenedWords :: GenParser Char ParserState String +hyphenedWords :: Parsec [Char] ParserState String hyphenedWords = try $ do hd <- noneOf wordBoundaries tl <- many ( (noneOf wordBoundaries) <|> @@ -431,7 +431,7 @@ hyphenedWords = try $ do (\r -> concat [wd, "-", r]) <$> (char '-' *> hyphenedWords) -- | Any string -str :: GenParser Char ParserState Inline +str :: Parsec [Char] ParserState Inline str = do baseStr <- hyphenedWords -- RedCloth compliance : if parsed word is uppercase and immediatly @@ -444,34 +444,34 @@ str = do return $ Str fullStr -- | Textile allows HTML span infos, we discard them -htmlSpan :: GenParser Char ParserState Inline +htmlSpan :: Parsec [Char] ParserState Inline htmlSpan = try $ Str <$> ( char '%' *> attributes *> manyTill anyChar (char '%') ) -- | Some number of space chars -whitespace :: GenParser Char ParserState Inline +whitespace :: Parsec [Char] ParserState Inline whitespace = many1 spaceChar >> return Space <?> "whitespace" -- | In Textile, an isolated endline character is a line break -endline :: GenParser Char ParserState Inline +endline :: Parsec [Char] ParserState Inline endline = try $ do newline >> notFollowedBy blankline return LineBreak -rawHtmlInline :: GenParser Char ParserState Inline +rawHtmlInline :: Parsec [Char] ParserState Inline rawHtmlInline = RawInline "html" . snd <$> htmlTag isInlineTag -- | Raw LaTeX Inline -rawLaTeXInline' :: GenParser Char ParserState Inline +rawLaTeXInline' :: Parsec [Char] ParserState Inline rawLaTeXInline' = try $ do failIfStrict rawLaTeXInline -- | Textile standard link syntax is "label":target. But we -- can also have ["label":target]. -link :: GenParser Char ParserState Inline +link :: Parsec [Char] ParserState Inline link = linkB <|> linkNoB -linkNoB :: GenParser Char ParserState Inline +linkNoB :: Parsec [Char] ParserState Inline linkNoB = try $ do name <- surrounded (char '"') inline char ':' @@ -479,7 +479,7 @@ linkNoB = try $ do url <- manyTill nonspaceChar (lookAhead $ space <|> try (oneOf stopChars >> (space <|> newline))) return $ Link name (url, "") -linkB :: GenParser Char ParserState Inline +linkB :: Parsec [Char] ParserState Inline linkB = try $ do char '[' name <- surrounded (char '"') inline @@ -488,13 +488,13 @@ linkB = try $ do return $ Link name (url, "") -- | Detect plain links to http or email. -autoLink :: GenParser Char ParserState Inline +autoLink :: Parsec [Char] ParserState Inline autoLink = do (orig, src) <- (try uri <|> try emailAddress) return $ Link [Str orig] (src, "") -- | image embedding -image :: GenParser Char ParserState Inline +image :: Parsec [Char] ParserState Inline image = try $ do char '!' >> notFollowedBy space src <- manyTill anyChar (lookAhead $ oneOf "!(") @@ -502,49 +502,49 @@ image = try $ do char '!' return $ Image [Str alt] (src, alt) -escapedInline :: GenParser Char ParserState Inline +escapedInline :: Parsec [Char] ParserState Inline escapedInline = escapedEqs <|> escapedTag -escapedEqs :: GenParser Char ParserState Inline +escapedEqs :: Parsec [Char] ParserState Inline escapedEqs = Str <$> (try $ string "==" *> manyTill anyChar (try $ string "==")) -- | literal text escaped btw <notextile> tags -escapedTag :: GenParser Char ParserState Inline +escapedTag :: Parsec [Char] ParserState Inline escapedTag = Str <$> (try $ string "<notextile>" *> manyTill anyChar (try $ string "</notextile>")) -- | Any special symbol defined in wordBoundaries -symbol :: GenParser Char ParserState Inline +symbol :: Parsec [Char] ParserState Inline symbol = Str . singleton <$> oneOf wordBoundaries -- | Inline code -code :: GenParser Char ParserState Inline +code :: Parsec [Char] ParserState Inline code = code1 <|> code2 -code1 :: GenParser Char ParserState Inline +code1 :: Parsec [Char] ParserState Inline code1 = Code nullAttr <$> surrounded (char '@') anyChar -code2 :: GenParser Char ParserState Inline +code2 :: Parsec [Char] ParserState Inline code2 = do htmlTag (tagOpen (=="tt") null) Code nullAttr <$> manyTill anyChar (try $ htmlTag $ tagClose (=="tt")) -- | Html / CSS attributes -attributes :: GenParser Char ParserState String +attributes :: Parsec [Char] ParserState String attributes = choice [ enclosed (char '(') (char ')') anyChar, enclosed (char '{') (char '}') anyChar, enclosed (char '[') (char ']') anyChar] -- | Parses material surrounded by a parser. -surrounded :: GenParser Char st t -- ^ surrounding parser - -> GenParser Char st a -- ^ content parser (to be used repeatedly) - -> GenParser Char st [a] +surrounded :: Parsec [Char] st t -- ^ surrounding parser + -> Parsec [Char] st a -- ^ content parser (to be used repeatedly) + -> Parsec [Char] st [a] surrounded border = enclosed border (try border) -- | Inlines are most of the time of the same form -simpleInline :: GenParser Char ParserState t -- ^ surrounding parser +simpleInline :: Parsec [Char] ParserState t -- ^ surrounding parser -> ([Inline] -> Inline) -- ^ Inline constructor - -> GenParser Char ParserState Inline -- ^ content parser (to be used repeatedly) + -> Parsec [Char] ParserState Inline -- ^ content parser (to be used repeatedly) simpleInline border construct = surrounded border (inlineWithAttribute) >>= return . construct . normalizeSpaces where inlineWithAttribute = (try $ optional attributes) >> inline diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index dfdcd8e63..bd4cdcd86 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -68,8 +68,8 @@ module Text.Pandoc.Templates ( renderTemplate , TemplateTarget , getDefaultTemplate ) where -import Text.ParserCombinators.Parsec -import Control.Monad (liftM, when, forM) +import Text.Parsec +import Control.Monad (liftM, when, forM, mzero) import System.FilePath import Data.List (intercalate, intersperse) #if MIN_VERSION_blaze_html(0,5,0) @@ -98,7 +98,7 @@ getDefaultTemplate user writer = do data TemplateState = TemplateState Int [(String,String)] -adjustPosition :: String -> GenParser Char TemplateState String +adjustPosition :: String -> Parsec [Char] TemplateState String adjustPosition str = do let lastline = takeWhile (/= '\n') $ reverse str updateState $ \(TemplateState pos x) -> @@ -132,21 +132,21 @@ renderTemplate vals templ = reservedWords :: [String] reservedWords = ["else","endif","for","endfor","sep"] -parseTemplate :: GenParser Char TemplateState [String] +parseTemplate :: Parsec [Char] TemplateState [String] parseTemplate = many $ (plaintext <|> escapedDollar <|> conditional <|> for <|> variable) >>= adjustPosition -plaintext :: GenParser Char TemplateState String +plaintext :: Parsec [Char] TemplateState String plaintext = many1 $ noneOf "$" -escapedDollar :: GenParser Char TemplateState String +escapedDollar :: Parsec [Char] TemplateState String escapedDollar = try $ string "$$" >> return "$" -skipEndline :: GenParser Char st () +skipEndline :: Parsec [Char] st () skipEndline = try $ skipMany (oneOf " \t") >> newline >> return () -conditional :: GenParser Char TemplateState String +conditional :: Parsec [Char] TemplateState String conditional = try $ do TemplateState pos vars <- getState string "$if(" @@ -170,7 +170,7 @@ conditional = try $ do then ifContents else elseContents -for :: GenParser Char TemplateState String +for :: Parsec [Char] TemplateState String for = try $ do TemplateState pos vars <- getState string "$for(" @@ -193,16 +193,16 @@ for = try $ do setState $ TemplateState pos vars return $ concat $ intersperse sep contents -ident :: GenParser Char TemplateState String +ident :: Parsec [Char] TemplateState String ident = do first <- letter rest <- many (alphaNum <|> oneOf "_-") let id' = first : rest if id' `elem` reservedWords - then pzero + then mzero else return id' -variable :: GenParser Char TemplateState String +variable :: Parsec [Char] TemplateState String variable = try $ do char '$' id' <- ident diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index 1913eb92b..9d66f76bb 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -41,7 +41,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Shared import Text.Pandoc.Parsing hiding (blankline) -import Text.ParserCombinators.Parsec ( runParser, GenParser ) +import Text.Parsec ( Parsec, runParser ) import Data.List ( isPrefixOf, intersperse, intercalate ) import Text.Pandoc.Pretty import Control.Monad.State @@ -93,7 +93,7 @@ escapeString = escapeStringUsing escs where escs = backslashEscapes "{" -- | Ordered list start parser for use in Para below. -olMarker :: GenParser Char ParserState Char +olMarker :: Parsec [Char] ParserState Char olMarker = do (start, style', delim) <- anyOrderedListMarker if delim == Period && (style' == UpperAlpha || (style' == UpperRoman && diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 9cbcaeb47..1104c1f74 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -36,7 +36,7 @@ import Text.Pandoc.Generic import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Shared import Text.Pandoc.Parsing hiding (blankline) -import Text.ParserCombinators.Parsec ( runParser, GenParser ) +import Text.Parsec ( Parsec, runParser ) import Data.List ( group, isPrefixOf, find, intersperse, transpose ) import Text.Pandoc.Pretty import Control.Monad.State @@ -188,7 +188,7 @@ attrsToMarkdown attribs = braces $ hsep [attribId, attribClasses, attribKeys] <> "=\"" <> text v <> "\"") ks -- | Ordered list start parser for use in Para below. -olMarker :: GenParser Char ParserState Char +olMarker :: Parsec [Char] ParserState Char olMarker = do (start, style', delim) <- anyOrderedListMarker if delim == Period && (style' == UpperAlpha || (style' == UpperRoman && |