diff options
Diffstat (limited to 'src/Text/Pandoc/Parsing.hs')
-rw-r--r-- | src/Text/Pandoc/Parsing.hs | 483 |
1 files changed, 273 insertions, 210 deletions
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index cac2b71ca..50691f409 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE GeneralizedNewtypeDeriving #-} {- Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> @@ -19,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Parsing Copyright : Copyright (C) 2006-2010 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha @@ -48,8 +49,6 @@ module Text.Pandoc.Parsing ( (>>~), withHorizDisplacement, withRaw, nullBlock, - failIfStrict, - failUnlessLHS, escaped, characterReference, updateLastStrPos, @@ -57,37 +56,127 @@ module Text.Pandoc.Parsing ( (>>~), orderedListMarker, charRef, tableWith, + widthsFromIndices, gridTableWith, readWith, testStringWith, + getOption, + guardEnabled, + guardDisabled, ParserState (..), defaultParserState, HeaderType (..), ParserContext (..), QuoteContext (..), NoteTable, + NoteTable', KeyTable, - Key, + Key (..), toKey, - fromKey, - lookupKeySrc, smartPunctuation, + withQuoteContext, + singleQuoteStart, + singleQuoteEnd, + doubleQuoteStart, + doubleQuoteEnd, + ellipses, + apostrophe, + dash, macro, - applyMacros' ) + applyMacros', + Parser, + F(..), + runF, + askF, + asksF, + -- * Re-exports from Text.Pandoc.Parsec + runParser, + parse, + anyToken, + getInput, + setInput, + unexpected, + char, + letter, + digit, + alphaNum, + skipMany, + skipMany1, + spaces, + space, + anyChar, + satisfy, + newline, + string, + count, + eof, + noneOf, + oneOf, + lookAhead, + notFollowedBy, + many, + many1, + manyTill, + (<|>), + (<?>), + choice, + try, + sepBy, + sepBy1, + sepEndBy, + sepEndBy1, + endBy, + endBy1, + option, + optional, + optionMaybe, + getState, + setState, + updateState, + getPosition, + setPosition, + sourceColumn, + sourceLine, + newPos, + token + ) where import Text.Pandoc.Definition -import Text.Pandoc.Generic +import Text.Pandoc.Options +import Text.Pandoc.Builder (Blocks) import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn) -import Text.ParserCombinators.Parsec +import Text.Parsec +import Text.Parsec.Pos (newPos) import Data.Char ( toLower, toUpper, ord, isAscii, isAlphaNum, isDigit, isPunctuation ) import Data.List ( intercalate, transpose ) import Network.URI ( parseURI, URI (..), isAllowedInURI ) -import Control.Monad ( join, liftM, guard ) import Text.Pandoc.Shared import qualified Data.Map as M import Text.TeXMath.Macros (applyMacros, Macro, parseMacroDefinitions) import Text.HTML.TagSoup.Entity ( lookupEntity ) +import Data.Default +import qualified Data.Set as Set +import Control.Monad.Reader +import Data.Monoid + +type Parser t s = Parsec t s + +newtype F a = F { unF :: Reader ParserState a } deriving (Monad, Functor) + +runF :: F a -> ParserState -> a +runF = runReader . unF + +askF :: F ParserState +askF = F ask + +asksF :: (ParserState -> a) -> F a +asksF f = F $ asks f + +instance Monoid a => Monoid (F a) where + mempty = return mempty + mappend = liftM2 mappend + mconcat = liftM mconcat . sequence -- | Like >>, but returns the operation on the left. -- (Suggested by Tillmann Rendel on Haskell-cafe list.) @@ -95,62 +184,69 @@ import Text.HTML.TagSoup.Entity ( lookupEntity ) a >>~ b = a >>= \x -> b >> return x -- | Parse any line of text -anyLine :: GenParser Char st [Char] +anyLine :: Parser [Char] st [Char] anyLine = manyTill anyChar newline -- | Like @manyTill@, but reads at least one item. -many1Till :: GenParser tok st a - -> GenParser tok st end - -> GenParser tok st [a] +many1Till :: Parser [tok] st a + -> Parser [tok] st end + -> Parser [tok] st [a] many1Till p end = do first <- p rest <- manyTill p end return (first:rest) --- | A more general form of @notFollowedBy@. This one allows any +-- | A more general form of @notFollowedBy@. This one allows any -- type of parser to be specified, and succeeds only if that parser fails. -- It does not consume any input. -notFollowedBy' :: Show b => GenParser a st b -> GenParser a st () +notFollowedBy' :: Show b => Parser [a] st b -> Parser [a] st () notFollowedBy' p = try $ join $ do a <- try p return (unexpected (show a)) <|> return (return ()) -- (This version due to Andrew Pimlott on the Haskell mailing list.) --- | Parses one of a list of strings (tried in order). -oneOfStrings :: [String] -> GenParser Char st String -oneOfStrings listOfStrings = choice $ map (try . string) listOfStrings +-- | Parses one of a list of strings (tried in order). +oneOfStrings :: [String] -> Parser [Char] st String +oneOfStrings [] = fail "no strings" +oneOfStrings strs = do + c <- anyChar + let strs' = [xs | (x:xs) <- strs, x == c] + case strs' of + [] -> fail "not found" + z | "" `elem` z -> return [c] + | otherwise -> (c:) `fmap` oneOfStrings strs' -- | Parses a space or tab. -spaceChar :: CharParser st Char +spaceChar :: Parser [Char] st Char spaceChar = satisfy $ \c -> c == ' ' || c == '\t' -- | Parses a nonspace, nonnewline character. -nonspaceChar :: CharParser st Char +nonspaceChar :: Parser [Char] st Char nonspaceChar = satisfy $ \x -> x /= '\t' && x /= '\n' && x /= ' ' && x /= '\r' -- | Skips zero or more spaces or tabs. -skipSpaces :: GenParser Char st () +skipSpaces :: Parser [Char] st () skipSpaces = skipMany spaceChar -- | Skips zero or more spaces or tabs, then reads a newline. -blankline :: GenParser Char st Char +blankline :: Parser [Char] st Char blankline = try $ skipSpaces >> newline -- | Parses one or more blank lines and returns a string of newlines. -blanklines :: GenParser Char st [Char] +blanklines :: Parser [Char] st [Char] blanklines = many1 blankline -- | Parses material enclosed between start and end parsers. -enclosed :: GenParser Char st t -- ^ start parser - -> GenParser Char st end -- ^ end parser - -> GenParser Char st a -- ^ content parser (to be used repeatedly) - -> GenParser Char st [a] -enclosed start end parser = try $ +enclosed :: Parser [Char] st t -- ^ start parser + -> Parser [Char] st end -- ^ end parser + -> Parser [Char] st a -- ^ content parser (to be used repeatedly) + -> Parser [Char] st [a] +enclosed start end parser = try $ start >> notFollowedBy space >> many1Till parser end -- | Parse string, case insensitive. -stringAnyCase :: [Char] -> CharParser st String +stringAnyCase :: [Char] -> Parser [Char] st String stringAnyCase [] = string "" stringAnyCase (x:xs) = do firstChar <- char (toUpper x) <|> char (toLower x) @@ -158,7 +254,7 @@ stringAnyCase (x:xs) = do return (firstChar:rest) -- | Parse contents of 'str' using 'parser' and return result. -parseFromString :: GenParser tok st a -> [tok] -> GenParser tok st a +parseFromString :: Parser [tok] st a -> [tok] -> Parser [tok] st a parseFromString parser str = do oldPos <- getPosition oldInput <- getInput @@ -169,8 +265,8 @@ parseFromString parser str = do return result -- | Parse raw line block up to and including blank lines. -lineClump :: GenParser Char st String -lineClump = blanklines +lineClump :: Parser [Char] st String +lineClump = blanklines <|> (many1 (notFollowedBy blankline >> anyLine) >>= return . unlines) -- | Parse a string of characters between an open character @@ -178,8 +274,8 @@ lineClump = blanklines -- pairs of open and close, which must be different. For example, -- @charsInBalanced '(' ')' anyChar@ will parse "(hello (there))" -- and return "hello (there)". -charsInBalanced :: Char -> Char -> GenParser Char st Char - -> GenParser Char st String +charsInBalanced :: Char -> Char -> Parser [Char] st Char + -> Parser [Char] st String charsInBalanced open close parser = try $ do char open let isDelim c = c == open || c == close @@ -204,13 +300,13 @@ uppercaseRomanDigits = map toUpper lowercaseRomanDigits -- | Parses a roman numeral (uppercase or lowercase), returns number. romanNumeral :: Bool -- ^ Uppercase if true - -> GenParser Char st Int + -> Parser [Char] st Int romanNumeral upperCase = do - let romanDigits = if upperCase - then uppercaseRomanDigits + let romanDigits = if upperCase + then uppercaseRomanDigits else lowercaseRomanDigits lookAhead $ oneOf romanDigits - let [one, five, ten, fifty, hundred, fivehundred, thousand] = + let [one, five, ten, fifty, hundred, fivehundred, thousand] = map char romanDigits thousands <- many thousand >>= (return . (1000 *) . length) ninehundreds <- option 0 $ try $ hundred >> thousand >> return 900 @@ -234,14 +330,14 @@ romanNumeral upperCase = do -- Parsers for email addresses and URIs -emailChar :: GenParser Char st Char +emailChar :: Parser [Char] st Char emailChar = alphaNum <|> satisfy (\c -> c == '-' || c == '+' || c == '_' || c == '.') -domainChar :: GenParser Char st Char +domainChar :: Parser [Char] st Char domainChar = alphaNum <|> char '-' -domain :: GenParser Char st [Char] +domain :: Parser [Char] st [Char] domain = do first <- many1 domainChar dom <- many1 $ try (char '.' >> many1 domainChar ) @@ -249,7 +345,7 @@ domain = do -- | Parses an email address; returns original and corresponding -- escaped mailto: URI. -emailAddress :: GenParser Char st (String, String) +emailAddress :: Parser [Char] st (String, String) emailAddress = try $ do firstLetter <- alphaNum restAddr <- many emailChar @@ -260,7 +356,7 @@ emailAddress = try $ do return (full, escapeURI $ "mailto:" ++ full) -- | Parses a URI. Returns pair of original and URI-escaped version. -uri :: GenParser Char st (String, String) +uri :: Parser [Char] st (String, String) uri = try $ do let protocols = [ "http:", "https:", "ftp:", "file:", "mailto:", "news:", "telnet:" ] @@ -294,8 +390,8 @@ uri = try $ do -- displacement (the difference between the source column at the end -- and the source column at the beginning). Vertical displacement -- (source row) is ignored. -withHorizDisplacement :: GenParser Char st a -- ^ Parser to apply - -> GenParser Char st (a, Int) -- ^ (result, displacement) +withHorizDisplacement :: Parser [Char] st a -- ^ Parser to apply + -> Parser [Char] st (a, Int) -- ^ (result, displacement) withHorizDisplacement parser = do pos1 <- getPosition result <- parser @@ -304,7 +400,7 @@ withHorizDisplacement parser = do -- | Applies a parser and returns the raw string that was parsed, -- along with the value produced by the parser. -withRaw :: GenParser Char st a -> GenParser Char st (a, [Char]) +withRaw :: Parser [Char] st a -> Parser [Char] st (a, [Char]) withRaw parser = do pos1 <- getPosition inp <- getInput @@ -321,26 +417,16 @@ withRaw parser = do -- | Parses a character and returns 'Null' (so that the parser can move on -- if it gets stuck). -nullBlock :: GenParser Char st Block +nullBlock :: Parser [Char] st Block nullBlock = anyChar >> return Null --- | Fail if reader is in strict markdown syntax mode. -failIfStrict :: GenParser a ParserState () -failIfStrict = do - state <- getState - if stateStrict state then fail "strict mode" else return () - --- | Fail unless we're in literate haskell mode. -failUnlessLHS :: GenParser tok ParserState () -failUnlessLHS = getState >>= guard . stateLiterateHaskell - -- | Parses backslash, then applies character parser. -escaped :: GenParser Char st Char -- ^ Parser for character to escape - -> GenParser Char st Char +escaped :: Parser [Char] st Char -- ^ Parser for character to escape + -> Parser [Char] st Char escaped parser = try $ char '\\' >> parser -- | Parse character entity. -characterReference :: GenParser Char st Char +characterReference :: Parser [Char] st Char characterReference = try $ do char '&' ent <- many1Till nonspaceChar (char ';') @@ -349,19 +435,19 @@ characterReference = try $ do Nothing -> fail "entity not found" -- | Parses an uppercase roman numeral and returns (UpperRoman, number). -upperRoman :: GenParser Char st (ListNumberStyle, Int) +upperRoman :: Parser [Char] st (ListNumberStyle, Int) upperRoman = do num <- romanNumeral True return (UpperRoman, num) -- | Parses a lowercase roman numeral and returns (LowerRoman, number). -lowerRoman :: GenParser Char st (ListNumberStyle, Int) +lowerRoman :: Parser [Char] st (ListNumberStyle, Int) lowerRoman = do num <- romanNumeral False return (LowerRoman, num) -- | Parses a decimal numeral and returns (Decimal, number). -decimal :: GenParser Char st (ListNumberStyle, Int) +decimal :: Parser [Char] st (ListNumberStyle, Int) decimal = do num <- many1 digit return (Decimal, read num) @@ -370,7 +456,7 @@ decimal = do -- returns (DefaultStyle, [next example number]). The next -- example number is incremented in parser state, and the label -- (if present) is added to the label table. -exampleNum :: GenParser Char ParserState (ListNumberStyle, Int) +exampleNum :: Parser [Char] ParserState (ListNumberStyle, Int) exampleNum = do char '@' lab <- many (alphaNum <|> satisfy (\c -> c == '_' || c == '-')) @@ -384,38 +470,38 @@ exampleNum = do return (Example, num) -- | Parses a '#' returns (DefaultStyle, 1). -defaultNum :: GenParser Char st (ListNumberStyle, Int) +defaultNum :: Parser [Char] st (ListNumberStyle, Int) defaultNum = do char '#' return (DefaultStyle, 1) -- | Parses a lowercase letter and returns (LowerAlpha, number). -lowerAlpha :: GenParser Char st (ListNumberStyle, Int) +lowerAlpha :: Parser [Char] st (ListNumberStyle, Int) lowerAlpha = do ch <- oneOf ['a'..'z'] return (LowerAlpha, ord ch - ord 'a' + 1) -- | Parses an uppercase letter and returns (UpperAlpha, number). -upperAlpha :: GenParser Char st (ListNumberStyle, Int) +upperAlpha :: Parser [Char] st (ListNumberStyle, Int) upperAlpha = do ch <- oneOf ['A'..'Z'] return (UpperAlpha, ord ch - ord 'A' + 1) -- | Parses a roman numeral i or I -romanOne :: GenParser Char st (ListNumberStyle, Int) +romanOne :: Parser [Char] st (ListNumberStyle, Int) romanOne = (char 'i' >> return (LowerRoman, 1)) <|> (char 'I' >> return (UpperRoman, 1)) -- | Parses an ordered list marker and returns list attributes. -anyOrderedListMarker :: GenParser Char ParserState ListAttributes -anyOrderedListMarker = choice $ +anyOrderedListMarker :: Parser [Char] ParserState ListAttributes +anyOrderedListMarker = choice $ [delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens], numParser <- [decimal, exampleNum, defaultNum, romanOne, lowerAlpha, lowerRoman, upperAlpha, upperRoman]] -- | Parses a list number (num) followed by a period, returns list attributes. -inPeriod :: GenParser Char st (ListNumberStyle, Int) - -> GenParser Char st ListAttributes +inPeriod :: Parser [Char] st (ListNumberStyle, Int) + -> Parser [Char] st ListAttributes inPeriod num = try $ do (style, start) <- num char '.' @@ -423,18 +509,18 @@ inPeriod num = try $ do then DefaultDelim else Period return (start, style, delim) - + -- | Parses a list number (num) followed by a paren, returns list attributes. -inOneParen :: GenParser Char st (ListNumberStyle, Int) - -> GenParser Char st ListAttributes +inOneParen :: Parser [Char] st (ListNumberStyle, Int) + -> Parser [Char] st ListAttributes inOneParen num = try $ do (style, start) <- num char ')' return (start, style, OneParen) -- | Parses a list number (num) enclosed in parens, returns list attributes. -inTwoParens :: GenParser Char st (ListNumberStyle, Int) - -> GenParser Char st ListAttributes +inTwoParens :: Parser [Char] st (ListNumberStyle, Int) + -> Parser [Char] st ListAttributes inTwoParens num = try $ do char '(' (style, start) <- num @@ -443,9 +529,9 @@ inTwoParens num = try $ do -- | Parses an ordered list marker with a given style and delimiter, -- returns number. -orderedListMarker :: ListNumberStyle - -> ListNumberDelim - -> GenParser Char ParserState Int +orderedListMarker :: ListNumberStyle + -> ListNumberDelim + -> Parser [Char] ParserState Int orderedListMarker style delim = do let num = defaultNum <|> -- # can continue any kind of list case style of @@ -465,38 +551,34 @@ orderedListMarker style delim = do return start -- | Parses a character reference and returns a Str element. -charRef :: GenParser Char st Inline +charRef :: Parser [Char] st Inline charRef = do c <- characterReference return $ Str [c] -- | Parse a table using 'headerParser', 'rowParser', -- 'lineParser', and 'footerParser'. -tableWith :: GenParser Char ParserState ([[Block]], [Alignment], [Int]) - -> ([Int] -> GenParser Char ParserState [[Block]]) - -> GenParser Char ParserState sep - -> GenParser Char ParserState end - -> GenParser Char ParserState [Inline] - -> GenParser Char ParserState Block -tableWith headerParser rowParser lineParser footerParser captionParser = try $ do - caption' <- option [] captionParser +tableWith :: Parser [Char] ParserState ([[Block]], [Alignment], [Int]) + -> ([Int] -> Parser [Char] ParserState [[Block]]) + -> Parser [Char] ParserState sep + -> Parser [Char] ParserState end + -> Parser [Char] ParserState Block +tableWith headerParser rowParser lineParser footerParser = try $ do (heads, aligns, indices) <- headerParser - lines' <- rowParser indices `sepEndBy` lineParser + lines' <- rowParser indices `sepEndBy1` lineParser footerParser - caption <- if null caption' - then option [] captionParser - else return caption' - state <- getState - let numColumns = stateColumns state - let widths = widthsFromIndices numColumns indices - return $ Table caption aligns widths heads lines' + numColumns <- getOption readerColumns + let widths = if (indices == []) + then replicate (length aligns) 0.0 + else widthsFromIndices numColumns indices + return $ Table [] aligns widths heads lines' -- Calculate relative widths of table columns, based on indices widthsFromIndices :: Int -- Number of columns on terminal -> [Int] -- Indices -> [Double] -- Fractional relative sizes of columns -widthsFromIndices _ [] = [] -widthsFromIndices numColumns' indices = +widthsFromIndices _ [] = [] +widthsFromIndices numColumns' indices = let numColumns = max numColumns' (if null indices then 0 else last indices) lengths' = zipWith (-) indices (0:indices) lengths = reverse $ @@ -516,28 +598,30 @@ widthsFromIndices numColumns' indices = fracs = map (\l -> (fromIntegral l) / quotient) lengths in tail fracs +--- + -- Parse a grid table: starts with row of '-' on top, then header -- (which may be grid), then the rows, -- which may be grid, separated by blank lines, and -- ending with a footer (dashed line followed by blank line). -gridTableWith :: GenParser Char ParserState Block -- ^ Block parser - -> GenParser Char ParserState [Inline] -- ^ Caption parser +gridTableWith :: Parser [Char] ParserState [Block] -- ^ Block list parser -> Bool -- ^ Headerless table - -> GenParser Char ParserState Block -gridTableWith block tableCaption headless = - tableWith (gridTableHeader headless block) (gridTableRow block) (gridTableSep '-') gridTableFooter tableCaption + -> Parser [Char] ParserState Block +gridTableWith blocks headless = + tableWith (gridTableHeader headless blocks) (gridTableRow blocks) + (gridTableSep '-') gridTableFooter gridTableSplitLine :: [Int] -> String -> [String] gridTableSplitLine indices line = map removeFinalBar $ tail $ splitStringByIndices (init indices) $ removeTrailingSpace line -gridPart :: Char -> GenParser Char st (Int, Int) +gridPart :: Char -> Parser [Char] st (Int, Int) gridPart ch = do dashes <- many1 (char ch) char '+' return (length dashes, length dashes + 1) -gridDashedLines :: Char -> GenParser Char st [(Int,Int)] +gridDashedLines :: Char -> Parser [Char] st [(Int,Int)] gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) >>~ blankline removeFinalBar :: String -> String @@ -545,18 +629,18 @@ removeFinalBar = reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse -- | Separator between rows of grid table. -gridTableSep :: Char -> GenParser Char ParserState Char +gridTableSep :: Char -> Parser [Char] ParserState Char gridTableSep ch = try $ gridDashedLines ch >> return '\n' -- | Parse header for a grid table. gridTableHeader :: Bool -- ^ Headerless table - -> GenParser Char ParserState Block - -> GenParser Char ParserState ([[Block]], [Alignment], [Int]) -gridTableHeader headless block = try $ do + -> Parser [Char] ParserState [Block] + -> Parser [Char] ParserState ([[Block]], [Alignment], [Int]) +gridTableHeader headless blocks = try $ do optional blanklines dashes <- gridDashedLines '-' rawContent <- if headless - then return $ repeat "" + then return $ repeat "" else many1 (notFollowedBy (gridTableSep '=') >> char '|' >> many1Till anyChar newline) @@ -571,25 +655,25 @@ gridTableHeader headless block = try $ do then replicate (length dashes) "" else map (intercalate " ") $ transpose $ map (gridTableSplitLine indices) rawContent - heads <- mapM (parseFromString $ many block) $ + heads <- mapM (parseFromString blocks) $ map removeLeadingTrailingSpace rawHeads return (heads, aligns, indices) -gridTableRawLine :: [Int] -> GenParser Char ParserState [String] +gridTableRawLine :: [Int] -> Parser [Char] ParserState [String] gridTableRawLine indices = do char '|' line <- many1Till anyChar newline return (gridTableSplitLine indices line) -- | Parse row of grid table. -gridTableRow :: GenParser Char ParserState Block +gridTableRow :: Parser [Char] ParserState [Block] -> [Int] - -> GenParser Char ParserState [[Block]] -gridTableRow block indices = do + -> Parser [Char] ParserState [[Block]] +gridTableRow blocks indices = do colLines <- many1 (gridTableRawLine indices) let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $ transpose colLines - mapM (liftM compactifyCell . parseFromString (many block)) cols + mapM (liftM compactifyCell . parseFromString blocks) cols removeOneLeadingSpace :: [String] -> [String] removeOneLeadingSpace xs = @@ -603,23 +687,23 @@ compactifyCell :: [Block] -> [Block] compactifyCell bs = head $ compactify [bs] -- | Parse footer for a grid table. -gridTableFooter :: GenParser Char ParserState [Char] +gridTableFooter :: Parser [Char] ParserState [Char] gridTableFooter = blanklines --- -- | Parse a string with a given parser and state. -readWith :: GenParser t ParserState a -- ^ parser +readWith :: Parser [t] ParserState a -- ^ parser -> ParserState -- ^ initial state -> [t] -- ^ input -> a -readWith parser state input = +readWith parser state input = case runParser parser state "source" input of Left err' -> error $ "\nError:\n" ++ show err' Right result -> result -- | Parse a string with @parser@ (for testing). -testStringWith :: (Show a) => GenParser Char ParserState a +testStringWith :: (Show a) => Parser [Char] ParserState a -> String -> IO () testStringWith parser str = UTF8.putStrLn $ show $ @@ -627,72 +711,67 @@ testStringWith parser str = UTF8.putStrLn $ show $ -- | Parsing options. data ParserState = ParserState - { stateParseRaw :: Bool, -- ^ Parse raw HTML and LaTeX? + { stateOptions :: ReaderOptions, -- ^ User options stateParserContext :: ParserContext, -- ^ Inside list? stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment? + stateAllowLinks :: Bool, -- ^ Allow parsing of links stateMaxNestingLevel :: Int, -- ^ Max # of nested Strong/Emph stateLastStrPos :: Maybe SourcePos, -- ^ Position after last str parsed - stateKeys :: KeyTable, -- ^ List of reference keys - stateCitations :: [String], -- ^ List of available citations - stateNotes :: NoteTable, -- ^ List of notes - stateTabStop :: Int, -- ^ Tab stop - stateStandalone :: Bool, -- ^ Parse bibliographic info? + stateKeys :: KeyTable, -- ^ List of reference keys (with fallbacks) + stateNotes :: NoteTable, -- ^ List of notes (raw bodies) + stateNotes' :: NoteTable', -- ^ List of notes (parsed bodies) stateTitle :: [Inline], -- ^ Title of document stateAuthors :: [[Inline]], -- ^ Authors of document stateDate :: [Inline], -- ^ Date of document - stateStrict :: Bool, -- ^ Use strict markdown syntax? - stateSmart :: Bool, -- ^ Use smart typography? - stateOldDashes :: Bool, -- ^ Use pandoc <= 1.8.2.1 behavior - -- in parsing dashes; -- is em-dash; - -- before numeral is en-dash - stateLiterateHaskell :: Bool, -- ^ Treat input as literate haskell - stateColumns :: Int, -- ^ Number of columns in terminal stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used - stateIndentedCodeClasses :: [String], -- ^ Classes to use for indented code blocks stateNextExample :: Int, -- ^ Number of next example - stateExamples :: M.Map String Int, -- ^ Map from example labels to numbers + stateExamples :: M.Map String Int, -- ^ Map from example labels to numbers stateHasChapters :: Bool, -- ^ True if \chapter encountered - stateApplyMacros :: Bool, -- ^ Apply LaTeX macros? stateMacros :: [Macro], -- ^ List of macros defined so far stateRstDefaultRole :: String -- ^ Current rST default interpreted text role } - deriving Show + +instance Default ParserState where + def = defaultParserState defaultParserState :: ParserState -defaultParserState = - ParserState { stateParseRaw = False, +defaultParserState = + ParserState { stateOptions = def, stateParserContext = NullState, stateQuoteContext = NoQuote, + stateAllowLinks = True, stateMaxNestingLevel = 6, stateLastStrPos = Nothing, stateKeys = M.empty, - stateCitations = [], stateNotes = [], - stateTabStop = 4, - stateStandalone = False, + stateNotes' = [], stateTitle = [], stateAuthors = [], stateDate = [], - stateStrict = False, - stateSmart = False, - stateOldDashes = False, - stateLiterateHaskell = False, - stateColumns = 80, stateHeaderTable = [], - stateIndentedCodeClasses = [], stateNextExample = 1, stateExamples = M.empty, stateHasChapters = False, - stateApplyMacros = True, stateMacros = [], stateRstDefaultRole = "title-reference"} -data HeaderType +getOption :: (ReaderOptions -> a) -> Parser s ParserState a +getOption f = (f . stateOptions) `fmap` getState + +-- | Succeed only if the extension is enabled. +guardEnabled :: Extension -> Parser s ParserState () +guardEnabled ext = getOption readerExtensions >>= guard . Set.member ext + +-- | Succeed only if the extension is disabled. +guardDisabled :: Extension -> Parser s ParserState () +guardDisabled ext = getOption readerExtensions >>= guard . not . Set.member ext + +data HeaderType = SingleHeader Char -- ^ Single line of characters underneath | DoubleHeader Char -- ^ Lines of characters above and below deriving (Eq, Show) -data ParserContext +data ParserContext = ListItemState -- ^ Used when running parser on list item contents | NullState -- ^ Default state deriving (Eq, Show) @@ -705,51 +784,35 @@ data QuoteContext type NoteTable = [(String, String)] -newtype Key = Key [Inline] deriving (Show, Read, Eq, Ord) +type NoteTable' = [(String, F Blocks)] -- used in markdown reader -toKey :: [Inline] -> Key -toKey = Key . bottomUp lowercase - where lowercase :: Inline -> Inline - lowercase (Str xs) = Str (map toLower xs) - lowercase (Math t xs) = Math t (map toLower xs) - lowercase (Code attr xs) = Code attr (map toLower xs) - lowercase (RawInline f xs) = RawInline f (map toLower xs) - lowercase LineBreak = Space - lowercase x = x +newtype Key = Key String deriving (Show, Read, Eq, Ord) -fromKey :: Key -> [Inline] -fromKey (Key xs) = xs +toKey :: String -> Key +toKey = Key . map toLower . unwords . words type KeyTable = M.Map Key Target --- | Look up key in key table and return target object. -lookupKeySrc :: KeyTable -- ^ Key table - -> Key -- ^ Key - -> Maybe Target -lookupKeySrc table key = case M.lookup key table of - Nothing -> Nothing - Just src -> Just src - -- | Fail unless we're in "smart typography" mode. -failUnlessSmart :: GenParser tok ParserState () -failUnlessSmart = getState >>= guard . stateSmart +failUnlessSmart :: Parser [tok] ParserState () +failUnlessSmart = getOption readerSmart >>= guard -smartPunctuation :: GenParser Char ParserState Inline - -> GenParser Char ParserState Inline +smartPunctuation :: Parser [Char] ParserState Inline + -> Parser [Char] ParserState Inline smartPunctuation inlineParser = do failUnlessSmart choice [ quoted inlineParser, apostrophe, dash, ellipses ] -apostrophe :: GenParser Char ParserState Inline +apostrophe :: Parser [Char] ParserState Inline apostrophe = (char '\'' <|> char '\8217') >> return (Str "\x2019") -quoted :: GenParser Char ParserState Inline - -> GenParser Char ParserState Inline +quoted :: Parser [Char] ParserState Inline + -> Parser [Char] ParserState Inline quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser withQuoteContext :: QuoteContext - -> (GenParser Char ParserState Inline) - -> GenParser Char ParserState Inline + -> Parser [Char] ParserState a + -> Parser [Char] ParserState a withQuoteContext context parser = do oldState <- getState let oldQuoteContext = stateQuoteContext oldState @@ -759,39 +822,39 @@ withQuoteContext context parser = do setState newState { stateQuoteContext = oldQuoteContext } return result -singleQuoted :: GenParser Char ParserState Inline - -> GenParser Char ParserState Inline +singleQuoted :: Parser [Char] ParserState Inline + -> Parser [Char] ParserState Inline singleQuoted inlineParser = try $ do singleQuoteStart withQuoteContext InSingleQuote $ many1Till inlineParser singleQuoteEnd >>= return . Quoted SingleQuote . normalizeSpaces -doubleQuoted :: GenParser Char ParserState Inline - -> GenParser Char ParserState Inline +doubleQuoted :: Parser [Char] ParserState Inline + -> Parser [Char] ParserState Inline doubleQuoted inlineParser = try $ do doubleQuoteStart withQuoteContext InDoubleQuote $ do contents <- manyTill inlineParser doubleQuoteEnd return . Quoted DoubleQuote . normalizeSpaces $ contents -failIfInQuoteContext :: QuoteContext -> GenParser tok ParserState () +failIfInQuoteContext :: QuoteContext -> Parser [tok] ParserState () failIfInQuoteContext context = do st <- getState if stateQuoteContext st == context then fail "already inside quotes" else return () -charOrRef :: [Char] -> GenParser Char st Char +charOrRef :: [Char] -> Parser [Char] st Char charOrRef cs = oneOf cs <|> try (do c <- characterReference guard (c `elem` cs) return c) -updateLastStrPos :: GenParser Char ParserState () -updateLastStrPos = getPosition >>= \p -> +updateLastStrPos :: Parser [Char] ParserState () +updateLastStrPos = getPosition >>= \p -> updateState $ \s -> s{ stateLastStrPos = Just p } -singleQuoteStart :: GenParser Char ParserState () +singleQuoteStart :: Parser [Char] ParserState () singleQuoteStart = do failIfInQuoteContext InSingleQuote pos <- getPosition @@ -802,61 +865,61 @@ singleQuoteStart = do notFollowedBy (oneOf ")!],;:-? \t\n") notFollowedBy (char '.') <|> lookAhead (string "..." >> return ()) notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >> - satisfy (not . isAlphaNum))) + satisfy (not . isAlphaNum))) -- possess/contraction return () -singleQuoteEnd :: GenParser Char st () +singleQuoteEnd :: Parser [Char] st () singleQuoteEnd = try $ do charOrRef "'\8217\146" notFollowedBy alphaNum -doubleQuoteStart :: GenParser Char ParserState () +doubleQuoteStart :: Parser [Char] ParserState () doubleQuoteStart = do failIfInQuoteContext InDoubleQuote try $ do charOrRef "\"\8220\147" notFollowedBy (satisfy (\c -> c == ' ' || c == '\t' || c == '\n')) -doubleQuoteEnd :: GenParser Char st () +doubleQuoteEnd :: Parser [Char] st () doubleQuoteEnd = do charOrRef "\"\8221\148" return () -ellipses :: GenParser Char st Inline +ellipses :: Parser [Char] st Inline ellipses = do try (charOrRef "\8230\133") <|> try (string "..." >> return '…') return (Str "\8230") -dash :: GenParser Char ParserState Inline +dash :: Parser [Char] ParserState Inline dash = do - oldDashes <- stateOldDashes `fmap` getState + oldDashes <- getOption readerOldDashes if oldDashes then emDashOld <|> enDashOld else Str `fmap` (hyphenDash <|> emDash <|> enDash) -- Two hyphens = en-dash, three = em-dash -hyphenDash :: GenParser Char st String +hyphenDash :: Parser [Char] st String hyphenDash = do try $ string "--" option "\8211" (char '-' >> return "\8212") -emDash :: GenParser Char st String +emDash :: Parser [Char] st String emDash = do try (charOrRef "\8212\151") return "\8212" -enDash :: GenParser Char st String +enDash :: Parser [Char] st String enDash = do try (charOrRef "\8212\151") return "\8211" -enDashOld :: GenParser Char st Inline +enDashOld :: Parser [Char] st Inline enDashOld = do try (charOrRef "\8211\150") <|> try (char '-' >> lookAhead (satisfy isDigit) >> return '–') return (Str "\8211") -emDashOld :: GenParser Char st Inline +emDashOld :: Parser [Char] st Inline emDashOld = do try (charOrRef "\8212\151") <|> (try $ string "--" >> optional (char '-') >> return '-') return (Str "\8212") @@ -866,24 +929,24 @@ emDashOld = do -- -- | Parse a \newcommand or \renewcommand macro definition. -macro :: GenParser Char ParserState Block +macro :: Parser [Char] ParserState Block macro = do - apply <- stateApplyMacros `fmap` getState + apply <- getOption readerApplyMacros inp <- getInput case parseMacroDefinitions inp of - ([], _) -> pzero - (ms, rest) -> do def <- count (length inp - length rest) anyChar + ([], _) -> mzero + (ms, rest) -> do def' <- count (length inp - length rest) anyChar if apply then do updateState $ \st -> st { stateMacros = ms ++ stateMacros st } return Null - else return $ RawBlock "latex" def + else return $ RawBlock "latex" def' -- | Apply current macros to string. -applyMacros' :: String -> GenParser Char ParserState String +applyMacros' :: String -> Parser [Char] ParserState String applyMacros' target = do - apply <- liftM stateApplyMacros getState + apply <- getOption readerApplyMacros if apply then do macros <- liftM stateMacros getState return $ applyMacros macros target |