aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Parsing.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Parsing.hs')
-rw-r--r--src/Text/Pandoc/Parsing.hs483
1 files changed, 273 insertions, 210 deletions
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index cac2b71ca..50691f409 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-
Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
@@ -19,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Parsing
Copyright : Copyright (C) 2006-2010 John MacFarlane
- License : GNU GPL, version 2 or above
+ License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
@@ -48,8 +49,6 @@ module Text.Pandoc.Parsing ( (>>~),
withHorizDisplacement,
withRaw,
nullBlock,
- failIfStrict,
- failUnlessLHS,
escaped,
characterReference,
updateLastStrPos,
@@ -57,37 +56,127 @@ module Text.Pandoc.Parsing ( (>>~),
orderedListMarker,
charRef,
tableWith,
+ widthsFromIndices,
gridTableWith,
readWith,
testStringWith,
+ getOption,
+ guardEnabled,
+ guardDisabled,
ParserState (..),
defaultParserState,
HeaderType (..),
ParserContext (..),
QuoteContext (..),
NoteTable,
+ NoteTable',
KeyTable,
- Key,
+ Key (..),
toKey,
- fromKey,
- lookupKeySrc,
smartPunctuation,
+ withQuoteContext,
+ singleQuoteStart,
+ singleQuoteEnd,
+ doubleQuoteStart,
+ doubleQuoteEnd,
+ ellipses,
+ apostrophe,
+ dash,
macro,
- applyMacros' )
+ applyMacros',
+ Parser,
+ F(..),
+ runF,
+ askF,
+ asksF,
+ -- * Re-exports from Text.Pandoc.Parsec
+ runParser,
+ parse,
+ anyToken,
+ getInput,
+ setInput,
+ unexpected,
+ char,
+ letter,
+ digit,
+ alphaNum,
+ skipMany,
+ skipMany1,
+ spaces,
+ space,
+ anyChar,
+ satisfy,
+ newline,
+ string,
+ count,
+ eof,
+ noneOf,
+ oneOf,
+ lookAhead,
+ notFollowedBy,
+ many,
+ many1,
+ manyTill,
+ (<|>),
+ (<?>),
+ choice,
+ try,
+ sepBy,
+ sepBy1,
+ sepEndBy,
+ sepEndBy1,
+ endBy,
+ endBy1,
+ option,
+ optional,
+ optionMaybe,
+ getState,
+ setState,
+ updateState,
+ getPosition,
+ setPosition,
+ sourceColumn,
+ sourceLine,
+ newPos,
+ token
+ )
where
import Text.Pandoc.Definition
-import Text.Pandoc.Generic
+import Text.Pandoc.Options
+import Text.Pandoc.Builder (Blocks)
import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
-import Text.ParserCombinators.Parsec
+import Text.Parsec
+import Text.Parsec.Pos (newPos)
import Data.Char ( toLower, toUpper, ord, isAscii, isAlphaNum, isDigit, isPunctuation )
import Data.List ( intercalate, transpose )
import Network.URI ( parseURI, URI (..), isAllowedInURI )
-import Control.Monad ( join, liftM, guard )
import Text.Pandoc.Shared
import qualified Data.Map as M
import Text.TeXMath.Macros (applyMacros, Macro, parseMacroDefinitions)
import Text.HTML.TagSoup.Entity ( lookupEntity )
+import Data.Default
+import qualified Data.Set as Set
+import Control.Monad.Reader
+import Data.Monoid
+
+type Parser t s = Parsec t s
+
+newtype F a = F { unF :: Reader ParserState a } deriving (Monad, Functor)
+
+runF :: F a -> ParserState -> a
+runF = runReader . unF
+
+askF :: F ParserState
+askF = F ask
+
+asksF :: (ParserState -> a) -> F a
+asksF f = F $ asks f
+
+instance Monoid a => Monoid (F a) where
+ mempty = return mempty
+ mappend = liftM2 mappend
+ mconcat = liftM mconcat . sequence
-- | Like >>, but returns the operation on the left.
-- (Suggested by Tillmann Rendel on Haskell-cafe list.)
@@ -95,62 +184,69 @@ import Text.HTML.TagSoup.Entity ( lookupEntity )
a >>~ b = a >>= \x -> b >> return x
-- | Parse any line of text
-anyLine :: GenParser Char st [Char]
+anyLine :: Parser [Char] st [Char]
anyLine = manyTill anyChar newline
-- | Like @manyTill@, but reads at least one item.
-many1Till :: GenParser tok st a
- -> GenParser tok st end
- -> GenParser tok st [a]
+many1Till :: Parser [tok] st a
+ -> Parser [tok] st end
+ -> Parser [tok] st [a]
many1Till p end = do
first <- p
rest <- manyTill p end
return (first:rest)
--- | A more general form of @notFollowedBy@. This one allows any
+-- | A more general form of @notFollowedBy@. This one allows any
-- type of parser to be specified, and succeeds only if that parser fails.
-- It does not consume any input.
-notFollowedBy' :: Show b => GenParser a st b -> GenParser a st ()
+notFollowedBy' :: Show b => Parser [a] st b -> Parser [a] st ()
notFollowedBy' p = try $ join $ do a <- try p
return (unexpected (show a))
<|>
return (return ())
-- (This version due to Andrew Pimlott on the Haskell mailing list.)
--- | Parses one of a list of strings (tried in order).
-oneOfStrings :: [String] -> GenParser Char st String
-oneOfStrings listOfStrings = choice $ map (try . string) listOfStrings
+-- | Parses one of a list of strings (tried in order).
+oneOfStrings :: [String] -> Parser [Char] st String
+oneOfStrings [] = fail "no strings"
+oneOfStrings strs = do
+ c <- anyChar
+ let strs' = [xs | (x:xs) <- strs, x == c]
+ case strs' of
+ [] -> fail "not found"
+ z | "" `elem` z -> return [c]
+ | otherwise -> (c:) `fmap` oneOfStrings strs'
-- | Parses a space or tab.
-spaceChar :: CharParser st Char
+spaceChar :: Parser [Char] st Char
spaceChar = satisfy $ \c -> c == ' ' || c == '\t'
-- | Parses a nonspace, nonnewline character.
-nonspaceChar :: CharParser st Char
+nonspaceChar :: Parser [Char] st Char
nonspaceChar = satisfy $ \x -> x /= '\t' && x /= '\n' && x /= ' ' && x /= '\r'
-- | Skips zero or more spaces or tabs.
-skipSpaces :: GenParser Char st ()
+skipSpaces :: Parser [Char] st ()
skipSpaces = skipMany spaceChar
-- | Skips zero or more spaces or tabs, then reads a newline.
-blankline :: GenParser Char st Char
+blankline :: Parser [Char] st Char
blankline = try $ skipSpaces >> newline
-- | Parses one or more blank lines and returns a string of newlines.
-blanklines :: GenParser Char st [Char]
+blanklines :: Parser [Char] st [Char]
blanklines = many1 blankline
-- | Parses material enclosed between start and end parsers.
-enclosed :: GenParser Char st t -- ^ start parser
- -> GenParser Char st end -- ^ end parser
- -> GenParser Char st a -- ^ content parser (to be used repeatedly)
- -> GenParser Char st [a]
-enclosed start end parser = try $
+enclosed :: Parser [Char] st t -- ^ start parser
+ -> Parser [Char] st end -- ^ end parser
+ -> Parser [Char] st a -- ^ content parser (to be used repeatedly)
+ -> Parser [Char] st [a]
+enclosed start end parser = try $
start >> notFollowedBy space >> many1Till parser end
-- | Parse string, case insensitive.
-stringAnyCase :: [Char] -> CharParser st String
+stringAnyCase :: [Char] -> Parser [Char] st String
stringAnyCase [] = string ""
stringAnyCase (x:xs) = do
firstChar <- char (toUpper x) <|> char (toLower x)
@@ -158,7 +254,7 @@ stringAnyCase (x:xs) = do
return (firstChar:rest)
-- | Parse contents of 'str' using 'parser' and return result.
-parseFromString :: GenParser tok st a -> [tok] -> GenParser tok st a
+parseFromString :: Parser [tok] st a -> [tok] -> Parser [tok] st a
parseFromString parser str = do
oldPos <- getPosition
oldInput <- getInput
@@ -169,8 +265,8 @@ parseFromString parser str = do
return result
-- | Parse raw line block up to and including blank lines.
-lineClump :: GenParser Char st String
-lineClump = blanklines
+lineClump :: Parser [Char] st String
+lineClump = blanklines
<|> (many1 (notFollowedBy blankline >> anyLine) >>= return . unlines)
-- | Parse a string of characters between an open character
@@ -178,8 +274,8 @@ lineClump = blanklines
-- pairs of open and close, which must be different. For example,
-- @charsInBalanced '(' ')' anyChar@ will parse "(hello (there))"
-- and return "hello (there)".
-charsInBalanced :: Char -> Char -> GenParser Char st Char
- -> GenParser Char st String
+charsInBalanced :: Char -> Char -> Parser [Char] st Char
+ -> Parser [Char] st String
charsInBalanced open close parser = try $ do
char open
let isDelim c = c == open || c == close
@@ -204,13 +300,13 @@ uppercaseRomanDigits = map toUpper lowercaseRomanDigits
-- | Parses a roman numeral (uppercase or lowercase), returns number.
romanNumeral :: Bool -- ^ Uppercase if true
- -> GenParser Char st Int
+ -> Parser [Char] st Int
romanNumeral upperCase = do
- let romanDigits = if upperCase
- then uppercaseRomanDigits
+ let romanDigits = if upperCase
+ then uppercaseRomanDigits
else lowercaseRomanDigits
lookAhead $ oneOf romanDigits
- let [one, five, ten, fifty, hundred, fivehundred, thousand] =
+ let [one, five, ten, fifty, hundred, fivehundred, thousand] =
map char romanDigits
thousands <- many thousand >>= (return . (1000 *) . length)
ninehundreds <- option 0 $ try $ hundred >> thousand >> return 900
@@ -234,14 +330,14 @@ romanNumeral upperCase = do
-- Parsers for email addresses and URIs
-emailChar :: GenParser Char st Char
+emailChar :: Parser [Char] st Char
emailChar = alphaNum <|>
satisfy (\c -> c == '-' || c == '+' || c == '_' || c == '.')
-domainChar :: GenParser Char st Char
+domainChar :: Parser [Char] st Char
domainChar = alphaNum <|> char '-'
-domain :: GenParser Char st [Char]
+domain :: Parser [Char] st [Char]
domain = do
first <- many1 domainChar
dom <- many1 $ try (char '.' >> many1 domainChar )
@@ -249,7 +345,7 @@ domain = do
-- | Parses an email address; returns original and corresponding
-- escaped mailto: URI.
-emailAddress :: GenParser Char st (String, String)
+emailAddress :: Parser [Char] st (String, String)
emailAddress = try $ do
firstLetter <- alphaNum
restAddr <- many emailChar
@@ -260,7 +356,7 @@ emailAddress = try $ do
return (full, escapeURI $ "mailto:" ++ full)
-- | Parses a URI. Returns pair of original and URI-escaped version.
-uri :: GenParser Char st (String, String)
+uri :: Parser [Char] st (String, String)
uri = try $ do
let protocols = [ "http:", "https:", "ftp:", "file:", "mailto:",
"news:", "telnet:" ]
@@ -294,8 +390,8 @@ uri = try $ do
-- displacement (the difference between the source column at the end
-- and the source column at the beginning). Vertical displacement
-- (source row) is ignored.
-withHorizDisplacement :: GenParser Char st a -- ^ Parser to apply
- -> GenParser Char st (a, Int) -- ^ (result, displacement)
+withHorizDisplacement :: Parser [Char] st a -- ^ Parser to apply
+ -> Parser [Char] st (a, Int) -- ^ (result, displacement)
withHorizDisplacement parser = do
pos1 <- getPosition
result <- parser
@@ -304,7 +400,7 @@ withHorizDisplacement parser = do
-- | Applies a parser and returns the raw string that was parsed,
-- along with the value produced by the parser.
-withRaw :: GenParser Char st a -> GenParser Char st (a, [Char])
+withRaw :: Parser [Char] st a -> Parser [Char] st (a, [Char])
withRaw parser = do
pos1 <- getPosition
inp <- getInput
@@ -321,26 +417,16 @@ withRaw parser = do
-- | Parses a character and returns 'Null' (so that the parser can move on
-- if it gets stuck).
-nullBlock :: GenParser Char st Block
+nullBlock :: Parser [Char] st Block
nullBlock = anyChar >> return Null
--- | Fail if reader is in strict markdown syntax mode.
-failIfStrict :: GenParser a ParserState ()
-failIfStrict = do
- state <- getState
- if stateStrict state then fail "strict mode" else return ()
-
--- | Fail unless we're in literate haskell mode.
-failUnlessLHS :: GenParser tok ParserState ()
-failUnlessLHS = getState >>= guard . stateLiterateHaskell
-
-- | Parses backslash, then applies character parser.
-escaped :: GenParser Char st Char -- ^ Parser for character to escape
- -> GenParser Char st Char
+escaped :: Parser [Char] st Char -- ^ Parser for character to escape
+ -> Parser [Char] st Char
escaped parser = try $ char '\\' >> parser
-- | Parse character entity.
-characterReference :: GenParser Char st Char
+characterReference :: Parser [Char] st Char
characterReference = try $ do
char '&'
ent <- many1Till nonspaceChar (char ';')
@@ -349,19 +435,19 @@ characterReference = try $ do
Nothing -> fail "entity not found"
-- | Parses an uppercase roman numeral and returns (UpperRoman, number).
-upperRoman :: GenParser Char st (ListNumberStyle, Int)
+upperRoman :: Parser [Char] st (ListNumberStyle, Int)
upperRoman = do
num <- romanNumeral True
return (UpperRoman, num)
-- | Parses a lowercase roman numeral and returns (LowerRoman, number).
-lowerRoman :: GenParser Char st (ListNumberStyle, Int)
+lowerRoman :: Parser [Char] st (ListNumberStyle, Int)
lowerRoman = do
num <- romanNumeral False
return (LowerRoman, num)
-- | Parses a decimal numeral and returns (Decimal, number).
-decimal :: GenParser Char st (ListNumberStyle, Int)
+decimal :: Parser [Char] st (ListNumberStyle, Int)
decimal = do
num <- many1 digit
return (Decimal, read num)
@@ -370,7 +456,7 @@ decimal = do
-- returns (DefaultStyle, [next example number]). The next
-- example number is incremented in parser state, and the label
-- (if present) is added to the label table.
-exampleNum :: GenParser Char ParserState (ListNumberStyle, Int)
+exampleNum :: Parser [Char] ParserState (ListNumberStyle, Int)
exampleNum = do
char '@'
lab <- many (alphaNum <|> satisfy (\c -> c == '_' || c == '-'))
@@ -384,38 +470,38 @@ exampleNum = do
return (Example, num)
-- | Parses a '#' returns (DefaultStyle, 1).
-defaultNum :: GenParser Char st (ListNumberStyle, Int)
+defaultNum :: Parser [Char] st (ListNumberStyle, Int)
defaultNum = do
char '#'
return (DefaultStyle, 1)
-- | Parses a lowercase letter and returns (LowerAlpha, number).
-lowerAlpha :: GenParser Char st (ListNumberStyle, Int)
+lowerAlpha :: Parser [Char] st (ListNumberStyle, Int)
lowerAlpha = do
ch <- oneOf ['a'..'z']
return (LowerAlpha, ord ch - ord 'a' + 1)
-- | Parses an uppercase letter and returns (UpperAlpha, number).
-upperAlpha :: GenParser Char st (ListNumberStyle, Int)
+upperAlpha :: Parser [Char] st (ListNumberStyle, Int)
upperAlpha = do
ch <- oneOf ['A'..'Z']
return (UpperAlpha, ord ch - ord 'A' + 1)
-- | Parses a roman numeral i or I
-romanOne :: GenParser Char st (ListNumberStyle, Int)
+romanOne :: Parser [Char] st (ListNumberStyle, Int)
romanOne = (char 'i' >> return (LowerRoman, 1)) <|>
(char 'I' >> return (UpperRoman, 1))
-- | Parses an ordered list marker and returns list attributes.
-anyOrderedListMarker :: GenParser Char ParserState ListAttributes
-anyOrderedListMarker = choice $
+anyOrderedListMarker :: Parser [Char] ParserState ListAttributes
+anyOrderedListMarker = choice $
[delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens],
numParser <- [decimal, exampleNum, defaultNum, romanOne,
lowerAlpha, lowerRoman, upperAlpha, upperRoman]]
-- | Parses a list number (num) followed by a period, returns list attributes.
-inPeriod :: GenParser Char st (ListNumberStyle, Int)
- -> GenParser Char st ListAttributes
+inPeriod :: Parser [Char] st (ListNumberStyle, Int)
+ -> Parser [Char] st ListAttributes
inPeriod num = try $ do
(style, start) <- num
char '.'
@@ -423,18 +509,18 @@ inPeriod num = try $ do
then DefaultDelim
else Period
return (start, style, delim)
-
+
-- | Parses a list number (num) followed by a paren, returns list attributes.
-inOneParen :: GenParser Char st (ListNumberStyle, Int)
- -> GenParser Char st ListAttributes
+inOneParen :: Parser [Char] st (ListNumberStyle, Int)
+ -> Parser [Char] st ListAttributes
inOneParen num = try $ do
(style, start) <- num
char ')'
return (start, style, OneParen)
-- | Parses a list number (num) enclosed in parens, returns list attributes.
-inTwoParens :: GenParser Char st (ListNumberStyle, Int)
- -> GenParser Char st ListAttributes
+inTwoParens :: Parser [Char] st (ListNumberStyle, Int)
+ -> Parser [Char] st ListAttributes
inTwoParens num = try $ do
char '('
(style, start) <- num
@@ -443,9 +529,9 @@ inTwoParens num = try $ do
-- | Parses an ordered list marker with a given style and delimiter,
-- returns number.
-orderedListMarker :: ListNumberStyle
- -> ListNumberDelim
- -> GenParser Char ParserState Int
+orderedListMarker :: ListNumberStyle
+ -> ListNumberDelim
+ -> Parser [Char] ParserState Int
orderedListMarker style delim = do
let num = defaultNum <|> -- # can continue any kind of list
case style of
@@ -465,38 +551,34 @@ orderedListMarker style delim = do
return start
-- | Parses a character reference and returns a Str element.
-charRef :: GenParser Char st Inline
+charRef :: Parser [Char] st Inline
charRef = do
c <- characterReference
return $ Str [c]
-- | Parse a table using 'headerParser', 'rowParser',
-- 'lineParser', and 'footerParser'.
-tableWith :: GenParser Char ParserState ([[Block]], [Alignment], [Int])
- -> ([Int] -> GenParser Char ParserState [[Block]])
- -> GenParser Char ParserState sep
- -> GenParser Char ParserState end
- -> GenParser Char ParserState [Inline]
- -> GenParser Char ParserState Block
-tableWith headerParser rowParser lineParser footerParser captionParser = try $ do
- caption' <- option [] captionParser
+tableWith :: Parser [Char] ParserState ([[Block]], [Alignment], [Int])
+ -> ([Int] -> Parser [Char] ParserState [[Block]])
+ -> Parser [Char] ParserState sep
+ -> Parser [Char] ParserState end
+ -> Parser [Char] ParserState Block
+tableWith headerParser rowParser lineParser footerParser = try $ do
(heads, aligns, indices) <- headerParser
- lines' <- rowParser indices `sepEndBy` lineParser
+ lines' <- rowParser indices `sepEndBy1` lineParser
footerParser
- caption <- if null caption'
- then option [] captionParser
- else return caption'
- state <- getState
- let numColumns = stateColumns state
- let widths = widthsFromIndices numColumns indices
- return $ Table caption aligns widths heads lines'
+ numColumns <- getOption readerColumns
+ let widths = if (indices == [])
+ then replicate (length aligns) 0.0
+ else widthsFromIndices numColumns indices
+ return $ Table [] aligns widths heads lines'
-- Calculate relative widths of table columns, based on indices
widthsFromIndices :: Int -- Number of columns on terminal
-> [Int] -- Indices
-> [Double] -- Fractional relative sizes of columns
-widthsFromIndices _ [] = []
-widthsFromIndices numColumns' indices =
+widthsFromIndices _ [] = []
+widthsFromIndices numColumns' indices =
let numColumns = max numColumns' (if null indices then 0 else last indices)
lengths' = zipWith (-) indices (0:indices)
lengths = reverse $
@@ -516,28 +598,30 @@ widthsFromIndices numColumns' indices =
fracs = map (\l -> (fromIntegral l) / quotient) lengths in
tail fracs
+---
+
-- Parse a grid table: starts with row of '-' on top, then header
-- (which may be grid), then the rows,
-- which may be grid, separated by blank lines, and
-- ending with a footer (dashed line followed by blank line).
-gridTableWith :: GenParser Char ParserState Block -- ^ Block parser
- -> GenParser Char ParserState [Inline] -- ^ Caption parser
+gridTableWith :: Parser [Char] ParserState [Block] -- ^ Block list parser
-> Bool -- ^ Headerless table
- -> GenParser Char ParserState Block
-gridTableWith block tableCaption headless =
- tableWith (gridTableHeader headless block) (gridTableRow block) (gridTableSep '-') gridTableFooter tableCaption
+ -> Parser [Char] ParserState Block
+gridTableWith blocks headless =
+ tableWith (gridTableHeader headless blocks) (gridTableRow blocks)
+ (gridTableSep '-') gridTableFooter
gridTableSplitLine :: [Int] -> String -> [String]
gridTableSplitLine indices line = map removeFinalBar $ tail $
splitStringByIndices (init indices) $ removeTrailingSpace line
-gridPart :: Char -> GenParser Char st (Int, Int)
+gridPart :: Char -> Parser [Char] st (Int, Int)
gridPart ch = do
dashes <- many1 (char ch)
char '+'
return (length dashes, length dashes + 1)
-gridDashedLines :: Char -> GenParser Char st [(Int,Int)]
+gridDashedLines :: Char -> Parser [Char] st [(Int,Int)]
gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) >>~ blankline
removeFinalBar :: String -> String
@@ -545,18 +629,18 @@ removeFinalBar =
reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse
-- | Separator between rows of grid table.
-gridTableSep :: Char -> GenParser Char ParserState Char
+gridTableSep :: Char -> Parser [Char] ParserState Char
gridTableSep ch = try $ gridDashedLines ch >> return '\n'
-- | Parse header for a grid table.
gridTableHeader :: Bool -- ^ Headerless table
- -> GenParser Char ParserState Block
- -> GenParser Char ParserState ([[Block]], [Alignment], [Int])
-gridTableHeader headless block = try $ do
+ -> Parser [Char] ParserState [Block]
+ -> Parser [Char] ParserState ([[Block]], [Alignment], [Int])
+gridTableHeader headless blocks = try $ do
optional blanklines
dashes <- gridDashedLines '-'
rawContent <- if headless
- then return $ repeat ""
+ then return $ repeat ""
else many1
(notFollowedBy (gridTableSep '=') >> char '|' >>
many1Till anyChar newline)
@@ -571,25 +655,25 @@ gridTableHeader headless block = try $ do
then replicate (length dashes) ""
else map (intercalate " ") $ transpose
$ map (gridTableSplitLine indices) rawContent
- heads <- mapM (parseFromString $ many block) $
+ heads <- mapM (parseFromString blocks) $
map removeLeadingTrailingSpace rawHeads
return (heads, aligns, indices)
-gridTableRawLine :: [Int] -> GenParser Char ParserState [String]
+gridTableRawLine :: [Int] -> Parser [Char] ParserState [String]
gridTableRawLine indices = do
char '|'
line <- many1Till anyChar newline
return (gridTableSplitLine indices line)
-- | Parse row of grid table.
-gridTableRow :: GenParser Char ParserState Block
+gridTableRow :: Parser [Char] ParserState [Block]
-> [Int]
- -> GenParser Char ParserState [[Block]]
-gridTableRow block indices = do
+ -> Parser [Char] ParserState [[Block]]
+gridTableRow blocks indices = do
colLines <- many1 (gridTableRawLine indices)
let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $
transpose colLines
- mapM (liftM compactifyCell . parseFromString (many block)) cols
+ mapM (liftM compactifyCell . parseFromString blocks) cols
removeOneLeadingSpace :: [String] -> [String]
removeOneLeadingSpace xs =
@@ -603,23 +687,23 @@ compactifyCell :: [Block] -> [Block]
compactifyCell bs = head $ compactify [bs]
-- | Parse footer for a grid table.
-gridTableFooter :: GenParser Char ParserState [Char]
+gridTableFooter :: Parser [Char] ParserState [Char]
gridTableFooter = blanklines
---
-- | Parse a string with a given parser and state.
-readWith :: GenParser t ParserState a -- ^ parser
+readWith :: Parser [t] ParserState a -- ^ parser
-> ParserState -- ^ initial state
-> [t] -- ^ input
-> a
-readWith parser state input =
+readWith parser state input =
case runParser parser state "source" input of
Left err' -> error $ "\nError:\n" ++ show err'
Right result -> result
-- | Parse a string with @parser@ (for testing).
-testStringWith :: (Show a) => GenParser Char ParserState a
+testStringWith :: (Show a) => Parser [Char] ParserState a
-> String
-> IO ()
testStringWith parser str = UTF8.putStrLn $ show $
@@ -627,72 +711,67 @@ testStringWith parser str = UTF8.putStrLn $ show $
-- | Parsing options.
data ParserState = ParserState
- { stateParseRaw :: Bool, -- ^ Parse raw HTML and LaTeX?
+ { stateOptions :: ReaderOptions, -- ^ User options
stateParserContext :: ParserContext, -- ^ Inside list?
stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment?
+ stateAllowLinks :: Bool, -- ^ Allow parsing of links
stateMaxNestingLevel :: Int, -- ^ Max # of nested Strong/Emph
stateLastStrPos :: Maybe SourcePos, -- ^ Position after last str parsed
- stateKeys :: KeyTable, -- ^ List of reference keys
- stateCitations :: [String], -- ^ List of available citations
- stateNotes :: NoteTable, -- ^ List of notes
- stateTabStop :: Int, -- ^ Tab stop
- stateStandalone :: Bool, -- ^ Parse bibliographic info?
+ stateKeys :: KeyTable, -- ^ List of reference keys (with fallbacks)
+ stateNotes :: NoteTable, -- ^ List of notes (raw bodies)
+ stateNotes' :: NoteTable', -- ^ List of notes (parsed bodies)
stateTitle :: [Inline], -- ^ Title of document
stateAuthors :: [[Inline]], -- ^ Authors of document
stateDate :: [Inline], -- ^ Date of document
- stateStrict :: Bool, -- ^ Use strict markdown syntax?
- stateSmart :: Bool, -- ^ Use smart typography?
- stateOldDashes :: Bool, -- ^ Use pandoc <= 1.8.2.1 behavior
- -- in parsing dashes; -- is em-dash;
- -- before numeral is en-dash
- stateLiterateHaskell :: Bool, -- ^ Treat input as literate haskell
- stateColumns :: Int, -- ^ Number of columns in terminal
stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used
- stateIndentedCodeClasses :: [String], -- ^ Classes to use for indented code blocks
stateNextExample :: Int, -- ^ Number of next example
- stateExamples :: M.Map String Int, -- ^ Map from example labels to numbers
+ stateExamples :: M.Map String Int, -- ^ Map from example labels to numbers
stateHasChapters :: Bool, -- ^ True if \chapter encountered
- stateApplyMacros :: Bool, -- ^ Apply LaTeX macros?
stateMacros :: [Macro], -- ^ List of macros defined so far
stateRstDefaultRole :: String -- ^ Current rST default interpreted text role
}
- deriving Show
+
+instance Default ParserState where
+ def = defaultParserState
defaultParserState :: ParserState
-defaultParserState =
- ParserState { stateParseRaw = False,
+defaultParserState =
+ ParserState { stateOptions = def,
stateParserContext = NullState,
stateQuoteContext = NoQuote,
+ stateAllowLinks = True,
stateMaxNestingLevel = 6,
stateLastStrPos = Nothing,
stateKeys = M.empty,
- stateCitations = [],
stateNotes = [],
- stateTabStop = 4,
- stateStandalone = False,
+ stateNotes' = [],
stateTitle = [],
stateAuthors = [],
stateDate = [],
- stateStrict = False,
- stateSmart = False,
- stateOldDashes = False,
- stateLiterateHaskell = False,
- stateColumns = 80,
stateHeaderTable = [],
- stateIndentedCodeClasses = [],
stateNextExample = 1,
stateExamples = M.empty,
stateHasChapters = False,
- stateApplyMacros = True,
stateMacros = [],
stateRstDefaultRole = "title-reference"}
-data HeaderType
+getOption :: (ReaderOptions -> a) -> Parser s ParserState a
+getOption f = (f . stateOptions) `fmap` getState
+
+-- | Succeed only if the extension is enabled.
+guardEnabled :: Extension -> Parser s ParserState ()
+guardEnabled ext = getOption readerExtensions >>= guard . Set.member ext
+
+-- | Succeed only if the extension is disabled.
+guardDisabled :: Extension -> Parser s ParserState ()
+guardDisabled ext = getOption readerExtensions >>= guard . not . Set.member ext
+
+data HeaderType
= SingleHeader Char -- ^ Single line of characters underneath
| DoubleHeader Char -- ^ Lines of characters above and below
deriving (Eq, Show)
-data ParserContext
+data ParserContext
= ListItemState -- ^ Used when running parser on list item contents
| NullState -- ^ Default state
deriving (Eq, Show)
@@ -705,51 +784,35 @@ data QuoteContext
type NoteTable = [(String, String)]
-newtype Key = Key [Inline] deriving (Show, Read, Eq, Ord)
+type NoteTable' = [(String, F Blocks)] -- used in markdown reader
-toKey :: [Inline] -> Key
-toKey = Key . bottomUp lowercase
- where lowercase :: Inline -> Inline
- lowercase (Str xs) = Str (map toLower xs)
- lowercase (Math t xs) = Math t (map toLower xs)
- lowercase (Code attr xs) = Code attr (map toLower xs)
- lowercase (RawInline f xs) = RawInline f (map toLower xs)
- lowercase LineBreak = Space
- lowercase x = x
+newtype Key = Key String deriving (Show, Read, Eq, Ord)
-fromKey :: Key -> [Inline]
-fromKey (Key xs) = xs
+toKey :: String -> Key
+toKey = Key . map toLower . unwords . words
type KeyTable = M.Map Key Target
--- | Look up key in key table and return target object.
-lookupKeySrc :: KeyTable -- ^ Key table
- -> Key -- ^ Key
- -> Maybe Target
-lookupKeySrc table key = case M.lookup key table of
- Nothing -> Nothing
- Just src -> Just src
-
-- | Fail unless we're in "smart typography" mode.
-failUnlessSmart :: GenParser tok ParserState ()
-failUnlessSmart = getState >>= guard . stateSmart
+failUnlessSmart :: Parser [tok] ParserState ()
+failUnlessSmart = getOption readerSmart >>= guard
-smartPunctuation :: GenParser Char ParserState Inline
- -> GenParser Char ParserState Inline
+smartPunctuation :: Parser [Char] ParserState Inline
+ -> Parser [Char] ParserState Inline
smartPunctuation inlineParser = do
failUnlessSmart
choice [ quoted inlineParser, apostrophe, dash, ellipses ]
-apostrophe :: GenParser Char ParserState Inline
+apostrophe :: Parser [Char] ParserState Inline
apostrophe = (char '\'' <|> char '\8217') >> return (Str "\x2019")
-quoted :: GenParser Char ParserState Inline
- -> GenParser Char ParserState Inline
+quoted :: Parser [Char] ParserState Inline
+ -> Parser [Char] ParserState Inline
quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser
withQuoteContext :: QuoteContext
- -> (GenParser Char ParserState Inline)
- -> GenParser Char ParserState Inline
+ -> Parser [Char] ParserState a
+ -> Parser [Char] ParserState a
withQuoteContext context parser = do
oldState <- getState
let oldQuoteContext = stateQuoteContext oldState
@@ -759,39 +822,39 @@ withQuoteContext context parser = do
setState newState { stateQuoteContext = oldQuoteContext }
return result
-singleQuoted :: GenParser Char ParserState Inline
- -> GenParser Char ParserState Inline
+singleQuoted :: Parser [Char] ParserState Inline
+ -> Parser [Char] ParserState Inline
singleQuoted inlineParser = try $ do
singleQuoteStart
withQuoteContext InSingleQuote $ many1Till inlineParser singleQuoteEnd >>=
return . Quoted SingleQuote . normalizeSpaces
-doubleQuoted :: GenParser Char ParserState Inline
- -> GenParser Char ParserState Inline
+doubleQuoted :: Parser [Char] ParserState Inline
+ -> Parser [Char] ParserState Inline
doubleQuoted inlineParser = try $ do
doubleQuoteStart
withQuoteContext InDoubleQuote $ do
contents <- manyTill inlineParser doubleQuoteEnd
return . Quoted DoubleQuote . normalizeSpaces $ contents
-failIfInQuoteContext :: QuoteContext -> GenParser tok ParserState ()
+failIfInQuoteContext :: QuoteContext -> Parser [tok] ParserState ()
failIfInQuoteContext context = do
st <- getState
if stateQuoteContext st == context
then fail "already inside quotes"
else return ()
-charOrRef :: [Char] -> GenParser Char st Char
+charOrRef :: [Char] -> Parser [Char] st Char
charOrRef cs =
oneOf cs <|> try (do c <- characterReference
guard (c `elem` cs)
return c)
-updateLastStrPos :: GenParser Char ParserState ()
-updateLastStrPos = getPosition >>= \p ->
+updateLastStrPos :: Parser [Char] ParserState ()
+updateLastStrPos = getPosition >>= \p ->
updateState $ \s -> s{ stateLastStrPos = Just p }
-singleQuoteStart :: GenParser Char ParserState ()
+singleQuoteStart :: Parser [Char] ParserState ()
singleQuoteStart = do
failIfInQuoteContext InSingleQuote
pos <- getPosition
@@ -802,61 +865,61 @@ singleQuoteStart = do
notFollowedBy (oneOf ")!],;:-? \t\n")
notFollowedBy (char '.') <|> lookAhead (string "..." >> return ())
notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >>
- satisfy (not . isAlphaNum)))
+ satisfy (not . isAlphaNum)))
-- possess/contraction
return ()
-singleQuoteEnd :: GenParser Char st ()
+singleQuoteEnd :: Parser [Char] st ()
singleQuoteEnd = try $ do
charOrRef "'\8217\146"
notFollowedBy alphaNum
-doubleQuoteStart :: GenParser Char ParserState ()
+doubleQuoteStart :: Parser [Char] ParserState ()
doubleQuoteStart = do
failIfInQuoteContext InDoubleQuote
try $ do charOrRef "\"\8220\147"
notFollowedBy (satisfy (\c -> c == ' ' || c == '\t' || c == '\n'))
-doubleQuoteEnd :: GenParser Char st ()
+doubleQuoteEnd :: Parser [Char] st ()
doubleQuoteEnd = do
charOrRef "\"\8221\148"
return ()
-ellipses :: GenParser Char st Inline
+ellipses :: Parser [Char] st Inline
ellipses = do
try (charOrRef "\8230\133") <|> try (string "..." >> return '…')
return (Str "\8230")
-dash :: GenParser Char ParserState Inline
+dash :: Parser [Char] ParserState Inline
dash = do
- oldDashes <- stateOldDashes `fmap` getState
+ oldDashes <- getOption readerOldDashes
if oldDashes
then emDashOld <|> enDashOld
else Str `fmap` (hyphenDash <|> emDash <|> enDash)
-- Two hyphens = en-dash, three = em-dash
-hyphenDash :: GenParser Char st String
+hyphenDash :: Parser [Char] st String
hyphenDash = do
try $ string "--"
option "\8211" (char '-' >> return "\8212")
-emDash :: GenParser Char st String
+emDash :: Parser [Char] st String
emDash = do
try (charOrRef "\8212\151")
return "\8212"
-enDash :: GenParser Char st String
+enDash :: Parser [Char] st String
enDash = do
try (charOrRef "\8212\151")
return "\8211"
-enDashOld :: GenParser Char st Inline
+enDashOld :: Parser [Char] st Inline
enDashOld = do
try (charOrRef "\8211\150") <|>
try (char '-' >> lookAhead (satisfy isDigit) >> return '–')
return (Str "\8211")
-emDashOld :: GenParser Char st Inline
+emDashOld :: Parser [Char] st Inline
emDashOld = do
try (charOrRef "\8212\151") <|> (try $ string "--" >> optional (char '-') >> return '-')
return (Str "\8212")
@@ -866,24 +929,24 @@ emDashOld = do
--
-- | Parse a \newcommand or \renewcommand macro definition.
-macro :: GenParser Char ParserState Block
+macro :: Parser [Char] ParserState Block
macro = do
- apply <- stateApplyMacros `fmap` getState
+ apply <- getOption readerApplyMacros
inp <- getInput
case parseMacroDefinitions inp of
- ([], _) -> pzero
- (ms, rest) -> do def <- count (length inp - length rest) anyChar
+ ([], _) -> mzero
+ (ms, rest) -> do def' <- count (length inp - length rest) anyChar
if apply
then do
updateState $ \st ->
st { stateMacros = ms ++ stateMacros st }
return Null
- else return $ RawBlock "latex" def
+ else return $ RawBlock "latex" def'
-- | Apply current macros to string.
-applyMacros' :: String -> GenParser Char ParserState String
+applyMacros' :: String -> Parser [Char] ParserState String
applyMacros' target = do
- apply <- liftM stateApplyMacros getState
+ apply <- getOption readerApplyMacros
if apply
then do macros <- liftM stateMacros getState
return $ applyMacros macros target