path: root/src/Text/Pandoc/Parsing.hs
Diffstat (limited to 'src/Text/Pandoc/Parsing.hs')
-rw-r--r--  src/Text/Pandoc/Parsing.hs  1329
1 file changed, 0 insertions, 1329 deletions
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
deleted file mode 100644
index 400d07f2a..000000000
--- a/src/Text/Pandoc/Parsing.hs
+++ /dev/null
@@ -1,1329 +0,0 @@
-{-# LANGUAGE
- FlexibleContexts
-, GeneralizedNewtypeDeriving
-, TypeSynonymInstances
-, MultiParamTypeClasses
-, FlexibleInstances
-, IncoherentInstances #-}
-
-{-
-Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Parsing
- Copyright : Copyright (C) 2006-2016 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-A utility library with parsers used in pandoc readers.
--}
-module Text.Pandoc.Parsing ( anyLine,
- many1Till,
- notFollowedBy',
- oneOfStrings,
- oneOfStringsCI,
- spaceChar,
- nonspaceChar,
- skipSpaces,
- blankline,
- blanklines,
- enclosed,
- stringAnyCase,
- parseFromString,
- lineClump,
- charsInBalanced,
- romanNumeral,
- emailAddress,
- uri,
- mathInline,
- mathDisplay,
- withHorizDisplacement,
- withRaw,
- escaped,
- characterReference,
- anyOrderedListMarker,
- orderedListMarker,
- charRef,
- lineBlockLines,
- tableWith,
- widthsFromIndices,
- gridTableWith,
- readWith,
- readWithM,
- testStringWith,
- guardEnabled,
- guardDisabled,
- updateLastStrPos,
- notAfterString,
- logMessage,
- reportLogMessages,
- ParserState (..),
- HasReaderOptions (..),
- HasHeaderMap (..),
- HasIdentifierList (..),
- HasMacros (..),
- HasLogMessages (..),
- HasLastStrPosition (..),
- defaultParserState,
- HeaderType (..),
- ParserContext (..),
- QuoteContext (..),
- HasQuoteContext (..),
- NoteTable,
- NoteTable',
- KeyTable,
- SubstTable,
- Key (..),
- toKey,
- registerHeader,
- smartPunctuation,
- singleQuoteStart,
- singleQuoteEnd,
- doubleQuoteStart,
- doubleQuoteEnd,
- ellipses,
- apostrophe,
- dash,
- nested,
- citeKey,
- macro,
- applyMacros',
- Parser,
- ParserT,
- F(..),
- runF,
- askF,
- asksF,
- token,
- (<+?>),
- extractIdClass,
- insertIncludedFile,
- -- * Re-exports from Text.Parsec
- Stream,
- runParser,
- runParserT,
- parse,
- anyToken,
- getInput,
- setInput,
- unexpected,
- char,
- letter,
- digit,
- alphaNum,
- skipMany,
- skipMany1,
- spaces,
- space,
- anyChar,
- satisfy,
- newline,
- string,
- count,
- eof,
- noneOf,
- oneOf,
- lookAhead,
- notFollowedBy,
- many,
- many1,
- manyTill,
- (<|>),
- (<?>),
- choice,
- try,
- sepBy,
- sepBy1,
- sepEndBy,
- sepEndBy1,
- endBy,
- endBy1,
- option,
- optional,
- optionMaybe,
- getState,
- setState,
- updateState,
- SourcePos,
- getPosition,
- setPosition,
- sourceColumn,
- sourceLine,
- setSourceColumn,
- setSourceLine,
- newPos
- )
-where
-
-import Text.Pandoc.Definition
-import Text.Pandoc.Options
-import Text.Pandoc.Builder (Blocks, Inlines, rawBlock, HasMeta(..))
-import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.XML (fromEntities)
-import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
-import Text.Parsec hiding (token)
-import Text.Parsec.Pos (newPos)
-import Data.Char ( toLower, toUpper, ord, chr, isAscii, isAlphaNum,
- isHexDigit, isSpace, isPunctuation )
-import Data.List ( intercalate, transpose, isSuffixOf )
-import Text.Pandoc.Shared
-import qualified Data.Map as M
-import Text.TeXMath.Readers.TeX.Macros (applyMacros, Macro,
- parseMacroDefinitions)
-import Text.HTML.TagSoup.Entity ( lookupEntity )
-import Text.Pandoc.Asciify (toAsciiChar)
-import Data.Monoid ((<>))
-import Text.Pandoc.Class (PandocMonad, readFileFromDirs, report)
-import Text.Pandoc.Logging
-import Data.Default
-import qualified Data.Set as Set
-import Control.Monad.Reader
-import Control.Monad.Identity
-import Data.Maybe (catMaybes)
-
-import Text.Pandoc.Error
-import Control.Monad.Except
-
-type Parser t s = Parsec t s
-
-type ParserT = ParsecT
-
-newtype F a = F { unF :: Reader ParserState a } deriving (Monad, Applicative, Functor)
-
-runF :: F a -> ParserState -> a
-runF = runReader . unF
-
-askF :: F ParserState
-askF = F ask
-
-asksF :: (ParserState -> a) -> F a
-asksF f = F $ asks f
-
-instance Monoid a => Monoid (F a) where
- mempty = return mempty
- mappend = liftM2 mappend
- mconcat = liftM mconcat . sequence
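-
--- A rough sketch of forcing a delayed 'F' computation against a final parser
--- state (here the 'defaultParserState' defined later in this module):
---
--- > runF (asksF stateNextExample) defaultParserState   -- 1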
-
--- | Parse any line of text
-anyLine :: Stream [Char] m Char => ParserT [Char] st m [Char]
-anyLine = do
- -- This is much faster than:
- -- manyTill anyChar newline
- inp <- getInput
- pos <- getPosition
- case break (=='\n') inp of
- (this, '\n':rest) -> do
- -- needed to persuade parsec that this won't match an empty string:
- anyChar
- setInput rest
- setPosition $ incSourceLine (setSourceColumn pos 1) 1
- return this
- _ -> mzero
-
--- | Like @manyTill@, but reads at least one item.
-many1Till :: Stream s m t
- => ParserT s st m a
- -> ParserT s st m end
- -> ParserT s st m [a]
-many1Till p end = do
- first <- p
- rest <- manyTill p end
- return (first:rest)
-
--- | A more general form of @notFollowedBy@. This one allows any
--- type of parser to be specified, and succeeds only if that parser fails.
--- It does not consume any input.
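--- For example, a hypothetical helper that accepts a line only if it is not
--- blank (the same pattern 'lineClump' uses below):
---
--- > notBlankLine = notFollowedBy' blankline >> anyLine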
-notFollowedBy' :: (Show b, Stream s m a) => ParserT s st m b -> ParserT s st m ()
-notFollowedBy' p = try $ join $ do a <- try p
- return (unexpected (show a))
- <|>
- return (return ())
--- (This version due to Andrew Pimlott on the Haskell mailing list.)
-
-oneOfStrings' :: Stream s m Char => (Char -> Char -> Bool) -> [String] -> ParserT s st m String
-oneOfStrings' _ [] = fail "no strings"
-oneOfStrings' matches strs = try $ do
- c <- anyChar
- let strs' = [xs | (x:xs) <- strs, x `matches` c]
- case strs' of
- [] -> fail "not found"
- _ -> (c:) <$> oneOfStrings' matches strs'
- <|> if "" `elem` strs'
- then return [c]
- else fail "not found"
-
--- | Parses one of a list of strings. If the list contains
--- two strings one of which is a prefix of the other, the longer
--- string will be matched if possible.
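--- For example:
---
--- > parse (oneOfStrings ["http", "https"]) "" "https://x"   -- Right "https"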
-oneOfStrings :: Stream s m Char => [String] -> ParserT s st m String
-oneOfStrings = oneOfStrings' (==)
-
--- | Parses one of a list of strings (tried in order), case insensitive.
-oneOfStringsCI :: Stream s m Char => [String] -> ParserT s st m String
-oneOfStringsCI = oneOfStrings' ciMatch
- where ciMatch x y = toLower' x == toLower' y
- -- this optimizes toLower by checking common ASCII case
- -- first, before calling the expensive unicode-aware
- -- function:
- toLower' c | c >= 'A' && c <= 'Z' = chr (ord c + 32)
- | isAscii c = c
- | otherwise = toLower c
-
--- | Parses a space or tab.
-spaceChar :: Stream s m Char => ParserT s st m Char
-spaceChar = satisfy $ \c -> c == ' ' || c == '\t'
-
--- | Parses a nonspace, nonnewline character.
-nonspaceChar :: Stream s m Char => ParserT s st m Char
-nonspaceChar = satisfy $ flip notElem ['\t', '\n', ' ', '\r']
-
--- | Skips zero or more spaces or tabs.
-skipSpaces :: Stream s m Char => ParserT s st m ()
-skipSpaces = skipMany spaceChar
-
--- | Skips zero or more spaces or tabs, then reads a newline.
-blankline :: Stream s m Char => ParserT s st m Char
-blankline = try $ skipSpaces >> newline
-
--- | Parses one or more blank lines and returns a string of newlines.
-blanklines :: Stream s m Char => ParserT s st m [Char]
-blanklines = many1 blankline
-
--- | Parses material enclosed between start and end parsers.
-enclosed :: Stream s m Char => ParserT s st m t -- ^ start parser
- -> ParserT s st m end -- ^ end parser
- -> ParserT s st m a -- ^ content parser (to be used repeatedly)
- -> ParserT s st m [a]
-enclosed start end parser = try $
- start >> notFollowedBy space >> many1Till parser end
-
--- | Parse string, case insensitive.
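--- For example:
---
--- > parse (stringAnyCase "note") "" "NOTE: foo"   -- Right "NOTE"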
-stringAnyCase :: Stream s m Char => [Char] -> ParserT s st m String
-stringAnyCase [] = string ""
-stringAnyCase (x:xs) = do
- firstChar <- char (toUpper x) <|> char (toLower x)
- rest <- stringAnyCase xs
- return (firstChar:rest)
-
--- | Parse contents of 'str' using 'parser' and return result.
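--- A rough usage sketch, inside some parser over String input:
---
--- > digits <- parseFromString (many1 digit) "123"   -- digits == "123"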
-parseFromString :: Monad m => ParserT String st m a -> String -> ParserT String st m a
-parseFromString parser str = do
- oldPos <- getPosition
- oldInput <- getInput
- setInput str
- result <- parser
- spaces
- eof
- setInput oldInput
- setPosition oldPos
- return result
-
--- | Parse raw line block up to and including blank lines.
-lineClump :: Stream [Char] m Char => ParserT [Char] st m String
-lineClump = blanklines
- <|> (many1 (notFollowedBy blankline >> anyLine) >>= return . unlines)
-
--- | Parse a string of characters between an open character
--- and a close character, including text between balanced
--- pairs of open and close, which must be different. For example,
--- @charsInBalanced '(' ')' anyChar@ will parse "(hello (there))"
--- and return "hello (there)".
-charsInBalanced :: Stream s m Char => Char -> Char -> ParserT s st m Char
- -> ParserT s st m String
-charsInBalanced open close parser = try $ do
- char open
- let isDelim c = c == open || c == close
- raw <- many $ many1 (notFollowedBy (satisfy isDelim) >> parser)
- <|> (do res <- charsInBalanced open close parser
- return $ [open] ++ res ++ [close])
- char close
- return $ concat raw
-
--- old charsInBalanced would be:
--- charsInBalanced open close (noneOf "\n" <|> char '\n' >> notFollowedBy blankline)
--- old charsInBalanced' would be:
--- charsInBalanced open close anyChar
-
--- Auxiliary functions for romanNumeral:
-
-lowercaseRomanDigits :: [Char]
-lowercaseRomanDigits = ['i','v','x','l','c','d','m']
-
-uppercaseRomanDigits :: [Char]
-uppercaseRomanDigits = map toUpper lowercaseRomanDigits
-
--- | Parses a roman numeral (uppercase or lowercase), returns number.
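--- For example:
---
--- > parse (romanNumeral True) "" "MMXVI"   -- Right 2016
--- > parse (romanNumeral False) "" "xiv"    -- Right 14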
-romanNumeral :: Stream s m Char => Bool -- ^ Uppercase if true
- -> ParserT s st m Int
-romanNumeral upperCase = do
- let romanDigits = if upperCase
- then uppercaseRomanDigits
- else lowercaseRomanDigits
- lookAhead $ oneOf romanDigits
- let [one, five, ten, fifty, hundred, fivehundred, thousand] =
- map char romanDigits
- thousands <- many thousand >>= (return . (1000 *) . length)
- ninehundreds <- option 0 $ try $ hundred >> thousand >> return 900
- fivehundreds <- many fivehundred >>= (return . (500 *) . length)
- fourhundreds <- option 0 $ try $ hundred >> fivehundred >> return 400
- hundreds <- many hundred >>= (return . (100 *) . length)
- nineties <- option 0 $ try $ ten >> hundred >> return 90
- fifties <- many fifty >>= (return . (50 *) . length)
- forties <- option 0 $ try $ ten >> fifty >> return 40
- tens <- many ten >>= (return . (10 *) . length)
- nines <- option 0 $ try $ one >> ten >> return 9
- fives <- many five >>= (return . (5 *) . length)
- fours <- option 0 $ try $ one >> five >> return 4
- ones <- many one >>= (return . length)
- let total = thousands + ninehundreds + fivehundreds + fourhundreds +
- hundreds + nineties + fifties + forties + tens + nines +
- fives + fours + ones
- if total == 0
- then fail "not a roman numeral"
- else return total
-
--- Parsers for email addresses and URIs
-
--- | Parses an email address; returns original and corresponding
--- escaped mailto: URI.
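--- For example:
---
--- > parse emailAddress "" "jgm@berkeley.edu"
--- > -- Right ("jgm@berkeley.edu","mailto:jgm@berkeley.edu")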
-emailAddress :: Stream s m Char => ParserT s st m (String, String)
-emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain)
- where toResult mbox dom = let full = fromEntities $ mbox ++ '@':dom
- in (full, escapeURI $ "mailto:" ++ full)
- mailbox = intercalate "." <$> (emailWord `sepby1` dot)
- domain = intercalate "." <$> (subdomain `sepby1` dot)
- dot = char '.'
- subdomain = many1 $ alphaNum <|> innerPunct
- -- this excludes some valid email addresses, since an
- -- email could contain e.g. '__', but gives better results
- -- for our purposes, when combined with markdown parsing:
- innerPunct = try (satisfy (\c -> isEmailPunct c || c == '@')
- <* notFollowedBy space
- <* notFollowedBy (satisfy isPunctuation))
- -- technically an email address could begin with a symbol,
- -- but allowing this creates too many problems.
- -- See e.g. https://github.com/jgm/pandoc/issues/2940
- emailWord = do x <- satisfy isAlphaNum
- xs <- many (satisfy isEmailChar)
- return (x:xs)
- isEmailChar c = isAlphaNum c || isEmailPunct c
- isEmailPunct c = c `elem` "!\"#$%&'*+-/=?^_{|}~;"
- -- note: sepBy1 from parsec consumes input when sep
- -- succeeds and p fails, so we use this variant here.
- sepby1 p sep = (:) <$> p <*> (many (try $ sep >> p))
-
-
--- Schemes from http://www.iana.org/assignments/uri-schemes.html plus
--- the unofficial schemes coap, doi, javascript, isbn, pmid
-schemes :: [String]
-schemes = ["coap","doi","javascript","aaa","aaas","about","acap","cap","cid",
- "crid","data","dav","dict","dns","file","ftp","geo","go","gopher",
- "h323","http","https","iax","icap","im","imap","info","ipp","iris",
- "iris.beep","iris.xpc","iris.xpcs","iris.lwz","ldap","mailto","mid",
- "msrp","msrps","mtqp","mupdate","news","nfs","ni","nih","nntp",
- "opaquelocktoken","pop","pres","rtsp","service","session","shttp","sieve",
- "sip","sips","sms","snmp","soap.beep","soap.beeps","tag","tel","telnet",
- "tftp","thismessage","tn3270","tip","tv","urn","vemmi","ws","wss","xcon",
- "xcon-userid","xmlrpc.beep","xmlrpc.beeps","xmpp","z39.50r","z39.50s",
- "adiumxtra","afp","afs","aim","apt","attachment","aw","beshare","bitcoin",
- "bolo","callto","chrome","chrome-extension","com-eventbrite-attendee",
- "content", "cvs","dlna-playsingle","dlna-playcontainer","dtn","dvb",
- "ed2k","facetime","feed","finger","fish","gg","git","gizmoproject",
- "gtalk","hcp","icon","ipn","irc","irc6","ircs","itms","jar","jms",
- "keyparc","lastfm","ldaps","magnet","maps","market","message","mms",
- "ms-help","msnim","mumble","mvn","notes","oid","palm","paparazzi",
- "platform","proxy","psyc","query","res","resource","rmi","rsync",
- "rtmp","secondlife","sftp","sgn","skype","smb","soldat","spotify",
- "ssh","steam","svn","teamspeak","things","udp","unreal","ut2004",
- "ventrilo","view-source","webcal","wtai","wyciwyg","xfire","xri",
- "ymsgr", "isbn", "pmid"]
-
-uriScheme :: Stream s m Char => ParserT s st m String
-uriScheme = oneOfStringsCI schemes
-
--- | Parses a URI. Returns pair of original and URI-escaped version.
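--- For example, trailing sentence punctuation is left out (see the note in
--- the definition below):
---
--- > parse uri "" "http://google.com."
--- > -- Right ("http://google.com","http://google.com")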
-uri :: Stream [Char] m Char => ParserT [Char] st m (String, String)
-uri = try $ do
- scheme <- uriScheme
- char ':'
- -- We allow sentence punctuation except at the end, since
- -- we don't want the trailing '.' in 'http://google.com.' We want to allow
- -- http://en.wikipedia.org/wiki/State_of_emergency_(disambiguation)
- -- as a URL, while NOT picking up the closing paren in
- -- (http://wikipedia.org). So we include balanced parens in the URL.
- let isWordChar c = isAlphaNum c || c `elem` "#$%*+/@\\_-"
- let wordChar = satisfy isWordChar
- let percentEscaped = try $ char '%' >> skipMany1 (satisfy isHexDigit)
- let entity = () <$ characterReference
- let punct = skipMany1 (char ',')
- <|> () <$ (satisfy (\c -> not (isSpace c) && c /= '<' && c /= '>'))
- let uriChunk = skipMany1 wordChar
- <|> percentEscaped
- <|> entity
- <|> (try $ punct >>
- lookAhead (void (satisfy isWordChar) <|> percentEscaped))
- str <- snd <$> withRaw (skipMany1 ( () <$
- (enclosed (char '(') (char ')') uriChunk
- <|> enclosed (char '{') (char '}') uriChunk
- <|> enclosed (char '[') (char ']') uriChunk)
- <|> uriChunk))
- str' <- option str $ char '/' >> return (str ++ "/")
- let uri' = scheme ++ ":" ++ fromEntities str'
- return (uri', escapeURI uri')
-
-mathInlineWith :: Stream s m Char => String -> String -> ParserT s st m String
-mathInlineWith op cl = try $ do
- string op
- notFollowedBy space
- words' <- many1Till (count 1 (noneOf " \t\n\\")
- <|> (char '\\' >>
- -- This next clause is needed because \text{..} can
- -- contain $, \(\), etc.
- (try (string "text" >>
- (("\\text" ++) <$> inBalancedBraces 0 ""))
- <|> (\c -> ['\\',c]) <$> anyChar))
- <|> do (blankline <* notFollowedBy' blankline) <|>
- (oneOf " \t" <* skipMany (oneOf " \t"))
- notFollowedBy (char '$')
- return " "
- ) (try $ string cl)
- notFollowedBy digit -- to prevent capture of $5
- return $ concat words'
- where
- inBalancedBraces :: Stream s m Char => Int -> String -> ParserT s st m String
- inBalancedBraces 0 "" = do
- c <- anyChar
- if c == '{'
- then inBalancedBraces 1 "{"
- else mzero
- inBalancedBraces 0 s = return $ reverse s
- inBalancedBraces numOpen ('\\':xs) = do
- c <- anyChar
- inBalancedBraces numOpen (c:'\\':xs)
- inBalancedBraces numOpen xs = do
- c <- anyChar
- case c of
- '}' -> inBalancedBraces (numOpen - 1) (c:xs)
- '{' -> inBalancedBraces (numOpen + 1) (c:xs)
- _ -> inBalancedBraces numOpen (c:xs)
-
-mathDisplayWith :: Stream s m Char => String -> String -> ParserT s st m String
-mathDisplayWith op cl = try $ do
- string op
- many1Till (noneOf "\n" <|> (newline <* notFollowedBy' blankline)) (try $ string cl)
-
-mathDisplay :: (HasReaderOptions st, Stream s m Char)
- => ParserT s st m String
-mathDisplay =
- (guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$")
- <|> (guardEnabled Ext_tex_math_single_backslash >>
- mathDisplayWith "\\[" "\\]")
- <|> (guardEnabled Ext_tex_math_double_backslash >>
- mathDisplayWith "\\\\[" "\\\\]")
-
-mathInline :: (HasReaderOptions st , Stream s m Char)
- => ParserT s st m String
-mathInline =
- (guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$")
- <|> (guardEnabled Ext_tex_math_single_backslash >>
- mathInlineWith "\\(" "\\)")
- <|> (guardEnabled Ext_tex_math_double_backslash >>
- mathInlineWith "\\\\(" "\\\\)")
-
--- | Applies a parser and returns a pair of its result and its horizontal
--- displacement (the difference between the source column at the end
--- and the source column at the beginning). Vertical displacement
--- (source row) is ignored.
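--- For example:
---
--- > parse (withHorizDisplacement (string "abc")) "" "abcdef"   -- Right ("abc",3)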
-withHorizDisplacement :: Stream s m Char
- => ParserT s st m a -- ^ Parser to apply
- -> ParserT s st m (a, Int) -- ^ (result, displacement)
-withHorizDisplacement parser = do
- pos1 <- getPosition
- result <- parser
- pos2 <- getPosition
- return (result, sourceColumn pos2 - sourceColumn pos1)
-
--- | Applies a parser and returns the raw string that was parsed,
--- along with the value produced by the parser.
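--- For example:
---
--- > parse (withRaw (many1 letter <* char '!')) "" "hi!there"   -- Right ("hi","hi!")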
-withRaw :: Stream [Char] m Char => ParsecT [Char] st m a -> ParsecT [Char] st m (a, [Char])
-withRaw parser = do
- pos1 <- getPosition
- inp <- getInput
- result <- parser
- pos2 <- getPosition
- let (l1,c1) = (sourceLine pos1, sourceColumn pos1)
- let (l2,c2) = (sourceLine pos2, sourceColumn pos2)
- let inplines = take ((l2 - l1) + 1) $ lines inp
- let raw = case inplines of
- [] -> ""
- [l] -> take (c2 - c1) l
- ls -> unlines (init ls) ++ take (c2 - 1) (last ls)
- return (result, raw)
-
--- | Parses backslash, then applies character parser.
-escaped :: Stream s m Char
- => ParserT s st m Char -- ^ Parser for character to escape
- -> ParserT s st m Char
-escaped parser = try $ char '\\' >> parser
-
--- | Parse character entity.
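--- For example:
---
--- > parse characterReference "" "&amp; etc."   -- Right '&'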
-characterReference :: Stream s m Char => ParserT s st m Char
-characterReference = try $ do
- char '&'
- ent <- many1Till nonspaceChar (char ';')
- let ent' = case ent of
- '#':'X':xs -> '#':'x':xs -- workaround tagsoup bug
- '#':_ -> ent
- _ -> ent ++ ";"
- case lookupEntity ent' of
- Just (c : _) -> return c
- _ -> fail "entity not found"
-
--- | Parses an uppercase roman numeral and returns (UpperRoman, number).
-upperRoman :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
-upperRoman = do
- num <- romanNumeral True
- return (UpperRoman, num)
-
--- | Parses a lowercase roman numeral and returns (LowerRoman, number).
-lowerRoman :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
-lowerRoman = do
- num <- romanNumeral False
- return (LowerRoman, num)
-
--- | Parses a decimal numeral and returns (Decimal, number).
-decimal :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
-decimal = do
- num <- many1 digit
- return (Decimal, read num)
-
--- | Parses a '@' and optional label and
--- returns (Example, next example number). The next
--- example number is incremented in parser state, and the label
--- (if present) is added to the label table.
-exampleNum :: Stream s m Char
- => ParserT s ParserState m (ListNumberStyle, Int)
-exampleNum = do
- char '@'
- lab <- many (alphaNum <|> satisfy (\c -> c == '_' || c == '-'))
- st <- getState
- let num = stateNextExample st
- let newlabels = if null lab
- then stateExamples st
- else M.insert lab num $ stateExamples st
- updateState $ \s -> s{ stateNextExample = num + 1
- , stateExamples = newlabels }
- return (Example, num)
-
--- | Parses a '#' and returns (DefaultStyle, 1).
-defaultNum :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
-defaultNum = do
- char '#'
- return (DefaultStyle, 1)
-
--- | Parses a lowercase letter and returns (LowerAlpha, number).
-lowerAlpha :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
-lowerAlpha = do
- ch <- oneOf ['a'..'z']
- return (LowerAlpha, ord ch - ord 'a' + 1)
-
--- | Parses an uppercase letter and returns (UpperAlpha, number).
-upperAlpha :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
-upperAlpha = do
- ch <- oneOf ['A'..'Z']
- return (UpperAlpha, ord ch - ord 'A' + 1)
-
--- | Parses a roman numeral i or I
-romanOne :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
-romanOne = (char 'i' >> return (LowerRoman, 1)) <|>
- (char 'I' >> return (UpperRoman, 1))
-
--- | Parses an ordered list marker and returns list attributes.
-anyOrderedListMarker :: Stream s m Char => ParserT s ParserState m ListAttributes
-anyOrderedListMarker = choice $
- [delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens],
- numParser <- [decimal, exampleNum, defaultNum, romanOne,
- lowerAlpha, lowerRoman, upperAlpha, upperRoman]]
-
--- | Parses a list number (num) followed by a period, returns list attributes.
-inPeriod :: Stream s m Char
- => ParserT s st m (ListNumberStyle, Int)
- -> ParserT s st m ListAttributes
-inPeriod num = try $ do
- (style, start) <- num
- char '.'
- let delim = if style == DefaultStyle
- then DefaultDelim
- else Period
- return (start, style, delim)
-
--- | Parses a list number (num) followed by a paren, returns list attributes.
-inOneParen :: Stream s m Char
- => ParserT s st m (ListNumberStyle, Int)
- -> ParserT s st m ListAttributes
-inOneParen num = try $ do
- (style, start) <- num
- char ')'
- return (start, style, OneParen)
-
--- | Parses a list number (num) enclosed in parens, returns list attributes.
-inTwoParens :: Stream s m Char
- => ParserT s st m (ListNumberStyle, Int)
- -> ParserT s st m ListAttributes
-inTwoParens num = try $ do
- char '('
- (style, start) <- num
- char ')'
- return (start, style, TwoParens)
-
--- | Parses an ordered list marker with a given style and delimiter,
--- returns number.
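--- For example, parsing the marker of a lower-alpha list item delimited by
--- two parentheses:
---
--- > runParser (orderedListMarker LowerAlpha TwoParens) defaultParserState "" "(b) two"
--- > -- Right 2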
-orderedListMarker :: Stream s m Char
- => ListNumberStyle
- -> ListNumberDelim
- -> ParserT s ParserState m Int
-orderedListMarker style delim = do
- let num = defaultNum <|> -- # can continue any kind of list
- case style of
- DefaultStyle -> decimal
- Example -> exampleNum
- Decimal -> decimal
- UpperRoman -> upperRoman
- LowerRoman -> lowerRoman
- UpperAlpha -> upperAlpha
- LowerAlpha -> lowerAlpha
- let context = case delim of
- DefaultDelim -> inPeriod
- Period -> inPeriod
- OneParen -> inOneParen
- TwoParens -> inTwoParens
- (start, _, _) <- context num
- return start
-
--- | Parses a character reference and returns a Str element.
-charRef :: Stream s m Char => ParserT s st m Inline
-charRef = do
- c <- characterReference
- return $ Str [c]
-
-lineBlockLine :: Stream [Char] m Char => ParserT [Char] st m String
-lineBlockLine = try $ do
- char '|'
- char ' '
- white <- many (spaceChar >> return '\160')
- notFollowedBy newline
- line <- anyLine
- continuations <- many (try $ char ' ' >> anyLine)
- return $ white ++ unwords (line : continuations)
-
-blankLineBlockLine :: Stream [Char] m Char => ParserT [Char] st m Char
-blankLineBlockLine = try (char '|' >> blankline)
-
--- | Parses an RST-style line block and returns a list of strings.
-lineBlockLines :: Stream [Char] m Char => ParserT [Char] st m [String]
-lineBlockLines = try $ do
- lines' <- many1 (lineBlockLine <|> ((:[]) <$> blankLineBlockLine))
- skipMany1 $ blankline <|> blankLineBlockLine
- return lines'
-
--- | Parse a table using 'headerParser', 'rowParser',
--- 'lineParser', and 'footerParser'.
-tableWith :: Stream s m Char
- => ParserT s ParserState m ([Blocks], [Alignment], [Int])
- -> ([Int] -> ParserT s ParserState m [Blocks])
- -> ParserT s ParserState m sep
- -> ParserT s ParserState m end
- -> ParserT s ParserState m Blocks
-tableWith headerParser rowParser lineParser footerParser = try $ do
- (heads, aligns, indices) <- headerParser
- lines' <- rowParser indices `sepEndBy1` lineParser
- footerParser
- numColumns <- getOption readerColumns
- let widths = if (indices == [])
- then replicate (length aligns) 0.0
- else widthsFromIndices numColumns indices
- return $ B.table mempty (zip aligns widths) heads lines'
-
--- Calculate relative widths of table columns, based on indices
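--- For example, with column boundaries at 0, 10, and 30 (the kind of index
--- list 'gridTableHeader' produces) and an 80-column terminal, this should
--- give widths of 10/80 and 20/80:
---
--- > widthsFromIndices 80 [0, 10, 30]   -- [0.125,0.25]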
-widthsFromIndices :: Int -- Number of columns on terminal
- -> [Int] -- Indices
- -> [Double] -- Fractional relative sizes of columns
-widthsFromIndices _ [] = []
-widthsFromIndices numColumns' indices =
- let numColumns = max numColumns' (if null indices then 0 else last indices)
- lengths' = zipWith (-) indices (0:indices)
- lengths = reverse $
- case reverse lengths' of
- [] -> []
- [x] -> [x]
- -- compensate for the fact that intercolumn
- -- spaces are counted in widths of all columns
- -- but the last...
- (x:y:zs) -> if x < y && y - x <= 2
- then y:y:zs
- else x:y:zs
- totLength = sum lengths
- quotient = if totLength > numColumns
- then fromIntegral totLength
- else fromIntegral numColumns
- fracs = map (\l -> (fromIntegral l) / quotient) lengths in
- tail fracs
-
----
-
--- Parse a grid table: starts with row of '-' on top, then header
--- (which may be grid), then the rows,
--- which may be grid, separated by blank lines, and
--- ending with a footer (dashed line followed by blank line).
-gridTableWith :: Stream [Char] m Char
- => ParserT [Char] ParserState m Blocks -- ^ Block list parser
- -> Bool -- ^ Headerless table
- -> ParserT [Char] ParserState m Blocks
-gridTableWith blocks headless =
- tableWith (gridTableHeader headless blocks) (gridTableRow blocks)
- (gridTableSep '-') gridTableFooter
-
-gridTableSplitLine :: [Int] -> String -> [String]
-gridTableSplitLine indices line = map removeFinalBar $ tail $
- splitStringByIndices (init indices) $ trimr line
-
-gridPart :: Stream s m Char => Char -> ParserT s st m (Int, Int)
-gridPart ch = do
- dashes <- many1 (char ch)
- char '+'
- return (length dashes, length dashes + 1)
-
-gridDashedLines :: Stream s m Char => Char -> ParserT s st m [(Int,Int)]
-gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline
-
-removeFinalBar :: String -> String
-removeFinalBar =
- reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse
-
--- | Separator between rows of grid table.
-gridTableSep :: Stream s m Char => Char -> ParserT s ParserState m Char
-gridTableSep ch = try $ gridDashedLines ch >> return '\n'
-
--- | Parse header for a grid table.
-gridTableHeader :: Stream [Char] m Char
- => Bool -- ^ Headerless table
- -> ParserT [Char] ParserState m Blocks
- -> ParserT [Char] ParserState m ([Blocks], [Alignment], [Int])
-gridTableHeader headless blocks = try $ do
- optional blanklines
- dashes <- gridDashedLines '-'
- rawContent <- if headless
- then return $ repeat ""
- else many1
- (notFollowedBy (gridTableSep '=') >> char '|' >>
- many1Till anyChar newline)
- if headless
- then return ()
- else gridTableSep '=' >> return ()
- let lines' = map snd dashes
- let indices = scanl (+) 0 lines'
- let aligns = replicate (length lines') AlignDefault
- -- RST does not have a notion of alignments
- let rawHeads = if headless
- then replicate (length dashes) ""
- else map (intercalate " ") $ transpose
- $ map (gridTableSplitLine indices) rawContent
- heads <- mapM (parseFromString blocks) $ map trim rawHeads
- return (heads, aligns, indices)
-
-gridTableRawLine :: Stream s m Char => [Int] -> ParserT s ParserState m [String]
-gridTableRawLine indices = do
- char '|'
- line <- many1Till anyChar newline
- return (gridTableSplitLine indices line)
-
--- | Parse row of grid table.
-gridTableRow :: Stream [Char] m Char
- => ParserT [Char] ParserState m Blocks
- -> [Int]
- -> ParserT [Char] ParserState m [Blocks]
-gridTableRow blocks indices = do
- colLines <- many1 (gridTableRawLine indices)
- let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $
- transpose colLines
- mapM (liftM compactifyCell . parseFromString blocks) cols
-
-removeOneLeadingSpace :: [String] -> [String]
-removeOneLeadingSpace xs =
- if all startsWithSpace xs
- then map (drop 1) xs
- else xs
- where startsWithSpace "" = True
- startsWithSpace (y:_) = y == ' '
-
-compactifyCell :: Blocks -> Blocks
-compactifyCell bs = head $ compactify [bs]
-
--- | Parse footer for a grid table.
-gridTableFooter :: Stream s m Char => ParserT s ParserState m [Char]
-gridTableFooter = blanklines
-
----
-
--- | Removes the ParsecT layer from the monad transformer stack
-readWithM :: (Monad m)
- => ParserT [Char] st m a -- ^ parser
- -> st -- ^ initial state
- -> String -- ^ input
- -> m (Either PandocError a)
-readWithM parser state input =
- mapLeft (PandocParsecError input) `liftM` runParserT parser state "source" input
-
-
--- | Parse a string with a given parser and state
-readWith :: Parser [Char] st a
- -> st
- -> String
- -> Either PandocError a
-readWith p t inp = runIdentity $ readWithM p t inp
-
--- | Parse a string with @parser@ (for testing).
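--- For example:
---
--- > testStringWith (many1 letter) "hello world"
--- > -- prints: Right "hello"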
-testStringWith :: (Show a)
- => ParserT [Char] ParserState Identity a
- -> [Char]
- -> IO ()
-testStringWith parser str = UTF8.putStrLn $ show $
- readWith parser defaultParserState str
-
--- | Parser state.
-data ParserState = ParserState
- { stateOptions :: ReaderOptions, -- ^ User options
- stateParserContext :: ParserContext, -- ^ Inside list?
- stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment?
- stateAllowLinks :: Bool, -- ^ Allow parsing of links
- stateMaxNestingLevel :: Int, -- ^ Max # of nested Strong/Emph
- stateLastStrPos :: Maybe SourcePos, -- ^ Position after last str parsed
- stateKeys :: KeyTable, -- ^ List of reference keys
- stateHeaderKeys :: KeyTable, -- ^ List of implicit header ref keys
- stateSubstitutions :: SubstTable, -- ^ List of substitution references
- stateNotes :: NoteTable, -- ^ List of notes (raw bodies)
- stateNotes' :: NoteTable', -- ^ List of notes (parsed bodies)
- stateMeta :: Meta, -- ^ Document metadata
- stateMeta' :: F Meta, -- ^ Document metadata
- stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used
- stateHeaders :: M.Map Inlines String, -- ^ List of headers and ids (used for implicit ref links)
- stateIdentifiers :: Set.Set String, -- ^ Header identifiers used
- stateNextExample :: Int, -- ^ Number of next example
- stateExamples :: M.Map String Int, -- ^ Map from example labels to numbers
- stateHasChapters :: Bool, -- ^ True if \chapter encountered
- stateMacros :: [Macro], -- ^ List of macros defined so far
- stateRstDefaultRole :: String, -- ^ Current rST default interpreted text role
- stateRstCustomRoles :: M.Map String (String, Maybe String, Attr), -- ^ Current rST custom text roles
- -- Triple represents: 1) Base role, 2) Optional format (only for :raw:
- -- roles), 3) Additional classes (rest of Attr is unused).
- stateCaption :: Maybe Inlines, -- ^ Caption in current environment
- stateInHtmlBlock :: Maybe String, -- ^ Tag type of HTML block being parsed
- stateContainers :: [String], -- ^ parent include files
- stateLogMessages :: [LogMessage], -- ^ log messages
- stateMarkdownAttribute :: Bool -- ^ True if in markdown=1 context
- }
-
-instance Default ParserState where
- def = defaultParserState
-
-instance HasMeta ParserState where
- setMeta field val st =
- st{ stateMeta = setMeta field val $ stateMeta st }
- deleteMeta field st =
- st{ stateMeta = deleteMeta field $ stateMeta st }
-
-class HasReaderOptions st where
- extractReaderOptions :: st -> ReaderOptions
- getOption :: (Stream s m t) => (ReaderOptions -> b) -> ParserT s st m b
- -- default
- getOption f = (f . extractReaderOptions) <$> getState
-
-class HasQuoteContext st m where
- getQuoteContext :: (Stream s m t) => ParsecT s st m QuoteContext
- withQuoteContext :: QuoteContext -> ParsecT s st m a -> ParsecT s st m a
-
-instance Monad m => HasQuoteContext ParserState m where
- getQuoteContext = stateQuoteContext <$> getState
- withQuoteContext context parser = do
- oldState <- getState
- let oldQuoteContext = stateQuoteContext oldState
- setState oldState { stateQuoteContext = context }
- result <- parser
- newState <- getState
- setState newState { stateQuoteContext = oldQuoteContext }
- return result
-
-instance HasReaderOptions ParserState where
- extractReaderOptions = stateOptions
-
-class HasHeaderMap st where
- extractHeaderMap :: st -> M.Map Inlines String
- updateHeaderMap :: (M.Map Inlines String -> M.Map Inlines String) ->
- st -> st
-
-instance HasHeaderMap ParserState where
- extractHeaderMap = stateHeaders
- updateHeaderMap f st = st{ stateHeaders = f $ stateHeaders st }
-
-class HasIdentifierList st where
- extractIdentifierList :: st -> Set.Set String
- updateIdentifierList :: (Set.Set String -> Set.Set String) -> st -> st
-
-instance HasIdentifierList ParserState where
- extractIdentifierList = stateIdentifiers
- updateIdentifierList f st = st{ stateIdentifiers = f $ stateIdentifiers st }
-
-class HasMacros st where
- extractMacros :: st -> [Macro]
- updateMacros :: ([Macro] -> [Macro]) -> st -> st
-
-instance HasMacros ParserState where
- extractMacros = stateMacros
- updateMacros f st = st{ stateMacros = f $ stateMacros st }
-
-class HasLastStrPosition st where
- setLastStrPos :: SourcePos -> st -> st
- getLastStrPos :: st -> Maybe SourcePos
-
-instance HasLastStrPosition ParserState where
- setLastStrPos pos st = st{ stateLastStrPos = Just pos }
- getLastStrPos st = stateLastStrPos st
-
-class HasLogMessages st where
- addLogMessage :: LogMessage -> st -> st
- getLogMessages :: st -> [LogMessage]
-
-instance HasLogMessages ParserState where
- addLogMessage msg st = st{ stateLogMessages = msg : stateLogMessages st }
- getLogMessages st = reverse $ stateLogMessages st
-
-defaultParserState :: ParserState
-defaultParserState =
- ParserState { stateOptions = def,
- stateParserContext = NullState,
- stateQuoteContext = NoQuote,
- stateAllowLinks = True,
- stateMaxNestingLevel = 6,
- stateLastStrPos = Nothing,
- stateKeys = M.empty,
- stateHeaderKeys = M.empty,
- stateSubstitutions = M.empty,
- stateNotes = [],
- stateNotes' = [],
- stateMeta = nullMeta,
- stateMeta' = return nullMeta,
- stateHeaderTable = [],
- stateHeaders = M.empty,
- stateIdentifiers = Set.empty,
- stateNextExample = 1,
- stateExamples = M.empty,
- stateHasChapters = False,
- stateMacros = [],
- stateRstDefaultRole = "title-reference",
- stateRstCustomRoles = M.empty,
- stateCaption = Nothing,
- stateInHtmlBlock = Nothing,
- stateContainers = [],
- stateLogMessages = [],
- stateMarkdownAttribute = False
- }
-
--- | Add a log message.
-logMessage :: (Stream s m a, HasLogMessages st)
- => LogMessage -> ParserT s st m ()
-logMessage msg = updateState (addLogMessage msg)
-
--- | Report all the accumulated log messages, according to verbosity level.
-reportLogMessages :: (PandocMonad m, HasLogMessages st) => ParserT s st m ()
-reportLogMessages = do
- msgs <- getLogMessages <$> getState
- mapM_ report msgs
-
--- | Succeed only if the extension is enabled.
-guardEnabled :: (Stream s m a, HasReaderOptions st) => Extension -> ParserT s st m ()
-guardEnabled ext = getOption readerExtensions >>= guard . extensionEnabled ext
-
--- | Succeed only if the extension is disabled.
-guardDisabled :: (Stream s m a, HasReaderOptions st) => Extension -> ParserT s st m ()
-guardDisabled ext = getOption readerExtensions >>= guard . not . extensionEnabled ext
-
--- | Update the position on which the last string ended.
-updateLastStrPos :: (Stream s m a, HasLastStrPosition st) => ParserT s st m ()
-updateLastStrPos = getPosition >>= updateState . setLastStrPos
-
--- | Whether we are /not/ right after the end of a string.
-notAfterString :: (Stream s m a, HasLastStrPosition st) => ParserT s st m Bool
-notAfterString = do
- pos <- getPosition
- st <- getState
- return $ getLastStrPos st /= Just pos
-
-data HeaderType
- = SingleHeader Char -- ^ Single line of characters underneath
- | DoubleHeader Char -- ^ Lines of characters above and below
- deriving (Eq, Show)
-
-data ParserContext
- = ListItemState -- ^ Used when running parser on list item contents
- | NullState -- ^ Default state
- deriving (Eq, Show)
-
-data QuoteContext
- = InSingleQuote -- ^ Used when parsing inside single quotes
- | InDoubleQuote -- ^ Used when parsing inside double quotes
- | NoQuote -- ^ Used when not parsing inside quotes
- deriving (Eq, Show)
-
-type NoteTable = [(String, String)]
-
-type NoteTable' = [(String, F Blocks)] -- used in markdown reader
-
-newtype Key = Key String deriving (Show, Read, Eq, Ord)
-
-toKey :: String -> Key
-toKey = Key . map toLower . unwords . words . unbracket
- where unbracket ('[':xs) | "]" `isSuffixOf` xs = take (length xs - 1) xs
- unbracket xs = xs
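-
--- For example, keys are case-insensitive, whitespace-normalized, and stripped
--- of enclosing brackets:
---
--- > toKey "[Foo  Bar]"   -- Key "foo bar"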
-
-type KeyTable = M.Map Key (Target, Attr)
-
-type SubstTable = M.Map Key Inlines
-
--- | Add header to the list of headers in state, together
--- with its associated identifier. If the identifier is null
--- and the auto_identifiers extension is set, generate a new
--- unique identifier, and update the list of identifiers
--- in state.
-registerHeader :: (Stream s m a, HasReaderOptions st, HasHeaderMap st, HasIdentifierList st)
- => Attr -> Inlines -> ParserT s st m Attr
-registerHeader (ident,classes,kvs) header' = do
- ids <- extractIdentifierList <$> getState
- exts <- getOption readerExtensions
- let insert' = M.insertWith (\_new old -> old)
- if null ident && Ext_auto_identifiers `extensionEnabled` exts
- then do
- let id' = uniqueIdent (B.toList header') ids
- let id'' = if Ext_ascii_identifiers `extensionEnabled` exts
- then catMaybes $ map toAsciiChar id'
- else id'
- updateState $ updateIdentifierList $ Set.insert id'
- updateState $ updateIdentifierList $ Set.insert id''
- updateState $ updateHeaderMap $ insert' header' id'
- return (id'',classes,kvs)
- else do
- unless (null ident) $
- updateState $ updateHeaderMap $ insert' header' ident
- return (ident,classes,kvs)
-
-smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
- => ParserT s st m Inlines
- -> ParserT s st m Inlines
-smartPunctuation inlineParser = do
- guardEnabled Ext_smart
- choice [ quoted inlineParser, apostrophe, dash, ellipses ]
-
-apostrophe :: Stream s m Char => ParserT s st m Inlines
-apostrophe = (char '\'' <|> char '\8217') >> return (B.str "\x2019")
-
-quoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
- => ParserT s st m Inlines
- -> ParserT s st m Inlines
-quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser
-
-singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
- => ParserT s st m Inlines
- -> ParserT s st m Inlines
-singleQuoted inlineParser = try $ do
- singleQuoteStart
- withQuoteContext InSingleQuote $ many1Till inlineParser singleQuoteEnd >>=
- return . B.singleQuoted . mconcat
-
-doubleQuoted :: (HasQuoteContext st m, Stream s m Char)
- => ParserT s st m Inlines
- -> ParserT s st m Inlines
-doubleQuoted inlineParser = try $ do
- doubleQuoteStart
- withQuoteContext InDoubleQuote $ manyTill inlineParser doubleQuoteEnd >>=
- return . B.doubleQuoted . mconcat
-
-failIfInQuoteContext :: (HasQuoteContext st m, Stream s m t)
- => QuoteContext
- -> ParserT s st m ()
-failIfInQuoteContext context = do
- context' <- getQuoteContext
- if context' == context
- then fail "already inside quotes"
- else return ()
-
-charOrRef :: Stream s m Char => String -> ParserT s st m Char
-charOrRef cs =
- oneOf cs <|> try (do c <- characterReference
- guard (c `elem` cs)
- return c)
-
-singleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
- => ParserT s st m ()
-singleQuoteStart = do
- failIfInQuoteContext InSingleQuote
- -- single quote start can't be right after str
- guard =<< notAfterString
- () <$ charOrRef "'\8216\145"
-
-singleQuoteEnd :: Stream s m Char
- => ParserT s st m ()
-singleQuoteEnd = try $ do
- charOrRef "'\8217\146"
- notFollowedBy alphaNum
-
-doubleQuoteStart :: (HasQuoteContext st m, Stream s m Char)
- => ParserT s st m ()
-doubleQuoteStart = do
- failIfInQuoteContext InDoubleQuote
- try $ do charOrRef "\"\8220\147"
- notFollowedBy . satisfy $ flip elem [' ', '\t', '\n']
-
-doubleQuoteEnd :: Stream s m Char
- => ParserT s st m ()
-doubleQuoteEnd = void (charOrRef "\"\8221\148")
-
-ellipses :: Stream s m Char
- => ParserT s st m Inlines
-ellipses = try (string "..." >> return (B.str "\8230"))
-
-dash :: (HasReaderOptions st, Stream s m Char)
- => ParserT s st m Inlines
-dash = try $ do
- oldDashes <- extensionEnabled Ext_old_dashes <$> getOption readerExtensions
- if oldDashes
- then do
- char '-'
- (char '-' >> return (B.str "\8212"))
- <|> (lookAhead digit >> return (B.str "\8211"))
- else do
- string "--"
- (char '-' >> return (B.str "\8212"))
- <|> return (B.str "\8211")
-
--- This is used to prevent exponential blowups for things like:
--- a**a*a**a*a**a*a**a*a**a*a**a*a**
-nested :: Stream s m a
- => ParserT s ParserState m a
- -> ParserT s ParserState m a
-nested p = do
- nestlevel <- stateMaxNestingLevel <$> getState
- guard $ nestlevel > 0
- updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 }
- res <- p
- updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
- return res
-
-citeKey :: (Stream s m Char, HasLastStrPosition st)
- => ParserT s st m (Bool, String)
-citeKey = try $ do
- guard =<< notAfterString
- suppress_author <- option False (char '-' *> return True)
- char '@'
- firstChar <- alphaNum <|> char '_' <|> char '*' -- @* for wildcard in nocite
- let regchar = satisfy (\c -> isAlphaNum c || c == '_')
- let internal p = try $ p <* lookAhead regchar
- rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/") <|>
- try (oneOf ":/" <* lookAhead (char '/'))
- let key = firstChar:rest
- return (suppress_author, key)
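-
--- For example, a leading @-@ suppresses the author:
---
--- > runParser citeKey defaultParserState "" "-@doe2006 [p. 33]"
--- > -- Right (True,"doe2006")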
-
-
-token :: (Stream s m t)
- => (t -> String)
- -> (t -> SourcePos)
- -> (t -> Maybe a)
- -> ParsecT s st m a
-token pp pos match = tokenPrim pp (\_ t _ -> pos t) match
-
---
--- Macros
---
-
--- | Parse a \newcommand or \renewcommand macro definition.
-macro :: (Stream [Char] m Char, HasMacros st, HasReaderOptions st)
- => ParserT [Char] st m Blocks
-macro = do
- apply <- getOption readerApplyMacros
- inp <- getInput
- case parseMacroDefinitions inp of
- ([], _) -> mzero
- (ms, rest) -> do def' <- count (length inp - length rest) anyChar
- if apply
- then do
- updateState $ \st ->
- updateMacros (ms ++) st
- return mempty
- else return $ rawBlock "latex" def'
-
--- | Apply current macros to string.
-applyMacros' :: (HasReaderOptions st, HasMacros st, Stream [Char] m Char)
- => String
- -> ParserT [Char] st m String
-applyMacros' target = do
- apply <- getOption readerApplyMacros
- if apply
- then do macros <- extractMacros <$> getState
- return $ applyMacros macros target
- else return target
-
-infixr 5 <+?>
-(<+?>) :: (Monoid a) => ParserT s st m a -> ParserT s st m a -> ParserT s st m a
-a <+?> b = a >>= flip fmap (try b <|> return mempty) . (<>)
-
-extractIdClass :: Attr -> Attr
-extractIdClass (ident, cls, kvs) = (ident', cls', kvs')
- where
- ident' = case (lookup "id" kvs) of
- Just v -> v
- Nothing -> ident
- cls' = case (lookup "class" kvs) of
- Just cl -> words cl
- Nothing -> cls
- kvs' = filter (\(k,_) -> k /= "id" && k /= "class") kvs
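-
--- For example (assuming the @id@ and @class@ pairs are meant to be removed
--- from the remaining key-value list):
---
--- > extractIdClass ("", [], [("id","foo"),("class","bar baz")])
--- > -- ("foo",["bar","baz"],[])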
-
-insertIncludedFile :: PandocMonad m
- => ParserT String ParserState m Blocks
- -> [FilePath] -> FilePath
- -> ParserT String ParserState m Blocks
-insertIncludedFile blocks dirs f = do
- oldPos <- getPosition
- oldInput <- getInput
- containers <- stateContainers <$> getState
- when (f `elem` containers) $
- throwError $ PandocParseError $ "Include file loop at " ++ show oldPos
- updateState $ \s -> s{ stateContainers = f : stateContainers s }
- mbcontents <- readFileFromDirs dirs f
- contents <- case mbcontents of
- Just s -> return s
- Nothing -> do
- report $ CouldNotLoadIncludeFile f oldPos
- return ""
- setPosition $ newPos f 1 1
- setInput contents
- bs <- blocks
- setInput oldInput
- setPosition oldPos
- updateState $ \s -> s{ stateContainers = tail $ stateContainers s }
- return bs