aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Parsing.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Parsing.hs')
-rw-r--r--src/Text/Pandoc/Parsing.hs97
1 files changed, 69 insertions, 28 deletions
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index d1fba1e21..c316e9220 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -5,7 +5,7 @@
, MultiParamTypeClasses
, FlexibleInstances #-}
{-
-Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -24,7 +24,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Parsing
- Copyright : Copyright (C) 2006-2014 John MacFarlane
+ Copyright : Copyright (C) 2006-2015 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -65,6 +65,7 @@ module Text.Pandoc.Parsing ( anyLine,
widthsFromIndices,
gridTableWith,
readWith,
+ readWithWarnings,
readWithM,
testStringWith,
guardEnabled,
@@ -162,6 +163,8 @@ module Text.Pandoc.Parsing ( anyLine,
setSourceColumn,
setSourceLine,
newPos,
+ addWarning,
+ (<+?>)
)
where
@@ -175,7 +178,7 @@ import Text.Parsec hiding (token)
import Text.Parsec.Pos (newPos)
import Data.Char ( toLower, toUpper, ord, chr, isAscii, isAlphaNum,
isHexDigit, isSpace )
-import Data.List ( intercalate, transpose )
+import Data.List ( intercalate, transpose, isSuffixOf )
import Text.Pandoc.Shared
import qualified Data.Map as M
import Text.TeXMath.Readers.TeX.Macros (applyMacros, Macro,
@@ -190,6 +193,8 @@ import Control.Applicative ((<$>), (<*>), (*>), (<*), (<$), Applicative)
import Data.Monoid
import Data.Maybe (catMaybes)
+import Text.Pandoc.Error
+
type Parser t s = Parsec t s
type ParserT = ParsecT
@@ -312,12 +317,14 @@ stringAnyCase (x:xs) = do
return (firstChar:rest)
-- | Parse contents of 'str' using 'parser' and return result.
-parseFromString :: Stream s m t => ParserT s st m a -> s -> ParserT s st m a
+parseFromString :: Monad m => ParserT String st m a -> String -> ParserT String st m a
parseFromString parser str = do
oldPos <- getPosition
oldInput <- getInput
setInput str
result <- parser
+ spaces
+ eof
setInput oldInput
setPosition oldPos
return result
@@ -441,18 +448,17 @@ uri :: Stream [Char] m Char => ParserT [Char] st m (String, String)
uri = try $ do
scheme <- uriScheme
char ':'
- -- We allow punctuation except at the end, since
+ -- We allow sentence punctuation except at the end, since
-- we don't want the trailing '.' in 'http://google.com.' We want to allow
-- http://en.wikipedia.org/wiki/State_of_emergency_(disambiguation)
-- as a URL, while NOT picking up the closing paren in
-- (http://wikipedia.org). So we include balanced parens in the URL.
- let isWordChar c = isAlphaNum c || c == '_' || c == '/' || c == '+' ||
- not (isAscii c)
+ let isWordChar c = isAlphaNum c || c `elem` "#$%*+/@\\_-"
let wordChar = satisfy isWordChar
let percentEscaped = try $ char '%' >> skipMany1 (satisfy isHexDigit)
let entity = () <$ characterReference
let punct = skipMany1 (char ',')
- <|> () <$ (satisfy (\c -> not (isSpace c) && c /= '<'))
+ <|> () <$ (satisfy (\c -> not (isSpace c) && c /= '<' && c /= '>'))
let uriChunk = skipMany1 wordChar
<|> percentEscaped
<|> entity
@@ -472,7 +478,12 @@ mathInlineWith op cl = try $ do
string op
notFollowedBy space
words' <- many1Till (count 1 (noneOf " \t\n\\")
- <|> (char '\\' >> anyChar >>= \c -> return ['\\',c])
+ <|> (char '\\' >>
+ -- This next clause is needed because \text{..} can
+ -- contain $, \(\), etc.
+ (try (string "text" >>
+ (("\\text" ++) <$> inBalancedBraces 0 ""))
+ <|> (\c -> ['\\',c]) <$> anyChar))
<|> do (blankline <* notFollowedBy' blankline) <|>
(oneOf " \t" <* skipMany (oneOf " \t"))
notFollowedBy (char '$')
@@ -480,6 +491,23 @@ mathInlineWith op cl = try $ do
) (try $ string cl)
notFollowedBy digit -- to prevent capture of $5
return $ concat words'
+ where
+ inBalancedBraces :: Stream s m Char => Int -> String -> ParserT s st m String
+ inBalancedBraces 0 "" = do
+ c <- anyChar
+ if c == '{'
+ then inBalancedBraces 1 "{"
+ else mzero
+ inBalancedBraces 0 s = return $ reverse s
+ inBalancedBraces numOpen ('\\':xs) = do
+ c <- anyChar
+ inBalancedBraces numOpen (c:'\\':xs)
+ inBalancedBraces numOpen xs = do
+ c <- anyChar
+ case c of
+ '}' -> inBalancedBraces (numOpen - 1) (c:xs)
+ '{' -> inBalancedBraces (numOpen + 1) (c:xs)
+ _ -> inBalancedBraces numOpen (c:xs)
mathDisplayWith :: Stream s m Char => String -> String -> ParserT s st m String
mathDisplayWith op cl = try $ do
@@ -837,27 +865,27 @@ readWithM :: (Monad m, Functor m)
=> ParserT [Char] st m a -- ^ parser
-> st -- ^ initial state
-> String -- ^ input
- -> m a
+ -> m (Either PandocError a)
readWithM parser state input =
- handleError <$> (runParserT parser state "source" input)
- where
- handleError (Left err') =
- let errPos = errorPos err'
- errLine = sourceLine errPos
- errColumn = sourceColumn errPos
- theline = (lines input ++ [""]) !! (errLine - 1)
- in error $ "\nError at " ++ show err' ++ "\n" ++
- theline ++ "\n" ++ replicate (errColumn - 1) ' ' ++
- "^"
- handleError (Right result) = result
+ mapLeft (ParsecError input) <$> runParserT parser state "source" input
+
-- | Parse a string with a given parser and state
readWith :: Parser [Char] st a
-> st
-> String
- -> a
+ -> Either PandocError a
readWith p t inp = runIdentity $ readWithM p t inp
+readWithWarnings :: Parser [Char] ParserState a
+ -> ParserState
+ -> String
+ -> Either PandocError (a, [String])
+readWithWarnings p = readWith $ do
+ doc <- p
+ warnings <- stateWarnings <$> getState
+ return (doc, warnings)
+
-- | Parse a string with @parser@ (for testing).
testStringWith :: (Show a, Stream [Char] Identity Char)
=> ParserT [Char] ParserState Identity a
@@ -874,7 +902,8 @@ data ParserState = ParserState
stateAllowLinks :: Bool, -- ^ Allow parsing of links
stateMaxNestingLevel :: Int, -- ^ Max # of nested Strong/Emph
stateLastStrPos :: Maybe SourcePos, -- ^ Position after last str parsed
- stateKeys :: KeyTable, -- ^ List of reference keys (with fallbacks)
+ stateKeys :: KeyTable, -- ^ List of reference keys
+ stateHeaderKeys :: KeyTable, -- ^ List of implicit header ref keys
stateSubstitutions :: SubstTable, -- ^ List of substitution references
stateNotes :: NoteTable, -- ^ List of notes (raw bodies)
stateNotes' :: NoteTable', -- ^ List of notes (parsed bodies)
@@ -888,10 +917,9 @@ data ParserState = ParserState
stateHasChapters :: Bool, -- ^ True if \chapter encountered
stateMacros :: [Macro], -- ^ List of macros defined so far
stateRstDefaultRole :: String, -- ^ Current rST default interpreted text role
- stateRstCustomRoles :: M.Map String (String, Maybe String, Attr -> (String, Attr)), -- ^ Current rST custom text roles
+ stateRstCustomRoles :: M.Map String (String, Maybe String, Attr), -- ^ Current rST custom text roles
-- Triple represents: 1) Base role, 2) Optional format (only for :raw:
- -- roles), 3) Source language annotation for code (could be used to
- -- annotate role classes too).
+ -- roles), 3) Additional classes (rest of Attr is unused)).
stateCaption :: Maybe Inlines, -- ^ Caption in current environment
stateInHtmlBlock :: Maybe String, -- ^ Tag type of HTML block being parsed
stateMarkdownAttribute :: Bool, -- ^ True if in markdown=1 context
@@ -973,6 +1001,7 @@ defaultParserState =
stateMaxNestingLevel = 6,
stateLastStrPos = Nothing,
stateKeys = M.empty,
+ stateHeaderKeys = M.empty,
stateSubstitutions = M.empty,
stateNotes = [],
stateNotes' = [],
@@ -1034,7 +1063,9 @@ type NoteTable' = [(String, F Blocks)] -- used in markdown reader
newtype Key = Key String deriving (Show, Read, Eq, Ord)
toKey :: String -> Key
-toKey = Key . map toLower . unwords . words
+toKey = Key . map toLower . unwords . words . unbracket
+ where unbracket ('[':xs) | "]" `isSuffixOf` xs = take (length xs - 1) xs
+ unbracket xs = xs
type KeyTable = M.Map Key Target
@@ -1178,7 +1209,7 @@ citeKey = try $ do
guard =<< notAfterString
suppress_author <- option False (char '-' *> return True)
char '@'
- firstChar <- letter <|> char '_'
+ firstChar <- alphaNum <|> char '_' <|> char '*' -- @* for wildcard in nocite
let regchar = satisfy (\c -> isAlphaNum c || c == '_')
let internal p = try $ p <* lookAhead regchar
rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/")
@@ -1223,3 +1254,13 @@ applyMacros' target = do
then do macros <- extractMacros <$> getState
return $ applyMacros macros target
else return target
+
+-- | Append a warning to the log.
+addWarning :: Maybe SourcePos -> String -> Parser [Char] ParserState ()
+addWarning mbpos msg =
+ updateState $ \st -> st{
+ stateWarnings = (msg ++ maybe "" (\pos -> " " ++ show pos) mbpos) :
+ stateWarnings st }
+infixr 5 <+?>
+(<+?>) :: (Monoid a, Monad m) => ParserT s st m a -> ParserT s st m a -> ParserT s st m a
+a <+?> b = a >>= flip fmap (try b <|> return mempty) . (<>)