diff options
Diffstat (limited to 'src/Text/Pandoc/Parsing.hs')
| -rw-r--r-- | src/Text/Pandoc/Parsing.hs | 97 |
1 files changed, 69 insertions, 28 deletions
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index d1fba1e21..c316e9220 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -5,7 +5,7 @@ , MultiParamTypeClasses , FlexibleInstances #-} {- -Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -24,7 +24,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Parsing - Copyright : Copyright (C) 2006-2014 John MacFarlane + Copyright : Copyright (C) 2006-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -65,6 +65,7 @@ module Text.Pandoc.Parsing ( anyLine, widthsFromIndices, gridTableWith, readWith, + readWithWarnings, readWithM, testStringWith, guardEnabled, @@ -162,6 +163,8 @@ module Text.Pandoc.Parsing ( anyLine, setSourceColumn, setSourceLine, newPos, + addWarning, + (<+?>) ) where @@ -175,7 +178,7 @@ import Text.Parsec hiding (token) import Text.Parsec.Pos (newPos) import Data.Char ( toLower, toUpper, ord, chr, isAscii, isAlphaNum, isHexDigit, isSpace ) -import Data.List ( intercalate, transpose ) +import Data.List ( intercalate, transpose, isSuffixOf ) import Text.Pandoc.Shared import qualified Data.Map as M import Text.TeXMath.Readers.TeX.Macros (applyMacros, Macro, @@ -190,6 +193,8 @@ import Control.Applicative ((<$>), (<*>), (*>), (<*), (<$), Applicative) import Data.Monoid import Data.Maybe (catMaybes) +import Text.Pandoc.Error + type Parser t s = Parsec t s type ParserT = ParsecT @@ -312,12 +317,14 @@ stringAnyCase (x:xs) = do return (firstChar:rest) -- | Parse contents of 'str' using 'parser' and return result. -parseFromString :: Stream s m t => ParserT s st m a -> s -> ParserT s st m a +parseFromString :: Monad m => ParserT String st m a -> String -> ParserT String st m a parseFromString parser str = do oldPos <- getPosition oldInput <- getInput setInput str result <- parser + spaces + eof setInput oldInput setPosition oldPos return result @@ -441,18 +448,17 @@ uri :: Stream [Char] m Char => ParserT [Char] st m (String, String) uri = try $ do scheme <- uriScheme char ':' - -- We allow punctuation except at the end, since + -- We allow sentence punctuation except at the end, since -- we don't want the trailing '.' in 'http://google.com.' We want to allow -- http://en.wikipedia.org/wiki/State_of_emergency_(disambiguation) -- as a URL, while NOT picking up the closing paren in -- (http://wikipedia.org). So we include balanced parens in the URL. - let isWordChar c = isAlphaNum c || c == '_' || c == '/' || c == '+' || - not (isAscii c) + let isWordChar c = isAlphaNum c || c `elem` "#$%*+/@\\_-" let wordChar = satisfy isWordChar let percentEscaped = try $ char '%' >> skipMany1 (satisfy isHexDigit) let entity = () <$ characterReference let punct = skipMany1 (char ',') - <|> () <$ (satisfy (\c -> not (isSpace c) && c /= '<')) + <|> () <$ (satisfy (\c -> not (isSpace c) && c /= '<' && c /= '>')) let uriChunk = skipMany1 wordChar <|> percentEscaped <|> entity @@ -472,7 +478,12 @@ mathInlineWith op cl = try $ do string op notFollowedBy space words' <- many1Till (count 1 (noneOf " \t\n\\") - <|> (char '\\' >> anyChar >>= \c -> return ['\\',c]) + <|> (char '\\' >> + -- This next clause is needed because \text{..} can + -- contain $, \(\), etc. + (try (string "text" >> + (("\\text" ++) <$> inBalancedBraces 0 "")) + <|> (\c -> ['\\',c]) <$> anyChar)) <|> do (blankline <* notFollowedBy' blankline) <|> (oneOf " \t" <* skipMany (oneOf " \t")) notFollowedBy (char '$') @@ -480,6 +491,23 @@ mathInlineWith op cl = try $ do ) (try $ string cl) notFollowedBy digit -- to prevent capture of $5 return $ concat words' + where + inBalancedBraces :: Stream s m Char => Int -> String -> ParserT s st m String + inBalancedBraces 0 "" = do + c <- anyChar + if c == '{' + then inBalancedBraces 1 "{" + else mzero + inBalancedBraces 0 s = return $ reverse s + inBalancedBraces numOpen ('\\':xs) = do + c <- anyChar + inBalancedBraces numOpen (c:'\\':xs) + inBalancedBraces numOpen xs = do + c <- anyChar + case c of + '}' -> inBalancedBraces (numOpen - 1) (c:xs) + '{' -> inBalancedBraces (numOpen + 1) (c:xs) + _ -> inBalancedBraces numOpen (c:xs) mathDisplayWith :: Stream s m Char => String -> String -> ParserT s st m String mathDisplayWith op cl = try $ do @@ -837,27 +865,27 @@ readWithM :: (Monad m, Functor m) => ParserT [Char] st m a -- ^ parser -> st -- ^ initial state -> String -- ^ input - -> m a + -> m (Either PandocError a) readWithM parser state input = - handleError <$> (runParserT parser state "source" input) - where - handleError (Left err') = - let errPos = errorPos err' - errLine = sourceLine errPos - errColumn = sourceColumn errPos - theline = (lines input ++ [""]) !! (errLine - 1) - in error $ "\nError at " ++ show err' ++ "\n" ++ - theline ++ "\n" ++ replicate (errColumn - 1) ' ' ++ - "^" - handleError (Right result) = result + mapLeft (ParsecError input) <$> runParserT parser state "source" input + -- | Parse a string with a given parser and state readWith :: Parser [Char] st a -> st -> String - -> a + -> Either PandocError a readWith p t inp = runIdentity $ readWithM p t inp +readWithWarnings :: Parser [Char] ParserState a + -> ParserState + -> String + -> Either PandocError (a, [String]) +readWithWarnings p = readWith $ do + doc <- p + warnings <- stateWarnings <$> getState + return (doc, warnings) + -- | Parse a string with @parser@ (for testing). testStringWith :: (Show a, Stream [Char] Identity Char) => ParserT [Char] ParserState Identity a @@ -874,7 +902,8 @@ data ParserState = ParserState stateAllowLinks :: Bool, -- ^ Allow parsing of links stateMaxNestingLevel :: Int, -- ^ Max # of nested Strong/Emph stateLastStrPos :: Maybe SourcePos, -- ^ Position after last str parsed - stateKeys :: KeyTable, -- ^ List of reference keys (with fallbacks) + stateKeys :: KeyTable, -- ^ List of reference keys + stateHeaderKeys :: KeyTable, -- ^ List of implicit header ref keys stateSubstitutions :: SubstTable, -- ^ List of substitution references stateNotes :: NoteTable, -- ^ List of notes (raw bodies) stateNotes' :: NoteTable', -- ^ List of notes (parsed bodies) @@ -888,10 +917,9 @@ data ParserState = ParserState stateHasChapters :: Bool, -- ^ True if \chapter encountered stateMacros :: [Macro], -- ^ List of macros defined so far stateRstDefaultRole :: String, -- ^ Current rST default interpreted text role - stateRstCustomRoles :: M.Map String (String, Maybe String, Attr -> (String, Attr)), -- ^ Current rST custom text roles + stateRstCustomRoles :: M.Map String (String, Maybe String, Attr), -- ^ Current rST custom text roles -- Triple represents: 1) Base role, 2) Optional format (only for :raw: - -- roles), 3) Source language annotation for code (could be used to - -- annotate role classes too). + -- roles), 3) Additional classes (rest of Attr is unused)). stateCaption :: Maybe Inlines, -- ^ Caption in current environment stateInHtmlBlock :: Maybe String, -- ^ Tag type of HTML block being parsed stateMarkdownAttribute :: Bool, -- ^ True if in markdown=1 context @@ -973,6 +1001,7 @@ defaultParserState = stateMaxNestingLevel = 6, stateLastStrPos = Nothing, stateKeys = M.empty, + stateHeaderKeys = M.empty, stateSubstitutions = M.empty, stateNotes = [], stateNotes' = [], @@ -1034,7 +1063,9 @@ type NoteTable' = [(String, F Blocks)] -- used in markdown reader newtype Key = Key String deriving (Show, Read, Eq, Ord) toKey :: String -> Key -toKey = Key . map toLower . unwords . words +toKey = Key . map toLower . unwords . words . unbracket + where unbracket ('[':xs) | "]" `isSuffixOf` xs = take (length xs - 1) xs + unbracket xs = xs type KeyTable = M.Map Key Target @@ -1178,7 +1209,7 @@ citeKey = try $ do guard =<< notAfterString suppress_author <- option False (char '-' *> return True) char '@' - firstChar <- letter <|> char '_' + firstChar <- alphaNum <|> char '_' <|> char '*' -- @* for wildcard in nocite let regchar = satisfy (\c -> isAlphaNum c || c == '_') let internal p = try $ p <* lookAhead regchar rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/") @@ -1223,3 +1254,13 @@ applyMacros' target = do then do macros <- extractMacros <$> getState return $ applyMacros macros target else return target + +-- | Append a warning to the log. +addWarning :: Maybe SourcePos -> String -> Parser [Char] ParserState () +addWarning mbpos msg = + updateState $ \st -> st{ + stateWarnings = (msg ++ maybe "" (\pos -> " " ++ show pos) mbpos) : + stateWarnings st } +infixr 5 <+?> +(<+?>) :: (Monoid a, Monad m) => ParserT s st m a -> ParserT s st m a -> ParserT s st m a +a <+?> b = a >>= flip fmap (try b <|> return mempty) . (<>) |
