1 files changed, 69 insertions, 28 deletions
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index d1fba1e21..c316e9220 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -5,7 +5,7 @@
 , MultiParamTypeClasses
 , FlexibleInstances #-}
 {-
-Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -24,7 +24,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Parsing
-   Copyright   : Copyright (C) 2006-2014 John MacFarlane
+   Copyright   : Copyright (C) 2006-2015 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -65,6 +65,7 @@ module Text.Pandoc.Parsing ( anyLine,
                              widthsFromIndices,
                              gridTableWith,
                              readWith,
+                             readWithWarnings,
                              readWithM,
                              testStringWith,
                              guardEnabled,
@@ -162,6 +163,8 @@ module Text.Pandoc.Parsing ( anyLine,
                              setSourceColumn,
                              setSourceLine,
                              newPos,
+                             addWarning,
+                             (<+?>)
                              )
 where
 
@@ -175,7 +178,7 @@ import Text.Parsec hiding (token)
 import Text.Parsec.Pos (newPos)
 import Data.Char ( toLower, toUpper, ord, chr, isAscii, isAlphaNum,
                    isHexDigit, isSpace )
-import Data.List ( intercalate, transpose )
+import Data.List ( intercalate, transpose, isSuffixOf )
 import Text.Pandoc.Shared
 import qualified Data.Map as M
 import Text.TeXMath.Readers.TeX.Macros (applyMacros, Macro,
@@ -190,6 +193,8 @@ import Control.Applicative ((<$>), (<*>), (*>), (<*), (<$), Applicative)
 import Data.Monoid
 import Data.Maybe (catMaybes)
 
+import Text.Pandoc.Error
+
 type Parser t s = Parsec t s
 
 type ParserT = ParsecT
@@ -312,12 +317,14 @@ stringAnyCase (x:xs) = do
   return (firstChar:rest)
 
 -- | Parse contents of 'str' using 'parser' and return result.
-parseFromString :: Stream s m t => ParserT s st m a -> s -> ParserT s st m a
+parseFromString :: Monad m => ParserT String st m a -> String -> ParserT String st m a
 parseFromString parser str = do
   oldPos <- getPosition
   oldInput <- getInput
   setInput str
   result <- parser
+  spaces
+  eof
   setInput oldInput
   setPosition oldPos
   return result
@@ -441,18 +448,17 @@ uri :: Stream [Char] m Char => ParserT [Char] st m (String, String)
 uri = try $ do
   scheme <- uriScheme
   char ':'
-  -- We allow punctuation except at the end, since
+  -- We allow sentence punctuation except at the end, since
   -- we don't want the trailing '.' in 'http://google.com.' We want to allow
   -- http://en.wikipedia.org/wiki/State_of_emergency_(disambiguation)
   -- as a URL, while NOT picking up the closing paren in
   -- (http://wikipedia.org). So we include balanced parens in the URL.
-  let isWordChar c = isAlphaNum c || c == '_' || c == '/' || c == '+' ||
-                         not (isAscii c)
+  let isWordChar c = isAlphaNum c || c `elem` "#$%*+/@\\_-"
   let wordChar = satisfy isWordChar
   let percentEscaped = try $ char '%' >> skipMany1 (satisfy isHexDigit)
   let entity = () <$ characterReference
   let punct = skipMany1 (char ',')
-          <|> () <$ (satisfy (\c -> not (isSpace c) && c /= '<'))
+          <|> () <$ (satisfy (\c -> not (isSpace c) && c /= '<' && c /= '>'))
   let uriChunk =  skipMany1 wordChar
               <|> percentEscaped
               <|> entity
@@ -472,7 +478,12 @@ mathInlineWith op cl = try $ do
   string op
   notFollowedBy space
   words' <- many1Till (count 1 (noneOf " \t\n\\")
-                   <|> (char '\\' >> anyChar >>= \c -> return ['\\',c])
+                   <|> (char '\\' >>
+                           -- This next clause is needed because \text{..} can
+                           -- contain $, \(\), etc.
+                           (try (string "text" >>
+                                 (("\\text" ++) <$> inBalancedBraces 0 ""))
+                            <|>  (\c -> ['\\',c]) <$> anyChar))
                    <|> do (blankline <* notFollowedBy' blankline) <|>
                              (oneOf " \t" <* skipMany (oneOf " \t"))
                           notFollowedBy (char '$')
@@ -480,6 +491,23 @@ mathInlineWith op cl = try $ do
                     ) (try $ string cl)
   notFollowedBy digit  -- to prevent capture of $5
   return $ concat words'
+ where
+  inBalancedBraces :: Stream s m Char => Int -> String -> ParserT s st m String
+  inBalancedBraces 0 "" = do
+    c <- anyChar
+    if c == '{'
+       then inBalancedBraces 1 "{"
+       else mzero
+  inBalancedBraces 0 s = return $ reverse s
+  inBalancedBraces numOpen ('\\':xs) = do
+    c <- anyChar
+    inBalancedBraces numOpen (c:'\\':xs)
+  inBalancedBraces numOpen xs = do
+    c <- anyChar
+    case c of
+         '}' -> inBalancedBraces (numOpen - 1) (c:xs)
+         '{' -> inBalancedBraces (numOpen + 1) (c:xs)
+         _   -> inBalancedBraces numOpen (c:xs)
 
 mathDisplayWith :: Stream s m Char => String -> String -> ParserT s st m String
 mathDisplayWith op cl = try $ do
@@ -837,27 +865,27 @@ readWithM :: (Monad m, Functor m)
           => ParserT [Char] st m a       -- ^ parser
           -> st                       -- ^ initial state
           -> String                   -- ^ input
-          -> m a
+          -> m (Either PandocError a)
 readWithM parser state input =
-    handleError <$> (runParserT parser state "source" input)
-    where
-      handleError (Left err') =
-        let errPos = errorPos err'
-            errLine = sourceLine errPos
-            errColumn = sourceColumn errPos
-            theline = (lines input ++ [""]) !! (errLine - 1)
-        in  error $ "\nError at " ++ show  err' ++ "\n" ++
-                theline ++ "\n" ++ replicate (errColumn - 1) ' ' ++
-                "^"
-      handleError (Right result) = result
+    mapLeft (ParsecError input) <$> runParserT parser state "source" input
+
 
 -- | Parse a string with a given parser and state
 readWith :: Parser [Char] st a
          -> st
          -> String
-         -> a
+         -> Either PandocError a
 readWith p t inp = runIdentity $ readWithM p t inp
 
+readWithWarnings :: Parser [Char] ParserState a
+                    -> ParserState
+                    -> String
+                    -> Either PandocError (a, [String])
+readWithWarnings p = readWith $ do
+         doc <- p
+         warnings <- stateWarnings <$> getState
+         return (doc, warnings)
+
 -- | Parse a string with @parser@ (for testing).
 testStringWith :: (Show a, Stream [Char] Identity Char)
                => ParserT [Char] ParserState Identity a
@@ -874,7 +902,8 @@ data ParserState = ParserState
       stateAllowLinks      :: Bool,          -- ^ Allow parsing of links
       stateMaxNestingLevel :: Int,           -- ^ Max # of nested Strong/Emph
       stateLastStrPos      :: Maybe SourcePos, -- ^ Position after last str parsed
-      stateKeys            :: KeyTable,      -- ^ List of reference keys (with fallbacks)
+      stateKeys            :: KeyTable,      -- ^ List of reference keys
+      stateHeaderKeys      :: KeyTable,      -- ^ List of implicit header ref keys
       stateSubstitutions   :: SubstTable,    -- ^ List of substitution references
       stateNotes           :: NoteTable,     -- ^ List of notes (raw bodies)
       stateNotes'          :: NoteTable',    -- ^ List of notes (parsed bodies)
@@ -888,10 +917,9 @@ data ParserState = ParserState
       stateHasChapters     :: Bool,          -- ^ True if \chapter encountered
       stateMacros          :: [Macro],       -- ^ List of macros defined so far
       stateRstDefaultRole  :: String,        -- ^ Current rST default interpreted text role
-      stateRstCustomRoles  :: M.Map String (String, Maybe String, Attr -> (String, Attr)), -- ^ Current rST custom text roles
+      stateRstCustomRoles  :: M.Map String (String, Maybe String, Attr), -- ^ Current rST custom text roles
       -- Triple represents: 1) Base role, 2) Optional format (only for :raw:
-      -- roles), 3) Source language annotation for code (could be used to
-      -- annotate role classes too).
+      -- roles), 3) Additional classes (rest of Attr is unused)).
       stateCaption         :: Maybe Inlines, -- ^ Caption in current environment
       stateInHtmlBlock     :: Maybe String,  -- ^ Tag type of HTML block being parsed
       stateMarkdownAttribute :: Bool,        -- ^ True if in markdown=1 context
@@ -973,6 +1001,7 @@ defaultParserState =
                   stateMaxNestingLevel = 6,
                   stateLastStrPos      = Nothing,
                   stateKeys            = M.empty,
+                  stateHeaderKeys      = M.empty,
                   stateSubstitutions   = M.empty,
                   stateNotes           = [],
                   stateNotes'          = [],
@@ -1034,7 +1063,9 @@ type NoteTable' = [(String, F Blocks)]  -- used in markdown reader
 newtype Key = Key String deriving (Show, Read, Eq, Ord)
 
 toKey :: String -> Key
-toKey = Key . map toLower . unwords . words
+toKey = Key . map toLower . unwords . words . unbracket
+  where unbracket ('[':xs) | "]" `isSuffixOf` xs = take (length xs - 1) xs
+        unbracket xs       = xs
 
 type KeyTable = M.Map Key Target
 
@@ -1178,7 +1209,7 @@ citeKey = try $ do
   guard =<< notAfterString
   suppress_author <- option False (char '-' *> return True)
   char '@'
-  firstChar <- letter <|> char '_'
+  firstChar <- alphaNum <|> char '_' <|> char '*' -- @* for wildcard in nocite
   let regchar = satisfy (\c -> isAlphaNum c || c == '_')
   let internal p = try $ p <* lookAhead regchar
   rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/")
@@ -1223,3 +1254,13 @@ applyMacros' target = do
      then do macros <- extractMacros <$> getState
              return $ applyMacros macros target
      else return target
+
+-- | Append a warning to the log.
+addWarning :: Maybe SourcePos -> String -> Parser [Char] ParserState ()
+addWarning mbpos msg =
+  updateState $ \st -> st{
+    stateWarnings = (msg ++ maybe "" (\pos -> " " ++ show pos) mbpos) :
+                     stateWarnings st }
+infixr 5 <+?>
+(<+?>) :: (Monoid a, Monad m) => ParserT s st m a -> ParserT s st m a -> ParserT s st m a
+a <+?> b = a >>= flip fmap (try b <|> return mempty) . (<>)