1 files changed, 405 insertions, 264 deletions
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 4bae8942b..09445622d 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -5,11 +5,10 @@
 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
 {-# LANGUAGE IncoherentInstances        #-}
 {-# LANGUAGE MultiParamTypeClasses      #-}
-{-# LANGUAGE ViewPatterns               #-}
 {-# LANGUAGE OverloadedStrings          #-}
 {- |
    Module      : Text.Pandoc.Parsing
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -19,8 +18,7 @@
 A utility library with parsers used in pandoc readers.
 -}
 
-module Text.Pandoc.Parsing ( take1WhileP,
-                             takeP,
+module Text.Pandoc.Parsing ( module Text.Pandoc.Sources,
                              countChar,
                              textStr,
                              anyLine,
@@ -105,14 +103,14 @@ module Text.Pandoc.Parsing ( take1WhileP,
                              singleQuoteEnd,
                              doubleQuoteStart,
                              doubleQuoteEnd,
-                             ellipses,
                              apostrophe,
+                             doubleCloseQuote,
+                             ellipses,
                              dash,
                              nested,
                              citeKey,
                              Parser,
                              ParserT,
-                             F,
                              Future(..),
                              runF,
                              askF,
@@ -123,7 +121,6 @@ module Text.Pandoc.Parsing ( take1WhileP,
                              (<+?>),
                              extractIdClass,
                              insertIncludedFile,
-                             insertIncludedFileF,
                              -- * Re-exports from Text.Parsec
                              Stream,
                              runParser,
@@ -134,22 +131,10 @@ module Text.Pandoc.Parsing ( take1WhileP,
                              getInput,
                              setInput,
                              unexpected,
-                             char,
-                             letter,
-                             digit,
-                             alphaNum,
                              skipMany,
                              skipMany1,
-                             spaces,
-                             space,
-                             anyChar,
-                             satisfy,
-                             newline,
-                             string,
                              count,
                              eof,
-                             noneOf,
-                             oneOf,
                              lookAhead,
                              notFollowedBy,
                              many,
@@ -174,6 +159,8 @@ module Text.Pandoc.Parsing ( take1WhileP,
                              SourcePos,
                              getPosition,
                              setPosition,
+                             sourceName,
+                             setSourceName,
                              sourceColumn,
                              sourceLine,
                              setSourceColumn,
@@ -189,48 +176,141 @@ module Text.Pandoc.Parsing ( take1WhileP,
 where
 
 import Control.Monad.Identity
+    ( guard,
+      join,
+      unless,
+      when,
+      void,
+      liftM2,
+      liftM,
+      Identity(..),
+      MonadPlus(mzero) )
 import Control.Monad.Reader
-import Data.Char (chr, isAlphaNum, isAscii, isAsciiUpper,
-                  isPunctuation, isSpace, ord, toLower, toUpper)
-import Data.Default
+    ( asks, runReader, MonadReader(ask), Reader, ReaderT(ReaderT) )
+import Data.Char (chr, isAlphaNum, isAscii, isAsciiUpper, isAsciiLower,
+                  isSpace, ord, toLower, toUpper)
+import Data.Default ( Default(..) )
 import Data.Functor (($>))
 import Data.List (intercalate, transpose)
 import qualified Data.Map as M
-import Data.Maybe (mapMaybe, fromMaybe)
+import Data.Maybe (fromMaybe)
 import qualified Data.Set as Set
-import Data.String
 import Data.Text (Text)
 import qualified Data.Text as T
 import Text.HTML.TagSoup.Entity (lookupEntity)
-import Text.Pandoc.Asciify (toAsciiChar)
+import Text.Pandoc.Asciify (toAsciiText)
 import Text.Pandoc.Builder (Blocks, HasMeta (..), Inlines, trimInlines)
 import qualified Text.Pandoc.Builder as B
 import Text.Pandoc.Class.PandocMonad (PandocMonad, readFileFromDirs, report)
 import Text.Pandoc.Definition
+    ( Target,
+      nullMeta,
+      nullAttr,
+      Meta,
+      ColWidth(ColWidthDefault, ColWidth),
+      TableFoot(TableFoot),
+      TableBody(TableBody),
+      Attr,
+      TableHead(TableHead),
+      Row(..),
+      Alignment(..),
+      Inline(Str),
+      ListNumberDelim(..),
+      ListAttributes,
+      ListNumberStyle(..) )
 import Text.Pandoc.Logging
+    ( LogMessage(CouldNotLoadIncludeFile, DuplicateIdentifier) )
 import Text.Pandoc.Options
+    ( extensionEnabled,
+      Extension(Ext_old_dashes, Ext_tex_math_dollars,
+                Ext_tex_math_single_backslash, Ext_tex_math_double_backslash,
+                Ext_auto_identifiers, Ext_ascii_identifiers, Ext_smart),
+      ReaderOptions(readerTabStop, readerColumns, readerExtensions) )
 import Text.Pandoc.Readers.LaTeX.Types (Macro)
 import Text.Pandoc.Shared
+    ( uniqueIdent,
+      tshow,
+      mapLeft,
+      compactify,
+      trim,
+      trimr,
+      splitTextByIndices,
+      safeRead,
+      trimMath,
+      schemes,
+      escapeURI )
+import Text.Pandoc.Sources
 import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
 import Text.Pandoc.XML (fromEntities)
-import Text.Parsec hiding (token)
-import Text.Parsec.Pos (initialPos, newPos, updatePosString)
-
-import Control.Monad.Except
+import Text.Parsec
+    ( between,
+      setSourceName,
+      Parsec,
+      Column,
+      Line,
+      incSourceLine,
+      incSourceColumn,
+      setSourceLine,
+      setSourceColumn,
+      sourceLine,
+      sourceColumn,
+      sourceName,
+      setSourceName,
+      setPosition,
+      getPosition,
+      updateState,
+      setState,
+      getState,
+      optionMaybe,
+      optional,
+      option,
+      endBy1,
+      endBy,
+      sepEndBy1,
+      sepEndBy,
+      sepBy1,
+      sepBy,
+      try,
+      choice,
+      (<?>),
+      (<|>),
+      manyTill,
+      many1,
+      many,
+      notFollowedBy,
+      lookAhead,
+      eof,
+      count,
+      skipMany1,
+      skipMany,
+      unexpected,
+      setInput,
+      getInput,
+      anyToken,
+      tokenPrim,
+      parse,
+      runParserT,
+      runParser,
+      ParseError,
+      ParsecT,
+      SourcePos,
+      Stream(..) )
+import Text.Parsec.Pos (initialPos, newPos)
+import Control.Monad.Except ( MonadError(throwError) )
 import Text.Pandoc.Error
+    ( PandocError(PandocParseError, PandocParsecError) )
 
 type Parser t s = Parsec t s
 
 type ParserT = ParsecT
 
+
 -- | Reader monad wrapping the parser state. This is used to possibly delay
 -- evaluation until all relevant information has been parsed and made available
 -- in the parser state.
 newtype Future s a = Future { runDelayed :: Reader s a }
   deriving (Monad, Applicative, Functor)
 
-type F = Future ParserState
-
 runF :: Future s a -> s -> a
 runF = runReader . runDelayed
 
@@ -253,70 +333,48 @@ instance (Semigroup a, Monoid a) => Monoid (Future s a) where
   mappend = (<>)
 
 -- | Like @count@, but packs its result
-countChar :: (Stream s m Char, Monad m)
+countChar :: (Stream s m Char, UpdateSourcePos s Char, Monad m)
           => Int
           -> ParsecT s st m Char
           -> ParsecT s st m Text
 countChar n = fmap T.pack . count n
 
 -- | Like @string@, but uses @Text@.
-textStr :: Stream s m Char => Text -> ParsecT s u m Text
+textStr :: (Stream s m Char, UpdateSourcePos s Char)
+        => Text -> ParsecT s u m Text
 textStr t = string (T.unpack t) $> t
 
--- | Parse characters while a predicate is true.
-take1WhileP :: Monad m
-           => (Char -> Bool)
-           -> ParserT Text st m Text
-take1WhileP f = do
-  -- needed to persuade parsec that this won't match an empty string:
-  c <- satisfy f
-  inp <- getInput
-  pos <- getPosition
-  let (t, rest) = T.span f inp
-  setInput rest
-  setPosition $
-    if f '\t' || f '\n'
-       then updatePosString pos $ T.unpack t
-       else incSourceColumn pos (T.length t)
-  return $ T.singleton c <> t
-
--- Parse n characters of input (or the rest of the input if
--- there aren't n characters).
-takeP :: Monad m => Int -> ParserT Text st m Text
-takeP n = do
-  guard (n > 0)
-  -- faster than 'count n anyChar'
-  inp <- getInput
-  pos <- getPosition
-  let (xs, rest) = T.splitAt n inp
-  -- needed to persuade parsec that this won't match an empty string:
-  anyChar
-  setInput rest
-  setPosition $ updatePosString pos $ T.unpack xs
-  return xs
-
--- | Parse any line of text
-anyLine :: Monad m => ParserT Text st m Text
+
+-- | Parse any line of text, returning the contents without the
+-- final newline.
+anyLine :: Monad m => ParserT Sources st m Text
 anyLine = do
   -- This is much faster than:
   -- manyTill anyChar newline
   inp <- getInput
-  pos <- getPosition
-  case T.break (=='\n') inp of
-       (this, T.uncons -> Just ('\n', rest)) -> do
-         -- needed to persuade parsec that this won't match an empty string:
-         anyChar
-         setInput rest
-         setPosition $ incSourceLine (setSourceColumn pos 1) 1
-         return this
-       _ -> mzero
+  case inp of
+    Sources [] -> mzero
+    Sources ((fp,t):inps) ->
+      -- we assume that lines don't span different input files
+      case T.break (=='\n') t of
+           (this, rest)
+             | T.null rest
+             , not (null inps) ->
+                -- line may span different input files, so do it
+                 -- character by character
+                 T.pack <$> manyTill anyChar newline
+             | otherwise -> do --  either end of inputs or newline in rest
+                 setInput $ Sources ((fp, rest):inps)
+                 char '\n' -- needed so parsec knows we won't match empty string
+                           -- and so source pos is updated
+                 return this
 
 -- | Parse any line, include the final newline in the output
-anyLineNewline :: Monad m => ParserT Text st m Text
+anyLineNewline :: Monad m => ParserT Sources st m Text
 anyLineNewline = (<> "\n") <$> anyLine
 
 -- | Parse indent by specified number of spaces (or equiv. tabs)
-indentWith :: Stream s m Char
+indentWith :: (Stream s m Char, UpdateSourcePos s Char)
            => HasReaderOptions st
            => Int -> ParserT s st m Text
 indentWith num = do
@@ -401,11 +459,13 @@ notFollowedBy' p  = try $ join $  do  a <- try p
                                   return (return ())
 -- (This version due to Andrew Pimlott on the Haskell mailing list.)
 
-oneOfStrings' :: Stream s m Char => (Char -> Char -> Bool) -> [Text] -> ParserT s st m Text
+oneOfStrings' :: (Stream s m Char, UpdateSourcePos s Char)
+              => (Char -> Char -> Bool) -> [Text] -> ParserT s st m Text
 oneOfStrings' f = fmap T.pack . oneOfStrings'' f . fmap T.unpack
 
 -- TODO: This should be re-implemented in a Text-aware way
-oneOfStrings'' :: Stream s m Char => (Char -> Char -> Bool) -> [String] -> ParserT s st m String
+oneOfStrings'' :: (Stream s m Char, UpdateSourcePos s Char)
+               => (Char -> Char -> Bool) -> [String] -> ParserT s st m String
 oneOfStrings'' _ []   = Prelude.fail "no strings"
 oneOfStrings'' matches strs = try $ do
   c <- anyChar
@@ -420,14 +480,16 @@ oneOfStrings'' matches strs = try $ do
 -- | Parses one of a list of strings.  If the list contains
 -- two strings one of which is a prefix of the other, the longer
 -- string will be matched if possible.
-oneOfStrings :: Stream s m Char => [Text] -> ParserT s st m Text
+oneOfStrings :: (Stream s m Char, UpdateSourcePos s Char)
+             => [Text] -> ParserT s st m Text
 oneOfStrings = oneOfStrings' (==)
 
 -- | Parses one of a list of strings (tried in order), case insensitive.
 
 -- TODO: This will not be accurate with general Unicode (neither
 -- Text.toLower nor Text.toCaseFold can be implemented with a map)
-oneOfStringsCI :: Stream s m Char => [Text] -> ParserT s st m Text
+oneOfStringsCI :: (Stream s m Char, UpdateSourcePos s Char)
+               => [Text] -> ParserT s st m Text
 oneOfStringsCI = oneOfStrings' ciMatch
   where ciMatch x y = toLower' x == toLower' y
         -- this optimizes toLower by checking common ASCII case
@@ -438,29 +500,41 @@ oneOfStringsCI = oneOfStrings' ciMatch
                    | otherwise = toLower c
 
 -- | Parses a space or tab.
-spaceChar :: Stream s m Char => ParserT s st m Char
+spaceChar :: (Stream s m Char, UpdateSourcePos s Char)
+          => ParserT s st m Char
 spaceChar = satisfy $ \c -> c == ' ' || c == '\t'
 
 -- | Parses a nonspace, nonnewline character.
-nonspaceChar :: Stream s m Char => ParserT s st m Char
-nonspaceChar = noneOf ['\t', '\n', ' ', '\r']
+nonspaceChar :: (Stream s m Char, UpdateSourcePos s Char)
+             => ParserT s st m Char
+nonspaceChar = satisfy (not . isSpaceChar)
+
+isSpaceChar :: Char -> Bool
+isSpaceChar ' '  = True
+isSpaceChar '\t' = True
+isSpaceChar '\n' = True
+isSpaceChar '\r' = True
+isSpaceChar _    = False
 
 -- | Skips zero or more spaces or tabs.
-skipSpaces :: Stream s m Char => ParserT s st m ()
+skipSpaces :: (Stream s m Char, UpdateSourcePos s Char)
+           => ParserT s st m ()
 skipSpaces = skipMany spaceChar
 
 -- | Skips zero or more spaces or tabs, then reads a newline.
-blankline :: Stream s m Char => ParserT s st m Char
+blankline :: (Stream s m Char, UpdateSourcePos s Char)
+          => ParserT s st m Char
 blankline = try $ skipSpaces >> newline
 
 -- | Parses one or more blank lines and returns a string of newlines.
-blanklines :: Stream s m Char => ParserT s st m Text
+blanklines :: (Stream s m Char, UpdateSourcePos s Char)
+           => ParserT s st m Text
 blanklines = T.pack <$> many1 blankline
 
 -- | Gobble n spaces; if tabs are encountered, expand them
 -- and gobble some or all of their spaces, leaving the rest.
 gobbleSpaces :: (HasReaderOptions st, Monad m)
-             => Int -> ParserT Text st m ()
+             => Int -> ParserT Sources st m ()
 gobbleSpaces 0 = return ()
 gobbleSpaces n
   | n < 0     = error "gobbleSpaces called with negative number"
@@ -468,18 +542,26 @@ gobbleSpaces n
       char ' ' <|> eatOneSpaceOfTab
       gobbleSpaces (n - 1)
 
-eatOneSpaceOfTab :: (HasReaderOptions st, Monad m) => ParserT Text st m Char
+eatOneSpaceOfTab :: (HasReaderOptions st, Monad m) => ParserT Sources st m Char
 eatOneSpaceOfTab = do
-  char '\t'
+  lookAhead (char '\t')
+  pos <- getPosition
   tabstop <- getOption readerTabStop
+  -- replace the tab on the input stream with spaces
+  let numSpaces = tabstop - ((sourceColumn pos - 1) `mod` tabstop)
   inp <- getInput
-  setInput $ T.replicate (tabstop - 1) " " <> inp
-  return ' '
+  setInput $
+    case inp of
+      Sources [] -> error "eatOneSpaceOfTab - empty Sources list"
+      Sources ((fp,t):rest) ->
+        -- drop the tab and add spaces
+        Sources ((fp, T.replicate numSpaces " " <> T.drop 1 t):rest)
+  char ' '
 
 -- | Gobble up to n spaces; if tabs are encountered, expand them
 -- and gobble some or all of their spaces, leaving the rest.
 gobbleAtMostSpaces :: (HasReaderOptions st, Monad m)
-                   => Int -> ParserT Text st m Int
+                   => Int -> ParserT Sources st m Int
 gobbleAtMostSpaces 0 = return 0
 gobbleAtMostSpaces n
   | n < 0     = error "gobbleAtMostSpaces called with negative number"
@@ -488,7 +570,8 @@ gobbleAtMostSpaces n
       (+ 1) <$> gobbleAtMostSpaces (n - 1)
 
 -- | Parses material enclosed between start and end parsers.
-enclosed :: (Show end, Stream s  m Char) => ParserT s st m t   -- ^ start parser
+enclosed :: (Show end, Stream s m Char, UpdateSourcePos s Char)
+         => ParserT s st m t   -- ^ start parser
          -> ParserT s st m end  -- ^ end parser
          -> ParserT s st m a    -- ^ content parser (to be used repeatedly)
          -> ParserT s st m [a]
@@ -496,39 +579,41 @@ enclosed start end parser = try $
   start >> notFollowedBy space >> many1Till parser end
 
 -- | Parse string, case insensitive.
-stringAnyCase :: Stream s m Char => Text -> ParserT s st m Text
+stringAnyCase :: (Stream s m Char, UpdateSourcePos s Char)
+              => Text -> ParserT s st m Text
 stringAnyCase = fmap T.pack . stringAnyCase' . T.unpack
 
-stringAnyCase' :: Stream s m Char => String -> ParserT s st m String
+stringAnyCase' :: (Stream s m Char, UpdateSourcePos s Char)
+               => String -> ParserT s st m String
 stringAnyCase' [] = string ""
 stringAnyCase' (x:xs) = do
   firstChar <- char (toUpper x) <|> char (toLower x)
   rest <- stringAnyCase' xs
   return (firstChar:rest)
 
+-- TODO rewrite by just adding to Sources stream?
 -- | Parse contents of 'str' using 'parser' and return result.
-parseFromString :: (Stream s m Char, IsString s)
-                => ParserT s st m r
+parseFromString :: Monad m
+                => ParserT Sources st m r
                 -> Text
-                -> ParserT s st m r
+                -> ParserT Sources st m r
 parseFromString parser str = do
   oldPos <- getPosition
   setPosition $ initialPos "chunk"
   oldInput <- getInput
-  setInput $ fromString $ T.unpack str
+  setInput $ toSources str
   result <- parser
   spaces
-  eof
   setInput oldInput
   setPosition oldPos
   return result
 
 -- | Like 'parseFromString' but specialized for 'ParserState'.
 -- This resets 'stateLastStrPos', which is almost always what we want.
-parseFromString' :: (Stream s m Char, IsString s, HasLastStrPosition u)
-                 => ParserT s u m a
+parseFromString' :: (Monad m, HasLastStrPosition u)
+                 => ParserT Sources u m a
                  -> Text
-                 -> ParserT s u m a
+                 -> ParserT Sources u m a
 parseFromString' parser str = do
   oldLastStrPos <- getLastStrPos <$> getState
   updateState $ setLastStrPos Nothing
@@ -537,7 +622,7 @@ parseFromString' parser str = do
   return res
 
 -- | Parse raw line block up to and including blank lines.
-lineClump :: Monad m => ParserT Text st m Text
+lineClump :: Monad m => ParserT Sources st m Text
 lineClump = blanklines
           <|> (T.unlines <$> many1 (notFollowedBy blankline >> anyLine))
 
@@ -546,7 +631,7 @@ lineClump = blanklines
 -- pairs of open and close, which must be different. For example,
 -- @charsInBalanced '(' ')' anyChar@ will parse "(hello (there))"
 -- and return "hello (there)".
-charsInBalanced :: Stream s m Char => Char -> Char -> ParserT s st m Char
+charsInBalanced :: (Stream s m Char, UpdateSourcePos s Char) => Char -> Char -> ParserT s st m Char
                 -> ParserT s st m Text
 charsInBalanced open close parser = try $ do
   char open
@@ -565,7 +650,7 @@ charsInBalanced open close parser = try $ do
 -- Auxiliary functions for romanNumeral:
 
 -- | Parses a roman numeral (uppercase or lowercase), returns number.
-romanNumeral :: Stream s m Char => Bool                  -- ^ Uppercase if true
+romanNumeral :: (Stream s m Char, UpdateSourcePos s Char) => Bool                  -- ^ Uppercase if true
              -> ParserT s st m Int
 romanNumeral upperCase = do
     let rchar uc = char $ if upperCase then uc else toLower uc
@@ -601,20 +686,19 @@ romanNumeral upperCase = do
 
 -- | Parses an email address; returns original and corresponding
 -- escaped mailto: URI.
-emailAddress :: Stream s m Char => ParserT s st m (Text, Text)
+emailAddress :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (Text, Text)
 emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain)
  where toResult mbox dom = let full = fromEntities $ T.pack $ mbox ++ '@':dom
                            in  (full, escapeURI $ "mailto:" <> full)
        mailbox           = intercalate "." <$> (emailWord `sepBy1'` dot)
        domain            = intercalate "." <$> (subdomain `sepBy1'` dot)
        dot               = char '.'
-       subdomain         = many1 $ alphaNum <|> innerPunct
+       subdomain         = many1 $ alphaNum <|> innerPunct (=='-')
        -- this excludes some valid email addresses, since an
        -- email could contain e.g. '__', but gives better results
        -- for our purposes, when combined with markdown parsing:
-       innerPunct        = try (satisfy (\c -> isEmailPunct c || c == '@')
-                                 <* notFollowedBy space
-                                 <* notFollowedBy (satisfy isPunctuation))
+       innerPunct f      = try (satisfy f
+                                 <* notFollowedBy (satisfy (not . isAlphaNum)))
        -- technically an email address could begin with a symbol,
        -- but allowing this creates too many problems.
        -- See e.g. https://github.com/jgm/pandoc/issues/2940
@@ -625,16 +709,16 @@ emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain)
        isEmailPunct c    = T.any (== c) "!\"#$%&'*+-/=?^_{|}~;"
 
 
-uriScheme :: Stream s m Char => ParserT s st m Text
+uriScheme :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Text
 uriScheme = oneOfStringsCI (Set.toList schemes)
 
 -- | Parses a URI. Returns pair of original and URI-escaped version.
-uri :: Stream s m Char => ParserT s st m (Text, Text)
+uri :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (Text, Text)
 uri = try $ do
   scheme <- uriScheme
   char ':'
   -- Avoid parsing e.g. "**Notes:**" as a raw URI:
-  notFollowedBy (oneOf "*_]")
+  notFollowedBy $ satisfy (\c -> c == '*' || c == '_' || c == ']')
   -- We allow sentence punctuation except at the end, since
   -- we don't want the trailing '.' in 'http://google.com.' We want to allow
   -- http://en.wikipedia.org/wiki/State_of_emergency_(disambiguation)
@@ -648,7 +732,20 @@ uri = try $ do
   let uri' = scheme <> ":" <> fromEntities str'
   return (uri', escapeURI uri')
   where
-    wordChar = alphaNum <|> oneOf "#$%+/@\\_-&="
+    isWordChar '#' = True
+    isWordChar '$' = True
+    isWordChar '%' = True
+    isWordChar '+' = True
+    isWordChar '/' = True
+    isWordChar '@' = True
+    isWordChar '\\' = True
+    isWordChar '_' = True
+    isWordChar '-' = True
+    isWordChar '&' = True
+    isWordChar '=' = True
+    isWordChar c   = isAlphaNum c
+
+    wordChar = satisfy isWordChar
     percentEscaped = try $ (:) <$> char '%' <*> many1 hexDigit
     entity = try $ pure <$> characterReference
     punct = try $ many1 (char ',') <|> fmap pure (satisfy (\c -> not (isSpace c) && c /= '<' && c /= '>'))
@@ -659,11 +756,13 @@ uri = try $ do
     uriChunkBetween l r = try $ do chunk <- between (char l) (char r) uriChunk
                                    return (T.pack $ [l] ++ chunk ++ [r])
 
-mathInlineWith :: Stream s m Char  => Text -> Text -> ParserT s st m Text
+mathInlineWith :: (Stream s m Char, UpdateSourcePos s Char)  => Text -> Text -> ParserT s st m Text
 mathInlineWith op cl = try $ do
   textStr op
   when (op == "$") $ notFollowedBy space
-  words' <- many1Till (countChar 1 (noneOf " \t\n\\")
+  words' <- many1Till (
+                       (T.singleton <$>
+                          satisfy (\c -> not (isSpaceChar c || c == '\\')))
                    <|> (char '\\' >>
                            -- This next clause is needed because \text{..} can
                            -- contain $, \(\), etc.
@@ -671,17 +770,17 @@ mathInlineWith op cl = try $ do
                                  (("\\text" <>) <$> inBalancedBraces 0 ""))
                             <|>  (\c -> T.pack ['\\',c]) <$> anyChar))
                    <|> do (blankline <* notFollowedBy' blankline) <|>
-                             (oneOf " \t" <* skipMany (oneOf " \t"))
+                             (spaceChar <* skipMany spaceChar)
                           notFollowedBy (char '$')
                           return " "
                     ) (try $ textStr cl)
   notFollowedBy digit  -- to prevent capture of $5
   return $ trimMath $ T.concat words'
  where
-  inBalancedBraces :: Stream s m Char => Int -> Text -> ParserT s st m Text
+  inBalancedBraces :: (Stream s m Char, UpdateSourcePos s Char) => Int -> Text -> ParserT s st m Text
   inBalancedBraces n = fmap T.pack . inBalancedBraces' n . T.unpack
 
-  inBalancedBraces' :: Stream s m Char => Int -> String -> ParserT s st m String
+  inBalancedBraces' :: (Stream s m Char, UpdateSourcePos s Char) => Int -> String -> ParserT s st m String
   inBalancedBraces' 0 "" = do
     c <- anyChar
     if c == '{'
@@ -698,12 +797,13 @@ mathInlineWith op cl = try $ do
          '{' -> inBalancedBraces' (numOpen + 1) (c:xs)
          _   -> inBalancedBraces' numOpen (c:xs)
 
-mathDisplayWith :: Stream s m Char => Text -> Text -> ParserT s st m Text
+mathDisplayWith :: (Stream s m Char, UpdateSourcePos s Char) => Text -> Text -> ParserT s st m Text
 mathDisplayWith op cl = try $ fmap T.pack $ do
   textStr op
-  many1Till (noneOf "\n" <|> (newline <* notFollowedBy' blankline)) (try $ textStr cl)
+  many1Till (satisfy (/= '\n') <|> (newline <* notFollowedBy' blankline))
+            (try $ textStr cl)
 
-mathDisplay :: (HasReaderOptions st, Stream s m Char)
+mathDisplay :: (HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char)
             => ParserT s st m Text
 mathDisplay =
       (guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$")
@@ -712,7 +812,7 @@ mathDisplay =
   <|> (guardEnabled Ext_tex_math_double_backslash >>
        mathDisplayWith "\\\\[" "\\\\]")
 
-mathInline :: (HasReaderOptions st , Stream s m Char)
+mathInline :: (HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char)
            => ParserT s st m Text
 mathInline =
       (guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$")
@@ -725,7 +825,7 @@ mathInline =
 -- displacement (the difference between the source column at the end
 -- and the source column at the beginning). Vertical displacement
 -- (source row) is ignored.
-withHorizDisplacement :: Stream s m Char
+withHorizDisplacement :: (Stream s m Char, UpdateSourcePos s Char)
                       => ParserT s st m a  -- ^ Parser to apply
                       -> ParserT s st m (a, Int) -- ^ (result, displacement)
 withHorizDisplacement parser = do
@@ -737,30 +837,37 @@ withHorizDisplacement parser = do
 -- | Applies a parser and returns the raw string that was parsed,
 -- along with the value produced by the parser.
 withRaw :: Monad m
-        => ParsecT Text st m a
-        -> ParsecT Text st m (a, Text)
+        => ParsecT Sources st m a
+        -> ParsecT Sources st m (a, Text)
 withRaw parser = do
-  pos1 <- getPosition
-  inp <- getInput
+  inps1 <- getInput
   result <- parser
-  pos2 <- getPosition
-  let (l1,c1) = (sourceLine pos1, sourceColumn pos1)
-  let (l2,c2) = (sourceLine pos2, sourceColumn pos2)
-  let inplines = take ((l2 - l1) + 1) $ T.lines inp
-  let raw = case inplines of
-                []  -> ""
-                [l] -> T.take (c2 - c1) l
-                ls  -> T.unlines (init ls) <> T.take (c2 - 1) (last ls)
-  return (result, raw)
+  inps2 <- getInput
+  -- 'raw' is the difference between inps1 and inps2
+  return (result, sourcesDifference inps1 inps2)
+
+sourcesDifference :: Sources -> Sources -> Text
+sourcesDifference (Sources is1) (Sources is2) = go is1 is2
+ where
+   go inps1 inps2 =
+    case (inps1, inps2) of
+      ([], _) -> mempty
+      (_, []) -> mconcat $ map snd inps1
+      ((p1,t1):rest1, (p2, t2):rest2)
+        | p1 == p2
+        , t1 == t2  -> go rest1 rest2
+        | p1 == p2
+        , t1 /= t2  -> fromMaybe mempty $ T.stripSuffix t2 t1
+        | otherwise -> t1 <> go rest1 inps2
 
 -- | Parses backslash, then applies character parser.
-escaped :: Stream s m Char
+escaped :: (Stream s m Char, UpdateSourcePos s Char)
         => ParserT s st m Char  -- ^ Parser for character to escape
         -> ParserT s st m Char
 escaped parser = try $ char '\\' >> parser
 
 -- | Parse character entity.
-characterReference :: Stream s m Char => ParserT s st m Char
+characterReference :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Char
 characterReference = try $ do
   char '&'
   ent <- many1Till nonspaceChar (char ';')
@@ -773,19 +880,19 @@ characterReference = try $ do
        _            -> Prelude.fail "entity not found"
 
 -- | Parses an uppercase roman numeral and returns (UpperRoman, number).
-upperRoman :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+upperRoman :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
 upperRoman = do
   num <- romanNumeral True
   return (UpperRoman, num)
 
 -- | Parses a lowercase roman numeral and returns (LowerRoman, number).
-lowerRoman :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+lowerRoman :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
 lowerRoman = do
   num <- romanNumeral False
   return (LowerRoman, num)
 
 -- | Parses a decimal numeral and returns (Decimal, number).
-decimal :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+decimal :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
 decimal = do
   num <- many1 digit
   return (Decimal, fromMaybe 1 $ safeRead $ T.pack num)
@@ -794,7 +901,7 @@ decimal = do
 -- returns (DefaultStyle, [next example number]).  The next
 -- example number is incremented in parser state, and the label
 -- (if present) is added to the label table.
-exampleNum :: Stream s m Char
+exampleNum :: (Stream s m Char, UpdateSourcePos s Char)
            => ParserT s ParserState m (ListNumberStyle, Int)
 exampleNum = do
   char '@'
@@ -813,37 +920,37 @@ exampleNum = do
   return (Example, num)
 
 -- | Parses a '#' returns (DefaultStyle, 1).
-defaultNum :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+defaultNum :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
 defaultNum = do
   char '#'
   return (DefaultStyle, 1)
 
 -- | Parses a lowercase letter and returns (LowerAlpha, number).
-lowerAlpha :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+lowerAlpha :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
 lowerAlpha = do
-  ch <- oneOf ['a'..'z']
+  ch <- satisfy isAsciiLower
   return (LowerAlpha, ord ch - ord 'a' + 1)
 
 -- | Parses an uppercase letter and returns (UpperAlpha, number).
-upperAlpha :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+upperAlpha :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
 upperAlpha = do
-  ch <- oneOf ['A'..'Z']
+  ch <- satisfy isAsciiUpper
   return (UpperAlpha, ord ch - ord 'A' + 1)
 
 -- | Parses a roman numeral i or I
-romanOne :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+romanOne :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
 romanOne = (char 'i' >> return (LowerRoman, 1)) <|>
            (char 'I' >> return (UpperRoman, 1))
 
 -- | Parses an ordered list marker and returns list attributes.
-anyOrderedListMarker :: Stream s m Char => ParserT s ParserState m ListAttributes
+anyOrderedListMarker :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s ParserState m ListAttributes
 anyOrderedListMarker = choice
   [delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens],
                            numParser <- [decimal, exampleNum, defaultNum, romanOne,
                            lowerAlpha, lowerRoman, upperAlpha, upperRoman]]
 
 -- | Parses a list number (num) followed by a period, returns list attributes.
-inPeriod :: Stream s m Char
+inPeriod :: (Stream s m Char, UpdateSourcePos s Char)
          => ParserT s st m (ListNumberStyle, Int)
          -> ParserT s st m ListAttributes
 inPeriod num = try $ do
@@ -855,7 +962,7 @@ inPeriod num = try $ do
   return (start, style, delim)
 
 -- | Parses a list number (num) followed by a paren, returns list attributes.
-inOneParen :: Stream s m Char
+inOneParen :: (Stream s m Char, UpdateSourcePos s Char)
            => ParserT s st m (ListNumberStyle, Int)
            -> ParserT s st m ListAttributes
 inOneParen num = try $ do
@@ -864,7 +971,7 @@ inOneParen num = try $ do
   return (start, style, OneParen)
 
 -- | Parses a list number (num) enclosed in parens, returns list attributes.
-inTwoParens :: Stream s m Char
+inTwoParens :: (Stream s m Char, UpdateSourcePos s Char)
             => ParserT s st m (ListNumberStyle, Int)
             -> ParserT s st m ListAttributes
 inTwoParens num = try $ do
@@ -875,7 +982,7 @@ inTwoParens num = try $ do
 
 -- | Parses an ordered list marker with a given style and delimiter,
 -- returns number.
-orderedListMarker :: Stream s m Char
+orderedListMarker :: (Stream s m Char, UpdateSourcePos s Char)
                   => ListNumberStyle
                   -> ListNumberDelim
                   -> ParserT s ParserState m Int
@@ -898,10 +1005,10 @@ orderedListMarker style delim = do
   return start
 
 -- | Parses a character reference and returns a Str element.
-charRef :: Stream s m Char => ParserT s st m Inline
+charRef :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inline
 charRef = Str . T.singleton <$> characterReference
 
-lineBlockLine :: Monad m => ParserT Text st m Text
+lineBlockLine :: Monad m => ParserT Sources st m Text
 lineBlockLine = try $ do
   char '|'
   char ' '
@@ -911,11 +1018,11 @@ lineBlockLine = try $ do
   continuations <- many (try $ char ' ' >> anyLine)
   return $ white <> T.unwords (line : continuations)
 
-blankLineBlockLine :: Stream s m Char => ParserT s st m Char
+blankLineBlockLine :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Char
 blankLineBlockLine = try (char '|' >> blankline)
 
 -- | Parses an RST-style line block and returns a list of strings.
-lineBlockLines :: Monad m => ParserT Text st m [Text]
+lineBlockLines :: Monad m => ParserT Sources st m [Text]
 lineBlockLines = try $ do
   lines' <- many1 (lineBlockLine <|> (T.singleton <$> blankLineBlockLine))
   skipMany blankline
@@ -923,7 +1030,8 @@ lineBlockLines = try $ do
 
 -- | Parse a table using 'headerParser', 'rowParser',
 -- 'lineParser', and 'footerParser'.
-tableWith :: (Stream s m Char, HasReaderOptions st, Monad mf)
+tableWith :: (Stream s m Char, UpdateSourcePos s Char,
+              HasReaderOptions st, Monad mf)
           => ParserT s st m (mf [Blocks], [Alignment], [Int])
           -> ([Int] -> ParserT s st m (mf [Blocks]))
           -> ParserT s st m sep
@@ -943,7 +1051,8 @@ tableWith headerParser rowParser lineParser footerParser = try $ do
 
 type TableComponents mf = ([Alignment], [Double], mf [Row], mf [Row])
 
-tableWith' :: (Stream s m Char, HasReaderOptions st, Monad mf)
+tableWith' :: (Stream s m Char, UpdateSourcePos s Char,
+               HasReaderOptions st, Monad mf)
            => ParserT s st m (mf [Blocks], [Alignment], [Int])
            -> ([Int] -> ParserT s st m (mf [Blocks]))
            -> ParserT s st m sep
@@ -992,20 +1101,19 @@ widthsFromIndices numColumns' indices =
 -- (which may be grid), then the rows,
 -- which may be grid, separated by blank lines, and
 -- ending with a footer (dashed line followed by blank line).
-gridTableWith :: (Stream s m Char, HasReaderOptions st, HasLastStrPosition st,
-                  Monad mf, IsString s)
-              => ParserT s st m (mf Blocks)  -- ^ Block list parser
+gridTableWith :: (Monad m, HasReaderOptions st, HasLastStrPosition st, Monad mf)
+              => ParserT Sources st m (mf Blocks)  -- ^ Block list parser
               -> Bool                        -- ^ Headerless table
-              -> ParserT s st m (mf Blocks)
+              -> ParserT Sources st m (mf Blocks)
 gridTableWith blocks headless =
   tableWith (gridTableHeader headless blocks) (gridTableRow blocks)
             (gridTableSep '-') gridTableFooter
 
-gridTableWith' :: (Stream s m Char, HasReaderOptions st, HasLastStrPosition st,
-                   Monad mf, IsString s)
-               => ParserT s st m (mf Blocks)  -- ^ Block list parser
+gridTableWith' :: (Monad m, HasReaderOptions st, HasLastStrPosition st,
+                   Monad mf)
+               => ParserT Sources st m (mf Blocks)  -- ^ Block list parser
                -> Bool                        -- ^ Headerless table
-               -> ParserT s st m (TableComponents mf)
+               -> ParserT Sources st m (TableComponents mf)
 gridTableWith' blocks headless =
   tableWith' (gridTableHeader headless blocks) (gridTableRow blocks)
              (gridTableSep '-') gridTableFooter
@@ -1014,7 +1122,7 @@ gridTableSplitLine :: [Int] -> Text -> [Text]
 gridTableSplitLine indices line = map removeFinalBar $ tail $
   splitTextByIndices (init indices) $ trimr line
 
-gridPart :: Stream s m Char => Char -> ParserT s st m ((Int, Int), Alignment)
+gridPart :: Monad m => Char -> ParserT Sources st m ((Int, Int), Alignment)
 gridPart ch = do
   leftColon <- option False (True <$ char ':')
   dashes <- many1 (char ch)
@@ -1029,7 +1137,7 @@ gridPart ch = do
                        (False, False) -> AlignDefault
   return ((lengthDashes, lengthDashes + 1), alignment)
 
-gridDashedLines :: Stream s m Char => Char -> ParserT s st m [((Int, Int), Alignment)]
+gridDashedLines :: Monad m => Char -> ParserT Sources st m [((Int, Int), Alignment)]
 gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline
 
 removeFinalBar :: Text -> Text
@@ -1038,47 +1146,47 @@ removeFinalBar = T.dropWhileEnd go . T.dropWhileEnd (=='|')
     go c = T.any (== c) " \t"
 
 -- | Separator between rows of grid table.
-gridTableSep :: Stream s m Char => Char -> ParserT s st m Char
+gridTableSep :: Monad m => Char -> ParserT Sources st m Char
 gridTableSep ch = try $ gridDashedLines ch >> return '\n'
 
 -- | Parse header for a grid table.
-gridTableHeader :: (Stream s m Char, Monad mf, IsString s, HasLastStrPosition st)
+gridTableHeader :: (Monad m, Monad mf, HasLastStrPosition st)
                 => Bool -- ^ Headerless table
-                -> ParserT s st m (mf Blocks)
-                -> ParserT s st m (mf [Blocks], [Alignment], [Int])
-gridTableHeader headless blocks = try $ do
+                -> ParserT Sources st m (mf Blocks)
+                -> ParserT Sources st m (mf [Blocks], [Alignment], [Int])
+gridTableHeader True _ = do
+  optional blanklines
+  dashes <- gridDashedLines '-'
+  let aligns = map snd dashes
+  let lines'   = map (snd . fst) dashes
+  let indices  = scanl (+) 0 lines'
+  return (return [], aligns, indices)
+gridTableHeader False blocks = try $ do
   optional blanklines
   dashes <- gridDashedLines '-'
-  rawContent  <- if headless
-                    then return $ repeat ""
-                    else many1
-                         (notFollowedBy (gridTableSep '=') >> char '|' >>
+  rawContent  <- many1 (notFollowedBy (gridTableSep '=') >> char '|' >>
                            T.pack <$> many1Till anyChar newline)
-  underDashes <- if headless
-                    then return dashes
-                    else gridDashedLines '='
+  underDashes <- gridDashedLines '='
   guard $ length dashes == length underDashes
   let lines'   = map (snd . fst) underDashes
   let indices  = scanl (+) 0 lines'
   let aligns   = map snd underDashes
-  let rawHeads = if headless
-                    then replicate (length underDashes) ""
-                    else map (T.unlines . map trim) $ transpose
+  let rawHeads = map (T.unlines . map trim) $ transpose
                        $ map (gridTableSplitLine indices) rawContent
   heads <- sequence <$> mapM (parseFromString' blocks . trim) rawHeads
   return (heads, aligns, indices)
 
-gridTableRawLine :: Stream s m Char => [Int] -> ParserT s st m [Text]
+gridTableRawLine :: (Stream s m Char, UpdateSourcePos s Char) => [Int] -> ParserT s st m [Text]
 gridTableRawLine indices = do
   char '|'
   line <- many1Till anyChar newline
   return (gridTableSplitLine indices $ T.pack line)
 
 -- | Parse row of grid table.
-gridTableRow :: (Stream s m Char, Monad mf, IsString s, HasLastStrPosition st)
-             => ParserT s st m (mf Blocks)
+gridTableRow :: (Monad m, Monad mf, HasLastStrPosition st)
+             => ParserT Sources st m (mf Blocks)
              -> [Int]
-             -> ParserT s st m (mf [Blocks])
+             -> ParserT Sources st m (mf [Blocks])
 gridTableRow blocks indices = do
   colLines <- many1 (gridTableRawLine indices)
   let cols = map ((<> "\n") . T.unlines . removeOneLeadingSpace) $
@@ -1099,34 +1207,38 @@ removeOneLeadingSpace xs =
            Just (c, _) -> c == ' '
 
 -- | Parse footer for a grid table.
-gridTableFooter :: Stream s m Char => ParserT s st m ()
+gridTableFooter :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m ()
 gridTableFooter = optional blanklines
 
 ---
 
 -- | Removes the ParsecT layer from the monad transformer stack
-readWithM :: (Stream s m Char, ToText s)
-          => ParserT s st m a    -- ^ parser
-          -> st                  -- ^ initial state
-          -> s                   -- ^ input
+readWithM :: (Monad m, ToSources t)
+          => ParserT Sources st m a  -- ^ parser
+          -> st                      -- ^ initial state
+          -> t                       -- ^ input
           -> m (Either PandocError a)
 readWithM parser state input =
-    mapLeft (PandocParsecError $ toText input) `liftM` runParserT parser state "source" input
+    mapLeft (PandocParsecError sources)
+      <$> runParserT parser state (initialSourceName sources) sources
+ where
+   sources = toSources input
 
 -- | Parse a string with a given parser and state
-readWith :: Parser Text st a
+readWith :: ToSources t
+         => Parser Sources st a
          -> st
-         -> Text
+         -> t
          -> Either PandocError a
 readWith p t inp = runIdentity $ readWithM p t inp
 
 -- | Parse a string with @parser@ (for testing).
 testStringWith :: Show a
-               => ParserT Text ParserState Identity a
+               => ParserT Sources ParserState Identity a
                -> Text
                -> IO ()
-testStringWith parser str = UTF8.putStrLn $ show $
-                            readWith parser defaultParserState str
+testStringWith parser str = UTF8.putStrLn $ tshow $
+                            readWith parser defaultParserState (toSources str)
 
 -- | Parsing options.
 data ParserState = ParserState
@@ -1146,7 +1258,7 @@ data ParserState = ParserState
       stateInNote            :: Bool,          -- ^ True if parsing note contents
       stateNoteNumber        :: Int,           -- ^ Last note number for citations
       stateMeta              :: Meta,          -- ^ Document metadata
-      stateMeta'             :: F Meta,        -- ^ Document metadata
+      stateMeta'             :: Future ParserState Meta, -- ^ Document metadata
       stateCitations         :: M.Map Text Text, -- ^ RST-style citations
       stateHeaderTable       :: [HeaderType],  -- ^ Ordered list of header types used
       stateIdentifiers       :: Set.Set Text, -- ^ Header identifiers used
@@ -1325,7 +1437,7 @@ data QuoteContext
 
 type NoteTable = [(Text, Text)]
 
-type NoteTable' = M.Map Text (SourcePos, F Blocks)
+type NoteTable' = M.Map Text (SourcePos, Future ParserState Blocks)
 -- used in markdown reader
 
 newtype Key = Key Text deriving (Show, Read, Eq, Ord)
@@ -1360,7 +1472,7 @@ registerHeader (ident,classes,kvs) header' = do
      then do
        let id' = uniqueIdent exts (B.toList header') ids
        let id'' = if Ext_ascii_identifiers `extensionEnabled` exts
-                     then T.pack $ mapMaybe toAsciiChar $ T.unpack id'
+                     then toAsciiText id'
                      else id'
        updateState $ updateIdentifierList $ Set.insert id'
        updateState $ updateIdentifierList $ Set.insert id''
@@ -1373,34 +1485,42 @@ registerHeader (ident,classes,kvs) header' = do
           updateState $ updateIdentifierList $ Set.insert ident
         return (ident,classes,kvs)
 
-smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st,
+                     HasQuoteContext st m,
+                     Stream s m Char, UpdateSourcePos s Char)
                  => ParserT s st m Inlines
                  -> ParserT s st m Inlines
 smartPunctuation inlineParser = do
   guardEnabled Ext_smart
-  choice [ quoted inlineParser, apostrophe, dash, ellipses ]
-
-apostrophe :: Stream s m Char => ParserT s st m Inlines
-apostrophe = (char '\'' <|> char '\8217') >> return (B.str "\x2019")
+  choice [ quoted inlineParser, apostrophe, doubleCloseQuote, dash, ellipses ]
 
-quoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+quoted :: (HasLastStrPosition st, HasQuoteContext st m,
+           Stream s m Char, UpdateSourcePos s Char)
        => ParserT s st m Inlines
        -> ParserT s st m Inlines
 quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser
 
-singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m,
+                 Stream s m Char, UpdateSourcePos s Char)
              => ParserT s st m Inlines
              -> ParserT s st m Inlines
-singleQuoted inlineParser = try $ B.singleQuoted . mconcat
-  <$  singleQuoteStart
-  <*> withQuoteContext InSingleQuote (many1Till inlineParser singleQuoteEnd)
-
-doubleQuoted :: (HasQuoteContext st m, Stream s m Char)
+singleQuoted inlineParser = do
+  singleQuoteStart
+  (B.singleQuoted . mconcat <$>
+    try
+     (withQuoteContext InSingleQuote (many1Till inlineParser singleQuoteEnd)))
+   <|> pure "\8217"
+
+doubleQuoted :: (HasQuoteContext st m, HasLastStrPosition st,
+                 Stream s m Char, UpdateSourcePos s Char)
              => ParserT s st m Inlines
              -> ParserT s st m Inlines
-doubleQuoted inlineParser = try $ B.doubleQuoted . mconcat
-  <$  doubleQuoteStart
-  <*> withQuoteContext InDoubleQuote (manyTill inlineParser doubleQuoteEnd)
+doubleQuoted inlineParser = do
+  doubleQuoteStart
+  (B.doubleQuoted . mconcat <$>
+    try
+     (withQuoteContext InDoubleQuote (manyTill inlineParser doubleQuoteEnd)))
+   <|> pure (B.str "\8220")
 
 failIfInQuoteContext :: (HasQuoteContext st m, Stream s m t)
                      => QuoteContext
@@ -1409,13 +1529,14 @@ failIfInQuoteContext context = do
   context' <- getQuoteContext
   when (context' == context) $ Prelude.fail "already inside quotes"
 
-charOrRef :: Stream s m Char => [Char] -> ParserT s st m Char
+charOrRef :: (Stream s m Char, UpdateSourcePos s Char) => [Char] -> ParserT s st m Char
 charOrRef cs =
   oneOf cs <|> try (do c <- characterReference
                        guard (c `elem` cs)
                        return c)
 
-singleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+singleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m,
+                     Stream s m Char, UpdateSourcePos s Char)
                  => ParserT s st m ()
 singleQuoteStart = do
   failIfInQuoteContext InSingleQuote
@@ -1423,30 +1544,39 @@ singleQuoteStart = do
   guard =<< notAfterString
   try $ do
     charOrRef "'\8216\145"
-    notFollowedBy (oneOf [' ', '\t', '\n'])
+    void $ lookAhead (satisfy (not . isSpaceChar))
 
-singleQuoteEnd :: Stream s m Char
+singleQuoteEnd :: (Stream s m Char, UpdateSourcePos s Char)
                => ParserT s st m ()
 singleQuoteEnd = try $ do
   charOrRef "'\8217\146"
   notFollowedBy alphaNum
 
-doubleQuoteStart :: (HasQuoteContext st m, Stream s m Char)
+doubleQuoteStart :: (HasLastStrPosition st,
+                     HasQuoteContext st m,
+                     Stream s m Char, UpdateSourcePos s Char)
                  => ParserT s st m ()
 doubleQuoteStart = do
   failIfInQuoteContext InDoubleQuote
+  guard =<< notAfterString
   try $ do charOrRef "\"\8220\147"
-           notFollowedBy (oneOf [' ', '\t', '\n'])
+           void $ lookAhead (satisfy (not . isSpaceChar))
 
-doubleQuoteEnd :: Stream s m Char
+doubleQuoteEnd :: (Stream s m Char, UpdateSourcePos s Char)
                => ParserT s st m ()
 doubleQuoteEnd = void (charOrRef "\"\8221\148")
 
-ellipses :: Stream s m Char
+apostrophe :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines
+apostrophe = (char '\'' <|> char '\8217') >> return (B.str "\8217")
+
+doubleCloseQuote :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines
+doubleCloseQuote = B.str "\8221" <$ char '"'
+
+ellipses :: (Stream s m Char, UpdateSourcePos s Char)
          => ParserT s st m Inlines
 ellipses = try (string "..." >> return (B.str "\8230"))
 
-dash :: (HasReaderOptions st, Stream s m Char)
+dash :: (HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char)
      => ParserT s st m Inlines
 dash = try $ do
   oldDashes <- extensionEnabled Ext_old_dashes <$> getOption readerExtensions
@@ -1473,20 +1603,28 @@ nested p = do
   updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
   return res
 
-citeKey :: (Stream s m Char, HasLastStrPosition st)
-        => ParserT s st m (Bool, Text)
-citeKey = try $ do
+citeKey :: (Stream s m Char, UpdateSourcePos s Char, HasLastStrPosition st)
+        => Bool -- ^ If True, allow expanded @{..} syntax.
+        -> ParserT s st m (Bool, Text)
+citeKey allowBraced = try $ do
   guard =<< notAfterString
   suppress_author <- option False (True <$ char '-')
   char '@'
+  key <- simpleCiteIdentifier
+        <|> if allowBraced
+               then charsInBalanced '{' '}' (satisfy (not . isSpace))
+               else mzero
+  return (suppress_author, key)
+
+simpleCiteIdentifier :: (Stream s m Char, UpdateSourcePos s Char)
+                      => ParserT s st m Text
+simpleCiteIdentifier = do
   firstChar <- alphaNum <|> char '_' <|> char '*' -- @* for wildcard in nocite
   let regchar = satisfy (\c -> isAlphaNum c || c == '_')
   let internal p = try $ p <* lookAhead regchar
   rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/") <|>
                  try (oneOf ":/" <* lookAhead (char '/'))
-  let key = firstChar:rest
-  return (suppress_author, T.pack key)
-
+  return $ T.pack $ firstChar:rest
 
 token :: (Stream s m t)
       => (t -> Text)
@@ -1506,12 +1644,15 @@ extractIdClass (ident, cls, kvs) = (ident', cls', kvs')
     cls'   = maybe cls T.words $ lookup "class" kvs
     kvs'   = filter (\(k,_) -> k /= "id" || k /= "class") kvs
 
-insertIncludedFile' :: (PandocMonad m, HasIncludeFiles st)
-                    => ParserT a st m (mf Blocks)
-                    -> (Text -> a)
-                    -> [FilePath] -> FilePath
-                    -> ParserT a st m (mf Blocks)
-insertIncludedFile' blocks totoks dirs f = do
+insertIncludedFile :: (PandocMonad m, HasIncludeFiles st)
+                   => ParserT a st m b -- ^ parser to apply
+                   -> (Text -> a) -- ^ convert Text to stream type
+                   -> [FilePath]  -- ^ search path (directories)
+                   -> FilePath    -- ^ path of file to include
+                   -> Maybe Int   -- ^ start line (negative counts from end)
+                   -> Maybe Int   -- ^ end line (negative counts from end)
+                   -> ParserT a st m b
+insertIncludedFile parser toStream dirs f mbstartline mbendline = do
   oldPos <- getPosition
   oldInput <- getInput
   containers <- getIncludeFiles <$> getState
@@ -1520,32 +1661,32 @@ insertIncludedFile' blocks totoks dirs f = do
   updateState $ addIncludeFile $ T.pack f
   mbcontents <- readFileFromDirs dirs f
   contents <- case mbcontents of
-                   Just s -> return s
+                   Just s -> return $ exciseLines mbstartline mbendline s
                    Nothing -> do
                      report $ CouldNotLoadIncludeFile (T.pack f) oldPos
                      return ""
-  setPosition $ newPos f 1 1
-  setInput $ totoks contents
-  bs <- blocks
+  setInput $ toStream contents
+  setPosition $ newPos f (fromMaybe 1 mbstartline) 1
+  result <- parser
   setInput oldInput
   setPosition oldPos
   updateState dropLatestIncludeFile
-  return bs
+  return result
+
+exciseLines :: Maybe Int -> Maybe Int -> Text -> Text
+exciseLines Nothing Nothing t = t
+exciseLines mbstartline mbendline t =
+  T.unlines $ take (endline' - (startline' - 1))
+            $ drop (startline' - 1) contentLines
+ where
+  contentLines = T.lines t
+  numLines = length contentLines
+  startline' = case mbstartline of
+                 Nothing -> 1
+                 Just x | x >= 0 -> x
+                        | otherwise -> numLines + x -- negative from end
+  endline' = case mbendline of
+                 Nothing -> numLines
+                 Just x | x >= 0 -> x
+                        | otherwise -> numLines + x -- negative from end
 
--- | Parse content of include file as blocks. Circular includes result in an
--- @PandocParseError@.
-insertIncludedFile :: (PandocMonad m, HasIncludeFiles st)
-                   => ParserT [a] st m Blocks
-                   -> (Text -> [a])
-                   -> [FilePath] -> FilePath
-                   -> ParserT [a] st m Blocks
-insertIncludedFile blocks totoks dirs f =
-  runIdentity <$> insertIncludedFile' (Identity <$> blocks) totoks dirs f
-
--- | Parse content of include file as future blocks. Circular includes result in
--- an @PandocParseError@.
-insertIncludedFileF :: (PandocMonad m, HasIncludeFiles st)
-                    => ParserT Text st m (Future st Blocks)
-                    -> [FilePath] -> FilePath
-                    -> ParserT Text st m (Future st Blocks)
-insertIncludedFileF p = insertIncludedFile' p id