From 4f3434586743afb69f00ca91fe6ec9b68b39ae7e Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Fri, 8 Jan 2021 18:38:20 +0100
Subject: Update copyright notices for 2021 (#7012)

---
 src/Text/Pandoc/Shared.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/Text/Pandoc/Shared.hs')

diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 4853621c8..b908a0172 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -8,7 +8,7 @@
 {-# LANGUAGE OverloadedStrings     #-}
 {- |
    Module      : Text.Pandoc.Shared
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
-- 
cgit v1.2.3


From 2d60a5127cc28bb6b55c19309d6e8fb6e81fbe66 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Sat, 13 Feb 2021 09:36:19 +0100
Subject: T.P.Shared: export `handleTaskListItem`. [API change]

---
 src/Text/Pandoc/Shared.hs | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/Text/Pandoc/Shared.hs')

diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index b908a0172..6d5d4c97d 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -70,6 +70,7 @@ module Text.Pandoc.Shared (
                      isTightList,
                      taskListItemFromAscii,
                      taskListItemToAscii,
+                     handleTaskListItem,
                      addMetaField,
                      makeMeta,
                      eastAsianLineBreakFilter,
-- 
cgit v1.2.3


From 9e728b40f36d48c687372ad447670186ed415337 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 17 Feb 2021 17:21:22 -0800
Subject: T.P.Shared: cleanup.

Cleanup up some functions and added deprecation pragmas
to funtions no longer used in the code base.
---
 src/Text/Pandoc/Shared.hs | 37 ++++++++++++++++++++++++++-----------
 1 file changed, 26 insertions(+), 11 deletions(-)

(limited to 'src/Text/Pandoc/Shared.hs')

diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 6d5d4c97d..a579681b1 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -151,21 +151,22 @@ splitBy :: (a -> Bool) -> [a] -> [[a]]
 splitBy _ [] = []
 splitBy isSep lst =
   let (first, rest) = break isSep lst
-      rest'         = dropWhile isSep rest
-  in  first:splitBy isSep rest'
+  in  first:splitBy isSep (dropWhile isSep rest)
 
+-- | Split text by groups of one or more separator.
 splitTextBy :: (Char -> Bool) -> T.Text -> [T.Text]
 splitTextBy isSep t
   | T.null t = []
   | otherwise = let (first, rest) = T.break isSep t
-                    rest'         = T.dropWhile isSep rest
-                in  first : splitTextBy isSep rest'
+                in  first : splitTextBy isSep (T.dropWhile isSep rest)
 
+{-# DEPRECATED splitByIndices "This function is slated for removal" #-}
 splitByIndices :: [Int] -> [a] -> [[a]]
 splitByIndices [] lst = [lst]
 splitByIndices (x:xs) lst = first:splitByIndices (map (\y -> y - x)  xs) rest
   where (first, rest) = splitAt x lst
 
+{-# DEPRECATED splitStringByIndices "This function is slated for removal" #-}
 -- | Split string into chunks divided at specified indices.
 splitStringByIndices :: [Int] -> [Char] -> [[Char]]
 splitStringByIndices [] lst = [lst]
@@ -173,15 +174,22 @@ splitStringByIndices (x:xs) lst =
   let (first, rest) = splitAt' x lst in
   first : splitStringByIndices (map (\y -> y - x) xs) rest
 
-splitTextByIndices :: [Int] -> T.Text -> [T.Text]
-splitTextByIndices ns = fmap T.pack . splitStringByIndices ns . T.unpack
-
+-- DEPRECATED: can be removed when splitStringByIndices is
 splitAt' :: Int -> [Char] -> ([Char],[Char])
 splitAt' _ []          = ([],[])
 splitAt' n xs | n <= 0 = ([],xs)
 splitAt' n (x:xs)      = (x:ys,zs)
   where (ys,zs) = splitAt' (n - charWidth x) xs
 
+splitTextByIndices :: [Int] -> T.Text -> [T.Text]
+splitTextByIndices ns = splitTextByRelIndices (zipWith (-) ns (0:ns))
+ where
+  splitTextByRelIndices [] t = [t]
+  splitTextByRelIndices (x:xs) t =
+    let (first, rest) = T.splitAt x t
+     in first : splitTextByRelIndices xs rest
+
+{-# DEPRECATED substitute "This function is slated for removal" #-}
 -- | Replace each occurrence of one sublist in a list with another.
 substitute :: (Eq a) => [a] -> [a] -> [a] -> [a]
 substitute _ _ [] = []
@@ -254,17 +262,24 @@ notElemText c = T.all (/= c)
 stripTrailingNewlines :: T.Text -> T.Text
 stripTrailingNewlines = T.dropWhileEnd (== '\n')
 
+isWS :: Char -> Bool
+isWS ' '  = True
+isWS '\r' = True
+isWS '\n' = True
+isWS '\t' = True
+isWS _    = False
+
 -- | Remove leading and trailing space (including newlines) from string.
 trim :: T.Text -> T.Text
-trim = T.dropAround (`elemText` " \r\n\t")
+trim = T.dropAround isWS
 
 -- | Remove leading space (including newlines) from string.
 triml :: T.Text -> T.Text
-triml = T.dropWhile (`elemText` " \r\n\t")
+triml = T.dropWhile isWS
 
 -- | Remove trailing space (including newlines) from string.
 trimr :: T.Text -> T.Text
-trimr = T.dropWhileEnd (`elemText` " \r\n\t")
+trimr = T.dropWhileEnd isWS
 
 -- | Trim leading space and trailing space unless after \.
 trimMath :: T.Text -> T.Text
@@ -275,7 +290,7 @@ trimMath = triml . T.reverse . stripBeginSpace . T.reverse -- no Text.spanEnd
       | Just ('\\', _) <- T.uncons suff = T.cons (T.last pref) suff
       | otherwise = suff
       where
-        (pref, suff) = T.span (`elemText` " \t\n\r") t
+        (pref, suff) = T.span isWS t
 
 -- | Strip leading and trailing characters from string
 stripFirstAndLast :: T.Text -> T.Text
-- 
cgit v1.2.3


From d8ef383692a167c97c67114107878a60d0aee6e1 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 20 Feb 2021 23:01:07 -0800
Subject: T.P.Shared: remove some obsolete functions [API change].

Removed:

- `splitByIndices`
- `splitStringByIndicies`
- `substitute`
- `underlineSpan`

None of these are used elsewhere in the code base.
---
 src/Text/Pandoc/Shared.hs | 44 +-------------------------------------------
 1 file changed, 1 insertion(+), 43 deletions(-)

(limited to 'src/Text/Pandoc/Shared.hs')

diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index a579681b1..922df7922 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -21,10 +21,7 @@ module Text.Pandoc.Shared (
                      -- * List processing
                      splitBy,
                      splitTextBy,
-                     splitByIndices,
-                     splitStringByIndices,
                      splitTextByIndices,
-                     substitute,
                      ordNub,
                      findM,
                      -- * Text processing
@@ -74,7 +71,6 @@ module Text.Pandoc.Shared (
                      addMetaField,
                      makeMeta,
                      eastAsianLineBreakFilter,
-                     underlineSpan,
                      htmlSpanLikeElements,
                      splitSentences,
                      filterIpynbOutput,
@@ -113,7 +109,7 @@ import qualified Data.Bifunctor as Bifunctor
 import Data.Char (isAlpha, isLower, isSpace, isUpper, toLower, isAlphaNum,
                   generalCategory, GeneralCategory(NonSpacingMark,
                   SpacingCombiningMark, EnclosingMark, ConnectorPunctuation))
-import Data.List (find, intercalate, intersperse, stripPrefix, sortOn)
+import Data.List (find, intercalate, intersperse, sortOn)
 import qualified Data.Map as M
 import Data.Maybe (mapMaybe, fromMaybe)
 import Data.Monoid (Any (..))
@@ -160,27 +156,6 @@ splitTextBy isSep t
   | otherwise = let (first, rest) = T.break isSep t
                 in  first : splitTextBy isSep (T.dropWhile isSep rest)
 
-{-# DEPRECATED splitByIndices "This function is slated for removal" #-}
-splitByIndices :: [Int] -> [a] -> [[a]]
-splitByIndices [] lst = [lst]
-splitByIndices (x:xs) lst = first:splitByIndices (map (\y -> y - x)  xs) rest
-  where (first, rest) = splitAt x lst
-
-{-# DEPRECATED splitStringByIndices "This function is slated for removal" #-}
--- | Split string into chunks divided at specified indices.
-splitStringByIndices :: [Int] -> [Char] -> [[Char]]
-splitStringByIndices [] lst = [lst]
-splitStringByIndices (x:xs) lst =
-  let (first, rest) = splitAt' x lst in
-  first : splitStringByIndices (map (\y -> y - x) xs) rest
-
--- DEPRECATED: can be removed when splitStringByIndices is
-splitAt' :: Int -> [Char] -> ([Char],[Char])
-splitAt' _ []          = ([],[])
-splitAt' n xs | n <= 0 = ([],xs)
-splitAt' n (x:xs)      = (x:ys,zs)
-  where (ys,zs) = splitAt' (n - charWidth x) xs
-
 splitTextByIndices :: [Int] -> T.Text -> [T.Text]
 splitTextByIndices ns = splitTextByRelIndices (zipWith (-) ns (0:ns))
  where
@@ -189,16 +164,6 @@ splitTextByIndices ns = splitTextByRelIndices (zipWith (-) ns (0:ns))
     let (first, rest) = T.splitAt x t
      in first : splitTextByRelIndices xs rest
 
-{-# DEPRECATED substitute "This function is slated for removal" #-}
--- | Replace each occurrence of one sublist in a list with another.
-substitute :: (Eq a) => [a] -> [a] -> [a] -> [a]
-substitute _ _ [] = []
-substitute [] _ xs = xs
-substitute target replacement lst@(x:xs) =
-    case stripPrefix target lst of
-      Just lst' -> replacement ++ substitute target replacement lst'
-      Nothing   -> x : substitute target replacement xs
-
 ordNub :: (Ord a) => [a] -> [a]
 ordNub l = go Set.empty l
   where
@@ -765,13 +730,6 @@ eastAsianLineBreakFilter = bottomUp go
         go xs
           = xs
 
-{-# DEPRECATED underlineSpan "Use Text.Pandoc.Builder.underline instead" #-}
--- | Builder for underline (deprecated).
--- This probably belongs in Builder.hs in pandoc-types.
--- Will be replaced once Underline is an element.
-underlineSpan :: Inlines -> Inlines
-underlineSpan = B.underline
-
 -- | Set of HTML elements that are represented as Span with a class equal as
 -- the element tag itself.
 htmlSpanLikeElements :: Set.Set T.Text
-- 
cgit v1.2.3


From 5f9327cfc8143902bbd3fdb9d97a7995a19fd217 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 5 Mar 2021 10:20:16 -0800
Subject: Shared: Change defaultUserDataDirs -> defaultUserDataDir.

Rationale: the manual says that the XDG data directory will
be used if it exists, otherwise the legacy data directory.
So we should just determine this and use this directory,
rather than having a search path which could cause some
things to be taken from one data directory and others from
others.

[API change]
---
 MANUAL.txt                | 12 ++++++------
 src/Text/Pandoc/Shared.hs | 20 ++++++++++++--------
 2 files changed, 18 insertions(+), 14 deletions(-)

(limited to 'src/Text/Pandoc/Shared.hs')

diff --git a/MANUAL.txt b/MANUAL.txt
index 667a784e0..d97cbcbc9 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -361,15 +361,15 @@ header when requesting a document from a URL:
     will be used.  On \*nix and macOS systems this will be the `pandoc`
     subdirectory of the XDG data directory (by default,
     `$HOME/.local/share`, overridable by setting the `XDG_DATA_HOME`
-    environment variable).  If that directory does not exist,
-    `$HOME/.pandoc` will be used (for backwards compatibility).
-    In Windows the default user data directory is
+    environment variable).  If that directory does not exist and
+    `$HOME/.pandoc` exists, it will be used (for backwards compatibility).
+    On Windows the default user data directory is
     `C:\Users\USERNAME\AppData\Roaming\pandoc`.
     You can find the default user data directory on your system by
     looking at the output of `pandoc --version`.
-    A `reference.odt`, `reference.docx`, `epub.css`, `templates`,
-    `slidy`, `slideous`, or `s5` directory
-    placed in this directory will override pandoc's normal defaults.
+    Data files placed in this directory (for example, `reference.odt`,
+    `reference.docx`, `epub.css`, `templates`) will override
+    pandoc's normal defaults.
 
 `-d` *FILE*, `--defaults=`*FILE*
 
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 922df7922..2aba9b2e1 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -95,7 +95,7 @@ module Text.Pandoc.Shared (
                      safeRead,
                      safeStrRead,
                      -- * User data directory
-                     defaultUserDataDirs,
+                     defaultUserDataDir,
                      -- * Version
                      pandocVersion
                     ) where
@@ -1012,12 +1012,16 @@ safeStrRead s = case reads s of
 --
 
 -- | Return appropriate user data directory for platform.  We use
--- XDG_DATA_HOME (or its default value), but fall back to the
--- legacy user data directory ($HOME/.pandoc on *nix) if this is
--- missing.
-defaultUserDataDirs :: IO [FilePath]
-defaultUserDataDirs = E.catch (do
+-- XDG_DATA_HOME (or its default value), but for backwards compatibility,
+-- we fall back to the legacy user data directory ($HOME/.pandoc on *nix)
+-- if the XDG_DATA_HOME is missing and this exists.  If neither directory
+-- is present, we return the XDG data directory.
+defaultUserDataDir :: IO FilePath
+defaultUserDataDir = do
   xdgDir <- getXdgDirectory XdgData "pandoc"
   legacyDir <- getAppUserDataDirectory "pandoc"
-  return $ ordNub [xdgDir, legacyDir])
- (\(_ :: E.SomeException) -> return [])
+  xdgExists <- doesDirectoryExist xdgDir
+  legacyDirExists <- doesDirectoryExist legacyDir
+  if not xdgExists && legacyDirExists
+     then return legacyDir
+     else return xdgDir
-- 
cgit v1.2.3


From 92ffd374754e28939a855fe84fb5455cb91383fa Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 11 Mar 2021 15:58:37 -0800
Subject: Simplify compactDL.

---
 src/Text/Pandoc/Shared.hs | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

(limited to 'src/Text/Pandoc/Shared.hs')

diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 2aba9b2e1..d11ad13f5 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -464,22 +464,20 @@ plainToPara :: Block -> Block
 plainToPara (Plain ils) = Para ils
 plainToPara x = x
 
+
 -- | Like @compactify@, but acts on items of definition lists.
 compactifyDL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])]
 compactifyDL items =
-  let defs = concatMap snd items
-  in  case reverse (concatMap B.toList defs) of
-           (Para x:xs)
-             | not (any isPara xs) ->
-                   let (t,ds) = last items
-                       lastDef = B.toList $ last ds
-                       ds' = init ds ++
-                             if null lastDef
-                                then [B.fromList lastDef]
-                                else [B.fromList $ init lastDef ++ [Plain x]]
-                    in init items ++ [(t, ds')]
-             | otherwise           -> items
-           _                       -> items
+  case reverse items of
+        ((t,ds):ys) ->
+           case reverse (map (reverse . B.toList) ds) of
+             ((Para x:xs) : zs) | not (any isPara xs) ->
+                  reverse ys ++
+                    [(t, reverse (map B.fromList zs) ++
+                         [B.fromList (reverse (Plain x:xs))])]
+             _     -> items
+        _          -> items
+
 
 -- | Combine a list of lines by adding hard linebreaks.
 combineLines :: [[Inline]] -> [Inline]
-- 
cgit v1.2.3


From 24191a2a278c0dec30bacd66b78cbb8cc8d91324 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 15 Mar 2021 10:37:35 -0700
Subject: Use foldl' instead of foldl everywhere.

---
 src/Text/Pandoc/App/CommandLineOptions.hs           | 4 ++--
 src/Text/Pandoc/Citeproc/Locator.hs                 | 3 ++-
 src/Text/Pandoc/Class/PandocMonad.hs                | 3 ++-
 src/Text/Pandoc/Extensions.hs                       | 3 ++-
 src/Text/Pandoc/Lua/Filter.hs                       | 3 ++-
 src/Text/Pandoc/Readers/Docx/Combine.hs             | 4 ++--
 src/Text/Pandoc/Readers/HTML/TagCategories.hs       | 1 +
 src/Text/Pandoc/Readers/Markdown.hs                 | 6 +++---
 src/Text/Pandoc/Readers/Odt/Arrows/State.hs         | 4 ++--
 src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs | 4 ++--
 src/Text/Pandoc/Readers/Odt/StyleReader.hs          | 4 ++--
 src/Text/Pandoc/Readers/Textile.hs                  | 4 ++--
 src/Text/Pandoc/Shared.hs                           | 4 ++--
 src/Text/Pandoc/Writers/HTML.hs                     | 8 ++++----
 src/Text/Pandoc/Writers/RST.hs                      | 4 ++--
 src/Text/Pandoc/Writers/Texinfo.hs                  | 4 ++--
 16 files changed, 34 insertions(+), 29 deletions(-)

(limited to 'src/Text/Pandoc/Shared.hs')

diff --git a/src/Text/Pandoc/App/CommandLineOptions.hs b/src/Text/Pandoc/App/CommandLineOptions.hs
index b4483f756..a6df12715 100644
--- a/src/Text/Pandoc/App/CommandLineOptions.hs
+++ b/src/Text/Pandoc/App/CommandLineOptions.hs
@@ -31,7 +31,7 @@ import Data.Aeson.Encode.Pretty (encodePretty', Config(..), keyOrder,
          defConfig, Indent(..), NumberFormat(..))
 import Data.Bifunctor (second)
 import Data.Char (toLower)
-import Data.List (intercalate, sort)
+import Data.List (intercalate, sort, foldl')
 #ifdef _WINDOWS
 #if MIN_VERSION_base(4,12,0)
 import Data.List (isPrefixOf)
@@ -93,7 +93,7 @@ parseOptionsFromArgs options' defaults prg rawArgs = do
         ("Try " ++ prg ++ " --help for more information.")
 
   -- thread option data structure through all supplied option actions
-  opts <- foldl (>>=) (return defaults) actions
+  opts <- foldl' (>>=) (return defaults) actions
   let mbArgs = case args of
                  [] -> Nothing
                  xs -> Just xs
diff --git a/src/Text/Pandoc/Citeproc/Locator.hs b/src/Text/Pandoc/Citeproc/Locator.hs
index dba762c02..44416ca12 100644
--- a/src/Text/Pandoc/Citeproc/Locator.hs
+++ b/src/Text/Pandoc/Citeproc/Locator.hs
@@ -7,6 +7,7 @@ where
 import Citeproc.Types
 import Data.Text (Text)
 import qualified Data.Text as T
+import Data.List (foldl')
 import Text.Parsec
 import Text.Pandoc.Definition
 import Text.Pandoc.Parsing (romanNumeral)
@@ -139,7 +140,7 @@ pBalancedBraces braces p = try $ do
   where
       except = notFollowedBy pBraces >> p
       -- outer and inner
-      surround = foldl (\a (open, close) -> sur open close except <|> a)
+      surround = foldl' (\a (open, close) -> sur open close except <|> a)
                        except
                        braces
 
diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs
index 86c8de79e..293a822a0 100644
--- a/src/Text/Pandoc/Class/PandocMonad.hs
+++ b/src/Text/Pandoc/Class/PandocMonad.hs
@@ -59,6 +59,7 @@ import Control.Monad.Except (MonadError (catchError, throwError),
                              MonadTrans, lift, when)
 import Data.Digest.Pure.SHA (sha1, showDigest)
 import Data.Maybe (fromMaybe)
+import Data.List (foldl')
 import Data.Time (UTCTime)
 import Data.Time.Clock.POSIX (POSIXTime, utcTimeToPOSIXSeconds,
                              posixSecondsToUTCTime)
@@ -612,7 +613,7 @@ checkExistence fn = do
 -- | Canonicalizes a file path by removing redundant @.@ and @..@.
 makeCanonical :: FilePath -> FilePath
 makeCanonical = Posix.joinPath . transformPathParts . splitDirectories
- where  transformPathParts = reverse . foldl go []
+ where  transformPathParts = reverse . foldl' go []
         go as     "."  = as
         go (_:as) ".." = as
         go as     x    = x : as
diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs
index 3b96f9e04..266a09e3c 100644
--- a/src/Text/Pandoc/Extensions.hs
+++ b/src/Text/Pandoc/Extensions.hs
@@ -34,6 +34,7 @@ module Text.Pandoc.Extensions ( Extension(..)
 where
 import Data.Bits (clearBit, setBit, testBit, (.|.))
 import Data.Data (Data)
+import Data.List (foldl')
 import qualified Data.Text as T
 import Data.Typeable (Typeable)
 import GHC.Generics (Generic)
@@ -593,7 +594,7 @@ parseFormatSpec :: T.Text
 parseFormatSpec = parse formatSpec ""
   where formatSpec = do
           name <- formatName
-          (extsToEnable, extsToDisable) <- foldl (flip ($)) ([],[]) <$>
+          (extsToEnable, extsToDisable) <- foldl' (flip ($)) ([],[]) <$>
                                              many extMod
           return (T.pack name, reverse extsToEnable, reverse extsToDisable)
         formatName = many1 $ noneOf "-+"
diff --git a/src/Text/Pandoc/Lua/Filter.hs b/src/Text/Pandoc/Lua/Filter.hs
index bffe01a34..90967f295 100644
--- a/src/Text/Pandoc/Lua/Filter.hs
+++ b/src/Text/Pandoc/Lua/Filter.hs
@@ -22,6 +22,7 @@ import Control.Monad.Catch (finally, try)
 import Data.Data (Data, DataType, dataTypeConstrs, dataTypeName, dataTypeOf,
                   showConstr, toConstr, tyconUQname)
 import Data.Foldable (foldrM)
+import Data.List (foldl')
 import Data.Map (Map)
 import Data.Maybe (fromMaybe)
 import Foreign.Lua (Lua, Peekable, Pushable, StackIndex)
@@ -204,7 +205,7 @@ walkMeta lf (Pandoc m bs) = do
 
 walkPandoc :: LuaFilter -> Pandoc -> Lua Pandoc
 walkPandoc (LuaFilter fnMap) =
-  case foldl mplus Nothing (map (`Map.lookup` fnMap) pandocFilterNames) of
+  case foldl' mplus Nothing (map (`Map.lookup` fnMap) pandocFilterNames) of
     Just fn -> \x -> runFilterFunction fn x *> singleElement x
     Nothing -> return
 
diff --git a/src/Text/Pandoc/Readers/Docx/Combine.hs b/src/Text/Pandoc/Readers/Docx/Combine.hs
index bcf26c4a3..7c6d01769 100644
--- a/src/Text/Pandoc/Readers/Docx/Combine.hs
+++ b/src/Text/Pandoc/Readers/Docx/Combine.hs
@@ -182,7 +182,7 @@ isAttrModifier _                  = False
 
 smushInlines :: [Inlines] -> Inlines
 smushInlines xs = combineInlines xs' mempty
-  where xs' = foldl combineInlines mempty xs
+  where xs' = foldl' combineInlines mempty xs
 
 smushBlocks :: [Blocks] -> Blocks
-smushBlocks xs = foldl combineBlocks mempty xs
+smushBlocks xs = foldl' combineBlocks mempty xs
diff --git a/src/Text/Pandoc/Readers/HTML/TagCategories.hs b/src/Text/Pandoc/Readers/HTML/TagCategories.hs
index b7bd40fee..67aba1cb1 100644
--- a/src/Text/Pandoc/Readers/HTML/TagCategories.hs
+++ b/src/Text/Pandoc/Readers/HTML/TagCategories.hs
@@ -23,6 +23,7 @@ where
 
 import Data.Set (Set, fromList, unions)
 import Data.Text (Text)
+import Data.List (foldl')
 
 eitherBlockOrInline :: Set Text
 eitherBlockOrInline = fromList
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 34edbcc17..dc94fc2d6 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -21,7 +21,7 @@ module Text.Pandoc.Readers.Markdown (
 import Control.Monad
 import Control.Monad.Except (throwError)
 import Data.Char (isAlphaNum, isPunctuation, isSpace)
-import Data.List (transpose, elemIndex, sortOn)
+import Data.List (transpose, elemIndex, sortOn, foldl')
 import qualified Data.Map as M
 import Data.Maybe
 import qualified Data.Set as Set
@@ -357,7 +357,7 @@ referenceKey = try $ do
   addKvs <- option [] $ guardEnabled Ext_mmd_link_attributes
                           >> many (try $ spnl >> keyValAttr)
   blanklines
-  let attr'  = extractIdClass $ foldl (\x f -> f x) attr addKvs
+  let attr'  = extractIdClass $ foldl' (\x f -> f x) attr addKvs
       target = (escapeURI $ trimr src, tit)
   st <- getState
   let oldkeys = stateKeys st
@@ -613,7 +613,7 @@ attributes = try $ do
   spnl
   attrs <- many (attribute <* spnl)
   char '}'
-  return $ foldl (\x f -> f x) nullAttr attrs
+  return $ foldl' (\x f -> f x) nullAttr attrs
 
 attribute :: PandocMonad m => MarkdownParser m (Attr -> Attr)
 attribute = identifierAttr <|> classAttr <|> keyValAttr <|> specialAttr
diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs
index 93c6b5e79..96515bf56 100644
--- a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs
+++ b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs
@@ -22,7 +22,7 @@ module Text.Pandoc.Readers.Odt.Arrows.State where
 import Control.Arrow
 import qualified Control.Category as Cat
 import Control.Monad
-
+import Data.List (foldl')
 import Text.Pandoc.Readers.Odt.Arrows.Utils
 import Text.Pandoc.Readers.Odt.Generic.Fallible
 
@@ -122,7 +122,7 @@ iterateS a = ArrowState $ \(s,f) -> foldr a' (s,mzero) f
 iterateSL :: (Foldable f, MonadPlus m)
           => ArrowState s    x     y
           -> ArrowState s (f x) (m y)
-iterateSL a = ArrowState $ \(s,f) -> foldl a' (s,mzero) f
+iterateSL a = ArrowState $ \(s,f) -> foldl' a' (s,mzero) f
   where a' (s',m) x = second (mplus m.return) $ runArrowState a (s',x)
 
 
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
index 0d921e23b..341903046 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
@@ -64,12 +64,12 @@ import qualified Data.Map             as M
 import           Data.Text (Text)
 import           Data.Default
 import           Data.Maybe
+import           Data.List (foldl')
 
 import qualified Text.Pandoc.XML.Light as XML
 
 import           Text.Pandoc.Readers.Odt.Arrows.State
 import           Text.Pandoc.Readers.Odt.Arrows.Utils
-
 import           Text.Pandoc.Readers.Odt.Generic.Namespaces
 import           Text.Pandoc.Readers.Odt.Generic.Utils
 import           Text.Pandoc.Readers.Odt.Generic.Fallible
@@ -293,7 +293,7 @@ readNSattributes         = fromState $ \state -> maybe (state, failEmpty     )
                          => XMLConverterState nsID extraState
                          -> Maybe (XMLConverterState nsID extraState)
     extractNSAttrs startState
-                         = foldl (\state d -> state >>= addNS d)
+                         = foldl' (\state d -> state >>= addNS d)
                                  (Just startState)
                                  nsAttribs
       where nsAttribs    = mapMaybe readNSattr (XML.elAttribs element)
diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
index 5e10f896c..b722aa07d 100644
--- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
@@ -44,7 +44,7 @@ import Control.Arrow
 
 import Data.Default
 import qualified Data.Foldable as F
-import Data.List (unfoldr)
+import Data.List (unfoldr, foldl')
 import qualified Data.Map as M
 import Data.Maybe
 import Data.Text (Text)
@@ -120,7 +120,7 @@ fontPitchReader = executeInSub NsOffice "font-face-decls" (
                               &&&
                               lookupDefaultingAttr NsStyle "font-pitch"
                             ))
-                    >>?^ ( M.fromList . foldl accumLegalPitches [] )
+                    >>?^ ( M.fromList . foldl' accumLegalPitches [] )
                   ) `ifFailedDo` returnV (Right M.empty)
   where accumLegalPitches ls (Nothing,_) = ls
         accumLegalPitches ls (Just n,p)  = (n,p):ls
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 860da2dc3..99238c7f0 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -38,7 +38,7 @@ module Text.Pandoc.Readers.Textile ( readTextile) where
 import Control.Monad (guard, liftM)
 import Control.Monad.Except (throwError)
 import Data.Char (digitToInt, isUpper)
-import Data.List (intersperse, transpose)
+import Data.List (intersperse, transpose, foldl')
 import Data.Text (Text)
 import qualified Data.Text as T
 import Text.HTML.TagSoup (Tag (..), fromAttrib)
@@ -627,7 +627,7 @@ code2 = do
 
 -- | Html / CSS attributes
 attributes :: PandocMonad m => ParserT Text ParserState m Attr
-attributes = foldl (flip ($)) ("",[],[]) <$>
+attributes = foldl' (flip ($)) ("",[],[]) <$>
   try (do special <- option id specialAttribute
           attrs <- many attribute
           return (special : attrs))
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index d11ad13f5..0ce9396b3 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -109,7 +109,7 @@ import qualified Data.Bifunctor as Bifunctor
 import Data.Char (isAlpha, isLower, isSpace, isUpper, toLower, isAlphaNum,
                   generalCategory, GeneralCategory(NonSpacingMark,
                   SpacingCombiningMark, EnclosingMark, ConnectorPunctuation))
-import Data.List (find, intercalate, intersperse, sortOn)
+import Data.List (find, intercalate, intersperse, sortOn, foldl')
 import qualified Data.Map as M
 import Data.Maybe (mapMaybe, fromMaybe)
 import Data.Monoid (Any (..))
@@ -840,7 +840,7 @@ mapLeft = Bifunctor.first
 -- > collapseFilePath "parent/foo/.." ==  "parent"
 -- > collapseFilePath "/parent/foo/../../bar" ==  "/bar"
 collapseFilePath :: FilePath -> FilePath
-collapseFilePath = Posix.joinPath . reverse . foldl go [] . splitDirectories
+collapseFilePath = Posix.joinPath . reverse . foldl' go [] . splitDirectories
   where
     go rs "." = rs
     go r@(p:rs) ".." = case p of
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 2f33cd467..332de1545 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -30,7 +30,7 @@ module Text.Pandoc.Writers.HTML (
   ) where
 import Control.Monad.State.Strict
 import Data.Char (ord)
-import Data.List (intercalate, intersperse, partition, delete, (\\))
+import Data.List (intercalate, intersperse, partition, delete, (\\), foldl')
 import Data.List.NonEmpty (NonEmpty((:|)))
 import Data.Maybe (fromMaybe, isJust, isNothing)
 import qualified Data.Set as Set
@@ -544,7 +544,7 @@ tagWithAttributes opts html5 selfClosing tagname attr =
 
 addAttrs :: PandocMonad m
          => WriterOptions -> Attr -> Html -> StateT WriterState m Html
-addAttrs opts attr h = foldl (!) h <$> attrsToHtml opts attr
+addAttrs opts attr h = foldl' (!) h <$> attrsToHtml opts attr
 
 toAttrs :: PandocMonad m
         => [(Text, Text)] -> StateT WriterState m [Attribute]
@@ -926,7 +926,7 @@ blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do
                                    numstyle']
                    else [])
   l <- ordList opts contents
-  return $ foldl (!) l attribs
+  return $ foldl' (!) l attribs
 blockToHtml opts (DefinitionList lst) = do
   contents <- mapM (\(term, defs) ->
                   do term' <- liftM H.dt $ inlineListToHtml opts term
@@ -1407,7 +1407,7 @@ inlineToHtml opts inline = do
                               Just "audio" -> mediaTag H5.audio "Audio"
                               Just _       -> (H5.embed, [])
                               _            -> imageTag
-                        return $ foldl (!) tag $ attributes ++ specAttrs
+                        return $ foldl' (!) tag $ attributes ++ specAttrs
                         -- note:  null title included, as in Markdown.pl
     (Note contents) -> do
                         notes <- gets stNotes
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index d01e13db4..54d042332 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -16,7 +16,7 @@ reStructuredText:  <http://docutils.sourceforge.net/rst.html>
 module Text.Pandoc.Writers.RST ( writeRST, flatten ) where
 import Control.Monad.State.Strict
 import Data.Char (isSpace)
-import Data.List (transpose, intersperse)
+import Data.List (transpose, intersperse, foldl')
 import Data.Maybe (fromMaybe)
 import qualified Data.Text as T
 import Data.Text (Text)
@@ -509,7 +509,7 @@ flatten outer
   | null contents = [outer]
   | otherwise     = combineAll contents
   where contents = dropInlineParent outer
-        combineAll = foldl combine []
+        combineAll = foldl' combine []
 
         combine :: [Inline] -> Inline -> [Inline]
         combine f i =
diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs
index 53da70f84..9d695563f 100644
--- a/src/Text/Pandoc/Writers/Texinfo.hs
+++ b/src/Text/Pandoc/Writers/Texinfo.hs
@@ -15,7 +15,7 @@ module Text.Pandoc.Writers.Texinfo ( writeTexinfo ) where
 import Control.Monad.Except (throwError)
 import Control.Monad.State.Strict
 import Data.Char (chr, ord)
-import Data.List (maximumBy, transpose)
+import Data.List (maximumBy, transpose, foldl')
 import Data.Ord (comparing)
 import qualified Data.Set as Set
 import Data.Text (Text)
@@ -271,7 +271,7 @@ tableAnyRowToTexinfo :: PandocMonad m
                      -> [[Block]]
                      -> TI m (Doc Text)
 tableAnyRowToTexinfo itemtype aligns cols =
-  (literal itemtype $$) . foldl (\row item -> row $$
+  (literal itemtype $$) . foldl' (\row item -> row $$
   (if isEmpty row then empty else text " @tab ") <> item) empty <$> zipWithM alignedBlock aligns cols
 
 alignedBlock :: PandocMonad m
-- 
cgit v1.2.3


From 005f0fbcd558636f0d5db1203427a4d7b341f36e Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 19 Mar 2021 12:34:20 -0700
Subject: T.P.Shared: Remove ToString, ToText typeclasses [API change].

T.P.Parsing: revise type of readWithM so that it takes a Text
rather than a polymorphic ToText value.

These typeclasses were there to ease the transition from String
to Text. They are no longer needed, and they may clash with
more useful versions under the same name.

This will require a bump to 2.13.
---
 src/Text/Pandoc/Parsing.hs |  8 ++++----
 src/Text/Pandoc/Shared.hs  | 20 --------------------
 2 files changed, 4 insertions(+), 24 deletions(-)

(limited to 'src/Text/Pandoc/Shared.hs')

diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 8d3799c3e..10a08d410 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -1128,13 +1128,13 @@ gridTableFooter = optional blanklines
 ---
 
 -- | Removes the ParsecT layer from the monad transformer stack
-readWithM :: (Stream s m Char, ToText s)
-          => ParserT s st m a    -- ^ parser
+readWithM :: Monad m
+          => ParserT Text st m a -- ^ parser
           -> st                  -- ^ initial state
-          -> s                   -- ^ input
+          -> Text                -- ^ input
           -> m (Either PandocError a)
 readWithM parser state input =
-    mapLeft (PandocParsecError $ toText input) `liftM` runParserT parser state "source" input
+    mapLeft (PandocParsecError input) <$> runParserT parser state "source" input
 
 -- | Parse a string with a given parser and state
 readWith :: Parser Text st a
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 0ce9396b3..46aea9c03 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -25,8 +25,6 @@ module Text.Pandoc.Shared (
                      ordNub,
                      findM,
                      -- * Text processing
-                     ToString (..),
-                     ToText (..),
                      tshow,
                      backslashEscapes,
                      escapeStringUsing,
@@ -183,24 +181,6 @@ findM p = foldr go (pure Nothing)
 -- Text processing
 --
 
-class ToString a where
-  toString :: a -> String
-
-instance ToString String where
-  toString = id
-
-instance ToString T.Text where
-  toString = T.unpack
-
-class ToText a where
-  toText :: a -> T.Text
-
-instance ToText String where
-  toText = T.pack
-
-instance ToText T.Text where
-  toText = id
-
 tshow :: Show a => a -> T.Text
 tshow = T.pack . show
 
-- 
cgit v1.2.3


From a1a57bce4e32cc26b968bcc2847a8e8da30f725b Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 20 Mar 2021 00:02:24 -0700
Subject: T.P.Shared: remove `backslashEscapes`, `escapeStringUsing`.

[API change]

These are inefficient association list lookups.
Replace with more efficient functions in the writers that
used them (with 10-25% performance improvements in
haddock, org, rtf, texinfo writers).
---
 src/Text/Pandoc/Shared.hs           | 13 -------------
 src/Text/Pandoc/Writers/AsciiDoc.hs | 11 ++++++++---
 src/Text/Pandoc/Writers/ConTeXt.hs  |  6 +++++-
 src/Text/Pandoc/Writers/Haddock.hs  | 15 +++++++++++++--
 src/Text/Pandoc/Writers/LaTeX.hs    | 14 +++++++++++++-
 src/Text/Pandoc/Writers/Org.hs      | 15 +++++++++------
 src/Text/Pandoc/Writers/RTF.hs      | 26 ++++++++++++++++----------
 src/Text/Pandoc/Writers/Texinfo.hs  | 24 +++++++++++++-----------
 8 files changed, 77 insertions(+), 47 deletions(-)

(limited to 'src/Text/Pandoc/Shared.hs')

diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 46aea9c03..23adff909 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -26,8 +26,6 @@ module Text.Pandoc.Shared (
                      findM,
                      -- * Text processing
                      tshow,
-                     backslashEscapes,
-                     escapeStringUsing,
                      elemText,
                      notElemText,
                      stripTrailingNewlines,
@@ -184,17 +182,6 @@ findM p = foldr go (pure Nothing)
 tshow :: Show a => a -> T.Text
 tshow = T.pack . show
 
--- | Returns an association list of backslash escapes for the
--- designated characters.
-backslashEscapes :: [Char]    -- ^ list of special characters to escape
-                 -> [(Char, T.Text)]
-backslashEscapes = map (\ch -> (ch, T.pack ['\\',ch]))
-
--- | Escape a string of characters, using an association list of
--- characters and strings.
-escapeStringUsing :: [(Char, T.Text)] -> T.Text -> T.Text
-escapeStringUsing tbl = T.concatMap $ \c -> fromMaybe (T.singleton c) $ lookup c tbl
-
 -- | @True@ exactly when the @Char@ appears in the @Text@.
 elemText :: Char -> T.Text -> Bool
 elemText c = T.any (== c)
diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs
index 69e608ef9..ab7e5f1a9 100644
--- a/src/Text/Pandoc/Writers/AsciiDoc.hs
+++ b/src/Text/Pandoc/Writers/AsciiDoc.hs
@@ -105,8 +105,11 @@ pandocToAsciiDoc opts (Pandoc meta blocks) = do
 
 -- | Escape special characters for AsciiDoc.
 escapeString :: Text -> Text
-escapeString = escapeStringUsing escs
-  where escs = backslashEscapes "{"
+escapeString t
+  | T.any (== '{') t = T.concatMap escChar t
+  | otherwise        = t
+  where escChar '{' = "\\{"
+        escChar c   = T.singleton c
 
 -- | Ordered list start parser for use in Para below.
 olMarker :: Parser Text ParserState Char
@@ -496,7 +499,9 @@ inlineToAsciiDoc opts (Quoted qt lst) = do
         | otherwise     -> [Str "``"] ++ lst ++ [Str "''"]
 inlineToAsciiDoc _ (Code _ str) = do
   isAsciidoctor <- gets asciidoctorVariant
-  let contents = literal (escapeStringUsing (backslashEscapes "`") str)
+  let escChar '`' = "\\'"
+      escChar c   = T.singleton c
+  let contents = literal (T.concatMap escChar str)
   return $
     if isAsciidoctor
        then text "`+" <> contents <> "+`"
diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
index 1c56388ed..3c9975be8 100644
--- a/src/Text/Pandoc/Writers/ConTeXt.hs
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@@ -434,9 +434,13 @@ inlineToConTeXt (Link _ txt (src, _)) = do
   put $ st {stNextRef = next + 1}
   let ref = "url" <> tshow next
   contents <-  inlineListToConTeXt txt
+  let escChar '#' = "\\#"
+      escChar '%' = "\\%"
+      escChar c   = T.singleton c
+  let escContextURL = T.concatMap escChar
   return $ "\\useURL"
            <> brackets (literal ref)
-           <> brackets (literal $ escapeStringUsing [('#',"\\#"),('%',"\\%")] src)
+           <> brackets (literal $ escContextURL src)
            <> (if isAutolink
                   then empty
                   else brackets empty <> brackets contents)
diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs
index aaa19ed07..75e14714b 100644
--- a/src/Text/Pandoc/Writers/Haddock.hs
+++ b/src/Text/Pandoc/Writers/Haddock.hs
@@ -15,6 +15,7 @@ Haddock:  <http://www.haskell.org/haddock/doc/html/>
 -}
 module Text.Pandoc.Writers.Haddock (writeHaddock) where
 import Control.Monad.State.Strict
+import Data.Char (isAlphaNum)
 import Data.Default
 import Data.Text (Text)
 import qualified Data.Text as T
@@ -71,8 +72,18 @@ notesToHaddock opts notes =
 
 -- | Escape special characters for Haddock.
 escapeString :: Text -> Text
-escapeString = escapeStringUsing haddockEscapes
-  where haddockEscapes = backslashEscapes "\\/'`\"@<"
+escapeString t
+  | T.all isAlphaNum t = t
+  | otherwise = T.concatMap escChar t
+ where
+  escChar '\\' = "\\\\"
+  escChar '/'  = "\\/"
+  escChar '\'' = "\\'"
+  escChar '`'  = "\\`"
+  escChar '"'  = "\\\""
+  escChar '@'  = "\\@"
+  escChar '<'  = "\\<"
+  escChar c    = T.singleton c
 
 -- | Convert Pandoc block element to haddock.
 blockToHaddock :: PandocMonad m
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 6a205a798..1c970e6ad 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -825,7 +825,19 @@ inlineToLaTeX (Code (_,classes,kvs) str) = do
         let chr = case "!\"'()*,-./:;?@" \\ T.unpack str of
                        (c:_) -> c
                        []    -> '!'
-        let str' = escapeStringUsing (backslashEscapes "\\{}%~_&#^") str
+        let isEscapable '\\' = True
+            isEscapable '{'  = True
+            isEscapable '}'  = True
+            isEscapable '%'  = True
+            isEscapable '~'  = True
+            isEscapable '_'  = True
+            isEscapable '&'  = True
+            isEscapable '#'  = True
+            isEscapable '^'  = True
+            isEscapable _    = False
+        let escChar c | isEscapable c = T.pack ['\\',c]
+                      | otherwise     = T.singleton c
+        let str' = T.concatMap escChar str
         -- we always put lstinline in a dummy 'passthrough' command
         -- (defined in the default template) so that we don't have
         -- to change the way we escape characters depending on whether
diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs
index bb645eaf9..88a2b8314 100644
--- a/src/Text/Pandoc/Writers/Org.hs
+++ b/src/Text/Pandoc/Writers/Org.hs
@@ -84,12 +84,15 @@ noteToOrg num note = do
 
 -- | Escape special characters for Org.
 escapeString :: Text -> Text
-escapeString = escapeStringUsing
-               [ ('\x2014',"---")
-               , ('\x2013',"--")
-               , ('\x2019',"'")
-               , ('\x2026',"...")
-               ]
+escapeString t
+  | T.all (\c -> c < '\x2013' || c > '\x2026') t = t
+  | otherwise = T.concatMap escChar t
+  where
+   escChar '\x2013' = "--"
+   escChar '\x2014' = "---"
+   escChar '\x2019' = "'"
+   escChar '\x2026' = "..."
+   escChar c        = T.singleton c
 
 isRawFormat :: Format -> Bool
 isRawFormat f =
diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs
index cf27011c2..3527949b4 100644
--- a/src/Text/Pandoc/Writers/RTF.hs
+++ b/src/Text/Pandoc/Writers/RTF.hs
@@ -16,7 +16,7 @@ module Text.Pandoc.Writers.RTF ( writeRTF
 import Control.Monad.Except (catchError, throwError)
 import Control.Monad
 import qualified Data.ByteString as B
-import Data.Char (chr, isDigit, ord)
+import Data.Char (chr, isDigit, ord, isAlphaNum)
 import qualified Data.Map as M
 import Data.Text (Text)
 import qualified Data.Text as T
@@ -137,15 +137,21 @@ handleUnicode = T.concatMap $ \c ->
 
 -- | Escape special characters.
 escapeSpecial :: Text -> Text
-escapeSpecial = escapeStringUsing $
-  [ ('\t',"\\tab ")
-  , ('\8216',"\\u8216'")
-  , ('\8217',"\\u8217'")
-  , ('\8220',"\\u8220\"")
-  , ('\8221',"\\u8221\"")
-  , ('\8211',"\\u8211-")
-  , ('\8212',"\\u8212-")
-  ] <> backslashEscapes "{\\}"
+escapeSpecial t
+  | T.all isAlphaNum t = t
+  | otherwise          = T.concatMap escChar t
+ where
+  escChar '\t' = "\\tab "
+  escChar '\8216' = "\\u8216'"
+  escChar '\8217' = "\\u8217'"
+  escChar '\8220' = "\\u8220\""
+  escChar '\8221' = "\\u8221\""
+  escChar '\8211' = "\\u8211-"
+  escChar '\8212' = "\\u8212-"
+  escChar '{'     = "\\{"
+  escChar '}'     = "\\}"
+  escChar '\\'    = "\\\\"
+  escChar c       = T.singleton c
 
 -- | Escape strings as needed for rich text format.
 stringToRTF :: Text -> Text
diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs
index 0146fdfd8..6a33b4283 100644
--- a/src/Text/Pandoc/Writers/Texinfo.hs
+++ b/src/Text/Pandoc/Writers/Texinfo.hs
@@ -14,7 +14,7 @@ Conversion of 'Pandoc' format into Texinfo.
 module Text.Pandoc.Writers.Texinfo ( writeTexinfo ) where
 import Control.Monad.Except (throwError)
 import Control.Monad.State.Strict
-import Data.Char (chr, ord)
+import Data.Char (chr, ord, isAlphaNum)
 import Data.List (maximumBy, transpose, foldl')
 import Data.List.NonEmpty (nonEmpty)
 import Data.Ord (comparing)
@@ -85,16 +85,18 @@ pandocToTexinfo options (Pandoc meta blocks) = do
 
 -- | Escape things as needed for Texinfo.
 stringToTexinfo :: Text -> Text
-stringToTexinfo = escapeStringUsing texinfoEscapes
-  where texinfoEscapes = [ ('{', "@{")
-                         , ('}', "@}")
-                         , ('@', "@@")
-                         , ('\160', "@ ")
-                         , ('\x2014', "---")
-                         , ('\x2013', "--")
-                         , ('\x2026', "@dots{}")
-                         , ('\x2019', "'")
-                         ]
+stringToTexinfo t
+  | T.all isAlphaNum t = t
+  | otherwise = T.concatMap escChar t
+  where escChar '{'      = "@{"
+        escChar '}'      = "@}"
+        escChar '@'      = "@@"
+        escChar '\160'   = "@ "
+        escChar '\x2014' = "---"
+        escChar '\x2013' = "--"
+        escChar '\x2026' = "@dots{}"
+        escChar '\x2019' = "'"
+        escChar c        = T.singleton c
 
 escapeCommas :: PandocMonad m => TI m (Doc Text) -> TI m (Doc Text)
 escapeCommas parser = do
-- 
cgit v1.2.3


From 052056289fc6f884a2a8799dacca64a16248a5c2 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 21 Mar 2021 23:38:47 -0700
Subject: Simplify T.P.Asciify and export toAsciiText [API change].

Instead of encoding a giant (and incomplete) map, we now
just use unicode-transforms to normalize the text to
a canonical decomposition, and manipulate the result.

The new `toAsciiText` is equivalent to the old
`T.pack . mapMaybe toAsciiChar . T.unpack` but should be faster.
---
 src/Text/Pandoc/Asciify.hs | 403 ++-------------------------------------------
 src/Text/Pandoc/Parsing.hs |   6 +-
 src/Text/Pandoc/Shared.hs  |   4 +-
 3 files changed, 18 insertions(+), 395 deletions(-)

(limited to 'src/Text/Pandoc/Shared.hs')

diff --git a/src/Text/Pandoc/Asciify.hs b/src/Text/Pandoc/Asciify.hs
index 01a7b624a..620546c13 100644
--- a/src/Text/Pandoc/Asciify.hs
+++ b/src/Text/Pandoc/Asciify.hs
@@ -10,396 +10,19 @@
 Function to convert accented latin letters to their unaccented
 ascii equivalents (used in constructing HTML identifiers).
 -}
-module Text.Pandoc.Asciify (toAsciiChar)
+module Text.Pandoc.Asciify (toAsciiChar, toAsciiText)
 where
-import Data.Char (isAscii)
-import qualified Data.Map as M
+import Data.Char (isAscii, isMark)
+import qualified Data.Text.Normalize as TN
+import Data.Text (Text)
+import qualified Data.Text as T
 
-toAsciiChar :: Char -> Maybe Char
-toAsciiChar c | isAscii c = Just c
-              | otherwise = M.lookup c asciiMap
+toAsciiText :: Text -> Text
+toAsciiText = T.filter isAscii . TN.normalize (TN.NFD)
 
-asciiMap :: M.Map Char Char
-asciiMap = M.fromList
-  [('\192','A')
-  ,('\193','A')
-  ,('\194','A')
-  ,('\195','A')
-  ,('\196','A')
-  ,('\197','A')
-  ,('\199','C')
-  ,('\200','E')
-  ,('\201','E')
-  ,('\202','E')
-  ,('\203','E')
-  ,('\204','I')
-  ,('\205','I')
-  ,('\206','I')
-  ,('\207','I')
-  ,('\209','N')
-  ,('\210','O')
-  ,('\211','O')
-  ,('\212','O')
-  ,('\213','O')
-  ,('\214','O')
-  ,('\217','U')
-  ,('\218','U')
-  ,('\219','U')
-  ,('\220','U')
-  ,('\221','Y')
-  ,('\224','a')
-  ,('\225','a')
-  ,('\226','a')
-  ,('\227','a')
-  ,('\228','a')
-  ,('\229','a')
-  ,('\231','c')
-  ,('\232','e')
-  ,('\233','e')
-  ,('\234','e')
-  ,('\235','e')
-  ,('\236','i')
-  ,('\237','i')
-  ,('\238','i')
-  ,('\239','i')
-  ,('\241','n')
-  ,('\242','o')
-  ,('\243','o')
-  ,('\244','o')
-  ,('\245','o')
-  ,('\246','o')
-  ,('\249','u')
-  ,('\250','u')
-  ,('\251','u')
-  ,('\252','u')
-  ,('\253','y')
-  ,('\255','y')
-  ,('\256','A')
-  ,('\257','a')
-  ,('\258','A')
-  ,('\259','a')
-  ,('\260','A')
-  ,('\261','a')
-  ,('\262','C')
-  ,('\263','c')
-  ,('\264','C')
-  ,('\265','c')
-  ,('\266','C')
-  ,('\267','c')
-  ,('\268','C')
-  ,('\269','c')
-  ,('\270','D')
-  ,('\271','d')
-  ,('\274','E')
-  ,('\275','e')
-  ,('\276','E')
-  ,('\277','e')
-  ,('\278','E')
-  ,('\279','e')
-  ,('\280','E')
-  ,('\281','e')
-  ,('\282','E')
-  ,('\283','e')
-  ,('\284','G')
-  ,('\285','g')
-  ,('\286','G')
-  ,('\287','g')
-  ,('\288','G')
-  ,('\289','g')
-  ,('\290','G')
-  ,('\291','g')
-  ,('\292','H')
-  ,('\293','h')
-  ,('\296','I')
-  ,('\297','i')
-  ,('\298','I')
-  ,('\299','i')
-  ,('\300','I')
-  ,('\301','i')
-  ,('\302','I')
-  ,('\303','i')
-  ,('\304','I')
-  ,('\305','i')
-  ,('\308','J')
-  ,('\309','j')
-  ,('\310','K')
-  ,('\311','k')
-  ,('\313','L')
-  ,('\314','l')
-  ,('\315','L')
-  ,('\316','l')
-  ,('\317','L')
-  ,('\318','l')
-  ,('\323','N')
-  ,('\324','n')
-  ,('\325','N')
-  ,('\326','n')
-  ,('\327','N')
-  ,('\328','n')
-  ,('\332','O')
-  ,('\333','o')
-  ,('\334','O')
-  ,('\335','o')
-  ,('\336','O')
-  ,('\337','o')
-  ,('\340','R')
-  ,('\341','r')
-  ,('\342','R')
-  ,('\343','r')
-  ,('\344','R')
-  ,('\345','r')
-  ,('\346','S')
-  ,('\347','s')
-  ,('\348','S')
-  ,('\349','s')
-  ,('\350','S')
-  ,('\351','s')
-  ,('\352','S')
-  ,('\353','s')
-  ,('\354','T')
-  ,('\355','t')
-  ,('\356','T')
-  ,('\357','t')
-  ,('\360','U')
-  ,('\361','u')
-  ,('\362','U')
-  ,('\363','u')
-  ,('\364','U')
-  ,('\365','u')
-  ,('\366','U')
-  ,('\367','u')
-  ,('\368','U')
-  ,('\369','u')
-  ,('\370','U')
-  ,('\371','u')
-  ,('\372','W')
-  ,('\373','w')
-  ,('\374','Y')
-  ,('\375','y')
-  ,('\376','Y')
-  ,('\377','Z')
-  ,('\378','z')
-  ,('\379','Z')
-  ,('\380','z')
-  ,('\381','Z')
-  ,('\382','z')
-  ,('\416','O')
-  ,('\417','o')
-  ,('\431','U')
-  ,('\432','u')
-  ,('\461','A')
-  ,('\462','a')
-  ,('\463','I')
-  ,('\464','i')
-  ,('\465','O')
-  ,('\466','o')
-  ,('\467','U')
-  ,('\468','u')
-  ,('\486','G')
-  ,('\487','g')
-  ,('\488','K')
-  ,('\489','k')
-  ,('\490','O')
-  ,('\491','o')
-  ,('\496','j')
-  ,('\500','G')
-  ,('\501','g')
-  ,('\504','N')
-  ,('\505','n')
-  ,('\512','A')
-  ,('\513','a')
-  ,('\514','A')
-  ,('\515','a')
-  ,('\516','E')
-  ,('\517','e')
-  ,('\518','E')
-  ,('\519','e')
-  ,('\520','I')
-  ,('\521','i')
-  ,('\522','I')
-  ,('\523','i')
-  ,('\524','O')
-  ,('\525','o')
-  ,('\526','O')
-  ,('\527','o')
-  ,('\528','R')
-  ,('\529','r')
-  ,('\530','R')
-  ,('\531','r')
-  ,('\532','U')
-  ,('\533','u')
-  ,('\534','U')
-  ,('\535','u')
-  ,('\536','S')
-  ,('\537','s')
-  ,('\538','T')
-  ,('\539','t')
-  ,('\542','H')
-  ,('\543','h')
-  ,('\550','A')
-  ,('\551','a')
-  ,('\552','E')
-  ,('\553','e')
-  ,('\558','O')
-  ,('\559','o')
-  ,('\562','Y')
-  ,('\563','y')
-  ,('\894',';')
-  ,('\7680','A')
-  ,('\7681','a')
-  ,('\7682','B')
-  ,('\7683','b')
-  ,('\7684','B')
-  ,('\7685','b')
-  ,('\7686','B')
-  ,('\7687','b')
-  ,('\7690','D')
-  ,('\7691','d')
-  ,('\7692','D')
-  ,('\7693','d')
-  ,('\7694','D')
-  ,('\7695','d')
-  ,('\7696','D')
-  ,('\7697','d')
-  ,('\7698','D')
-  ,('\7699','d')
-  ,('\7704','E')
-  ,('\7705','e')
-  ,('\7706','E')
-  ,('\7707','e')
-  ,('\7710','F')
-  ,('\7711','f')
-  ,('\7712','G')
-  ,('\7713','g')
-  ,('\7714','H')
-  ,('\7715','h')
-  ,('\7716','H')
-  ,('\7717','h')
-  ,('\7718','H')
-  ,('\7719','h')
-  ,('\7720','H')
-  ,('\7721','h')
-  ,('\7722','H')
-  ,('\7723','h')
-  ,('\7724','I')
-  ,('\7725','i')
-  ,('\7728','K')
-  ,('\7729','k')
-  ,('\7730','K')
-  ,('\7731','k')
-  ,('\7732','K')
-  ,('\7733','k')
-  ,('\7734','L')
-  ,('\7735','l')
-  ,('\7738','L')
-  ,('\7739','l')
-  ,('\7740','L')
-  ,('\7741','l')
-  ,('\7742','M')
-  ,('\7743','m')
-  ,('\7744','M')
-  ,('\7745','m')
-  ,('\7746','M')
-  ,('\7747','m')
-  ,('\7748','N')
-  ,('\7749','n')
-  ,('\7750','N')
-  ,('\7751','n')
-  ,('\7752','N')
-  ,('\7753','n')
-  ,('\7754','N')
-  ,('\7755','n')
-  ,('\7764','P')
-  ,('\7765','p')
-  ,('\7766','P')
-  ,('\7767','p')
-  ,('\7768','R')
-  ,('\7769','r')
-  ,('\7770','R')
-  ,('\7771','r')
-  ,('\7774','R')
-  ,('\7775','r')
-  ,('\7776','S')
-  ,('\7777','s')
-  ,('\7778','S')
-  ,('\7779','s')
-  ,('\7786','T')
-  ,('\7787','t')
-  ,('\7788','T')
-  ,('\7789','t')
-  ,('\7790','T')
-  ,('\7791','t')
-  ,('\7792','T')
-  ,('\7793','t')
-  ,('\7794','U')
-  ,('\7795','u')
-  ,('\7796','U')
-  ,('\7797','u')
-  ,('\7798','U')
-  ,('\7799','u')
-  ,('\7804','V')
-  ,('\7805','v')
-  ,('\7806','V')
-  ,('\7807','v')
-  ,('\7808','W')
-  ,('\7809','w')
-  ,('\7810','W')
-  ,('\7811','w')
-  ,('\7812','W')
-  ,('\7813','w')
-  ,('\7814','W')
-  ,('\7815','w')
-  ,('\7816','W')
-  ,('\7817','w')
-  ,('\7818','X')
-  ,('\7819','x')
-  ,('\7820','X')
-  ,('\7821','x')
-  ,('\7822','Y')
-  ,('\7823','y')
-  ,('\7824','Z')
-  ,('\7825','z')
-  ,('\7826','Z')
-  ,('\7827','z')
-  ,('\7828','Z')
-  ,('\7829','z')
-  ,('\7830','h')
-  ,('\7831','t')
-  ,('\7832','w')
-  ,('\7833','y')
-  ,('\7840','A')
-  ,('\7841','a')
-  ,('\7842','A')
-  ,('\7843','a')
-  ,('\7864','E')
-  ,('\7865','e')
-  ,('\7866','E')
-  ,('\7867','e')
-  ,('\7868','E')
-  ,('\7869','e')
-  ,('\7880','I')
-  ,('\7881','i')
-  ,('\7882','I')
-  ,('\7883','i')
-  ,('\7884','O')
-  ,('\7885','o')
-  ,('\7886','O')
-  ,('\7887','o')
-  ,('\7908','U')
-  ,('\7909','u')
-  ,('\7910','U')
-  ,('\7911','u')
-  ,('\7922','Y')
-  ,('\7923','y')
-  ,('\7924','Y')
-  ,('\7925','y')
-  ,('\7926','Y')
-  ,('\7927','y')
-  ,('\7928','Y')
-  ,('\7929','y')
-  ,('\8175','`')
-  ,('\8490','K')
-  ,('\8800','=')
-  ,('\8814','<')
-  ,('\8815','>')
-  ]
+toAsciiChar :: Char -> Maybe Char
+toAsciiChar c = case T.unpack (TN.normalize TN.NFD (T.singleton c)) of
+                  (x:xs) | isAscii x
+                         , all isMark xs
+                         -> Just x
+                  _      -> Nothing
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 0c2078721..847fd2e05 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -195,13 +195,13 @@ import Data.Default
 import Data.Functor (($>))
 import Data.List (intercalate, transpose)
 import qualified Data.Map as M
-import Data.Maybe (mapMaybe, fromMaybe)
+import Data.Maybe (fromMaybe)
 import qualified Data.Set as Set
 import Data.String
 import Data.Text (Text)
 import qualified Data.Text as T
 import Text.HTML.TagSoup.Entity (lookupEntity)
-import Text.Pandoc.Asciify (toAsciiChar)
+import Text.Pandoc.Asciify (toAsciiText)
 import Text.Pandoc.Builder (Blocks, HasMeta (..), Inlines, trimInlines)
 import qualified Text.Pandoc.Builder as B
 import Text.Pandoc.Class.PandocMonad (PandocMonad, readFileFromDirs, report)
@@ -1380,7 +1380,7 @@ registerHeader (ident,classes,kvs) header' = do
      then do
        let id' = uniqueIdent exts (B.toList header') ids
        let id'' = if Ext_ascii_identifiers `extensionEnabled` exts
-                     then T.pack $ mapMaybe toAsciiChar $ T.unpack id'
+                     then toAsciiText id'
                      else id'
        updateState $ updateIdentifierList $ Set.insert id'
        updateState $ updateIdentifierList $ Set.insert id''
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 23adff909..3292b32f4 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -123,7 +123,7 @@ import Text.HTML.TagSoup (RenderOptions (..), Tag (..), renderOptions,
 import Text.Pandoc.Builder (Blocks, Inlines, ToMetaValue (..))
 import qualified Text.Pandoc.Builder as B
 import Data.Time
-import Text.Pandoc.Asciify (toAsciiChar)
+import Text.Pandoc.Asciify (toAsciiText)
 import Text.Pandoc.Definition
 import Text.Pandoc.Extensions (Extensions, Extension(..), extensionEnabled)
 import Text.Pandoc.Generic (bottomUp)
@@ -478,7 +478,7 @@ inlineListToIdentifier exts =
       | otherwise = T.dropWhile (not . isAlpha)
     filterAscii
       | extensionEnabled Ext_ascii_identifiers exts
-        = T.pack . mapMaybe toAsciiChar . T.unpack
+        = toAsciiText
       | otherwise = id
     toIdent
       | extensionEnabled Ext_gfm_auto_identifiers exts =
-- 
cgit v1.2.3


From 20cd33e5a44810b68fed74da00f4f51eb2282147 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 8 Apr 2021 14:47:11 -0700
Subject: Fix regression in grid tables for wide characters.

In the translation from String to Text, a char-width-sensitive
splitAt' was dropped.  This commit reinstates it.
Closes #7214.
---
 src/Text/Pandoc/Shared.hs | 18 +++++++++++++-----
 test/command/7214.md      | 28 ++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+), 5 deletions(-)
 create mode 100644 test/command/7214.md

(limited to 'src/Text/Pandoc/Shared.hs')

diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 3292b32f4..95cbdc8b8 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -153,12 +153,20 @@ splitTextBy isSep t
                 in  first : splitTextBy isSep (T.dropWhile isSep rest)
 
 splitTextByIndices :: [Int] -> T.Text -> [T.Text]
-splitTextByIndices ns = splitTextByRelIndices (zipWith (-) ns (0:ns))
+splitTextByIndices ns = splitTextByRelIndices (zipWith (-) ns (0:ns)) . T.unpack
  where
-  splitTextByRelIndices [] t = [t]
-  splitTextByRelIndices (x:xs) t =
-    let (first, rest) = T.splitAt x t
-     in first : splitTextByRelIndices xs rest
+  splitTextByRelIndices [] cs = [T.pack cs]
+  splitTextByRelIndices (x:xs) cs =
+    let (first, rest) = splitAt' x cs
+     in T.pack first : splitTextByRelIndices xs rest
+
+-- Note: don't replace this with T.splitAt, which is not sensitive
+-- to character widths!
+splitAt' :: Int -> [Char] -> ([Char],[Char])
+splitAt' _ []          = ([],[])
+splitAt' n xs | n <= 0 = ([],xs)
+splitAt' n (x:xs)      = (x:ys,zs)
+  where (ys,zs) = splitAt' (n - charWidth x) xs
 
 ordNub :: (Ord a) => [a] -> [a]
 ordNub l = go Set.empty l
diff --git a/test/command/7214.md b/test/command/7214.md
new file mode 100644
index 000000000..43bf9e4ca
--- /dev/null
+++ b/test/command/7214.md
@@ -0,0 +1,28 @@
+```
+% pandoc
++------------+----------+------------------+
+|日本語      | の文字列 | words in english |
++------------+----------+------------------+
+|abc defghij | def      | xyz              |
++------------+----------+------------------+
+^D
+<table style="width:60%;">
+<colgroup>
+<col style="width: 18%" />
+<col style="width: 15%" />
+<col style="width: 26%" />
+</colgroup>
+<tbody>
+<tr class="odd">
+<td>日本語</td>
+<td>の文字列</td>
+<td>words in english</td>
+</tr>
+<tr class="even">
+<td>abc defghij</td>
+<td>def</td>
+<td>xyz</td>
+</tr>
+</tbody>
+</table>
+```
-- 
cgit v1.2.3


From 7ba8c0d2a5e2b89ae1547759510b2ee21de88cb1 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 11 Apr 2021 21:28:19 -0700
Subject: Move getLang from BCP47 -> T.P.Writers.Shared.

[API change]
---
 pandoc.cabal                          |   1 +
 src/Text/Pandoc/BCP47.hs              |  13 ----
 src/Text/Pandoc/Citeproc/Data.hs      |   5 +-
 src/Text/Pandoc/Shared.hs             |   1 -
 src/Text/Pandoc/Writers/LaTeX/Lang.hs | 117 +++++++++++++++++-----------------
 src/Text/Pandoc/Writers/Shared.hs     |  14 ++++
 6 files changed, 77 insertions(+), 74 deletions(-)

(limited to 'src/Text/Pandoc/Shared.hs')

diff --git a/pandoc.cabal b/pandoc.cabal
index c8ef3cfb9..b6cbb0d7a 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -493,6 +493,7 @@ library
                  unordered-containers  >= 0.2      && < 0.3,
                  xml                   >= 1.3.12   && < 1.4,
                  xml-conduit           >= 1.9.1.1  && < 1.10,
+                 unicode-collation     >= 0.1      && < 0.2,
                  zip-archive           >= 0.2.3.4  && < 0.5,
                  zlib                  >= 0.5      && < 0.7
   if os(windows) && arch(i386)
diff --git a/src/Text/Pandoc/BCP47.hs b/src/Text/Pandoc/BCP47.hs
index 69824aa57..1ecf0bf73 100644
--- a/src/Text/Pandoc/BCP47.hs
+++ b/src/Text/Pandoc/BCP47.hs
@@ -37,19 +37,6 @@ renderLang :: Lang -> T.Text
 renderLang lang = T.intercalate "-" (langLanguage lang : filter (not . T.null)
                     ([langScript lang, langRegion lang] ++ langVariants lang))
 
--- | Get the contents of the `lang` metadata field or variable.
-getLang :: WriterOptions -> Meta -> Maybe T.Text
-getLang opts meta =
-  case lookupContext "lang" (writerVariables opts) of
-        Just s -> Just s
-        _      ->
-          case lookupMeta "lang" meta of
-               Just (MetaBlocks [Para [Str s]])  -> Just s
-               Just (MetaBlocks [Plain [Str s]]) -> Just s
-               Just (MetaInlines [Str s])        -> Just s
-               Just (MetaString s)               -> Just s
-               _                                 -> Nothing
-
 -- | Parse a BCP 47 string as a Lang.  Currently we parse
 -- extensions and private-use fields as "variants," even
 -- though officially they aren't.
diff --git a/src/Text/Pandoc/Citeproc/Data.hs b/src/Text/Pandoc/Citeproc/Data.hs
index dfdaf2598..40430b0f5 100644
--- a/src/Text/Pandoc/Citeproc/Data.hs
+++ b/src/Text/Pandoc/Citeproc/Data.hs
@@ -10,7 +10,7 @@ import qualified Data.Text.Encoding as TE
 import qualified Data.Text as T
 import Data.Text (Text)
 import Text.Pandoc.Citeproc.Util (toIETF)
-import Citeproc (Lang(..), parseLang)
+import UnicodeCollation.Lang (Lang(..), parseLang)
 
 biblatexLocalizations :: [(FilePath, ByteString)]
 biblatexLocalizations = $(embedDir "citeproc/biblatex-localization")
@@ -21,7 +21,8 @@ biblatexStringMap :: M.Map Text (M.Map Text (Text, Text))
 biblatexStringMap = foldr go mempty biblatexLocalizations
  where
   go (fp, bs) =
-    let Lang lang _ = parseLang (toIETF $ T.takeWhile (/= '.') $ T.pack fp)
+    let Lang lang _ _ _ _ _ = parseLang
+                                (toIETF $ T.takeWhile (/= '.') $ T.pack fp)
         ls = T.lines $ TE.decodeUtf8 bs
      in if length ls > 4
            then M.insert lang (toStringMap $ map (T.splitOn "|") ls)
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 95cbdc8b8..e389c1727 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -852,7 +852,6 @@ filteredFilesFromArchive zf f =
     fileAndBinary :: Archive -> FilePath -> Maybe (FilePath, BL.ByteString)
     fileAndBinary a fp = findEntryByPath fp a >>= \e -> Just (fp, fromEntry e)
 
-
 --
 -- IANA URIs
 --
diff --git a/src/Text/Pandoc/Writers/LaTeX/Lang.hs b/src/Text/Pandoc/Writers/LaTeX/Lang.hs
index 41aafee48..871b2692a 100644
--- a/src/Text/Pandoc/Writers/LaTeX/Lang.hs
+++ b/src/Text/Pandoc/Writers/LaTeX/Lang.hs
@@ -15,7 +15,7 @@ module Text.Pandoc.Writers.LaTeX.Lang
     toBabel
   ) where
 import Data.Text (Text)
-import Text.Pandoc.BCP47 (Lang (..))
+import UnicodeCollation.Lang (Lang(..))
 
 
 -- In environments \Arabic instead of \arabic is used
@@ -25,88 +25,89 @@ toPolyglossiaEnv l =
     ("arabic", o) -> ("Arabic", o)
     x             -> x
 
--- Takes a list of the constituents of a BCP 47 language code and
+-- Takes a list of the constituents of a BCP47 language code and
 -- converts it to a Polyglossia (language, options) tuple
 -- http://mirrors.ctan.org/macros/latex/contrib/polyglossia/polyglossia.pdf
 toPolyglossia :: Lang -> (Text, Text)
-toPolyglossia (Lang "ar" _ "DZ" _)        = ("arabic", "locale=algeria")
-toPolyglossia (Lang "ar" _ "IQ" _)        = ("arabic", "locale=mashriq")
-toPolyglossia (Lang "ar" _ "JO" _)        = ("arabic", "locale=mashriq")
-toPolyglossia (Lang "ar" _ "LB" _)        = ("arabic", "locale=mashriq")
-toPolyglossia (Lang "ar" _ "LY" _)        = ("arabic", "locale=libya")
-toPolyglossia (Lang "ar" _ "MA" _)        = ("arabic", "locale=morocco")
-toPolyglossia (Lang "ar" _ "MR" _)        = ("arabic", "locale=mauritania")
-toPolyglossia (Lang "ar" _ "PS" _)        = ("arabic", "locale=mashriq")
-toPolyglossia (Lang "ar" _ "SY" _)        = ("arabic", "locale=mashriq")
-toPolyglossia (Lang "ar" _ "TN" _)        = ("arabic", "locale=tunisia")
-toPolyglossia (Lang "de" _ _ vars)
-  | "1901" `elem` vars                    = ("german", "spelling=old")
-toPolyglossia (Lang "de" _ "AT" vars)
-  | "1901" `elem` vars                    = ("german", "variant=austrian, spelling=old")
-toPolyglossia (Lang "de" _ "AT" _)        = ("german", "variant=austrian")
-toPolyglossia (Lang "de" _ "CH" vars)
-  | "1901" `elem` vars                    = ("german", "variant=swiss, spelling=old")
-toPolyglossia (Lang "de" _ "CH" _)        = ("german", "variant=swiss")
-toPolyglossia (Lang "de" _ _ _)           = ("german", "")
-toPolyglossia (Lang "dsb" _ _ _)          = ("lsorbian", "")
-toPolyglossia (Lang "el" _ "polyton" _)   = ("greek",   "variant=poly")
-toPolyglossia (Lang "en" _ "AU" _)        = ("english", "variant=australian")
-toPolyglossia (Lang "en" _ "CA" _)        = ("english", "variant=canadian")
-toPolyglossia (Lang "en" _ "GB" _)        = ("english", "variant=british")
-toPolyglossia (Lang "en" _ "NZ" _)        = ("english", "variant=newzealand")
-toPolyglossia (Lang "en" _ "UK" _)        = ("english", "variant=british")
-toPolyglossia (Lang "en" _ "US" _)        = ("english", "variant=american")
-toPolyglossia (Lang "grc" _ _ _)          = ("greek",   "variant=ancient")
-toPolyglossia (Lang "hsb" _ _  _)         = ("usorbian", "")
-toPolyglossia (Lang "la" _ _ vars)
-  | "x-classic" `elem` vars               = ("latin", "variant=classic")
-toPolyglossia (Lang "pt" _ "BR" _)        = ("portuguese", "variant=brazilian")
-toPolyglossia (Lang "sl" _ _ _)           = ("slovenian", "")
+toPolyglossia (Lang "ar" _ (Just "DZ") _ _ _)  = ("arabic", "locale=algeria")
+toPolyglossia (Lang "ar" _ (Just "IQ") _ _ _)  = ("arabic", "locale=mashriq")
+toPolyglossia (Lang "ar" _ (Just "JO") _ _ _)  = ("arabic", "locale=mashriq")
+toPolyglossia (Lang "ar" _ (Just "LB") _ _ _)  = ("arabic", "locale=mashriq")
+toPolyglossia (Lang "ar" _ (Just "LY") _ _ _)  = ("arabic", "locale=libya")
+toPolyglossia (Lang "ar" _ (Just "MA") _ _ _)  = ("arabic", "locale=morocco")
+toPolyglossia (Lang "ar" _ (Just "MR") _ _ _)  = ("arabic", "locale=mauritania")
+toPolyglossia (Lang "ar" _ (Just "PS") _ _ _)  = ("arabic", "locale=mashriq")
+toPolyglossia (Lang "ar" _ (Just "SY") _ _ _)  = ("arabic", "locale=mashriq")
+toPolyglossia (Lang "ar" _ (Just "TN") _ _ _)  = ("arabic", "locale=tunisia")
+toPolyglossia (Lang "de" _ _ vars _ _)
+  | "1901" `elem` vars                         = ("german", "spelling=old")
+toPolyglossia (Lang "de" _ (Just "AT") vars _ _)
+  | "1901" `elem` vars         = ("german", "variant=austrian, spelling=old")
+toPolyglossia (Lang "de" _ (Just "AT") _ _ _)  = ("german", "variant=austrian")
+toPolyglossia (Lang "de" _ (Just "CH") vars _ _)
+  | "1901" `elem` vars        = ("german", "variant=swiss, spelling=old")
+toPolyglossia (Lang "de" _ (Just "CH") _ _ _ _) = ("german", "variant=swiss")
+toPolyglossia (Lang "de" _ _ _ _ _)           = ("german", "")
+toPolyglossia (Lang "dsb" _ _ _ _ _)          = ("lsorbian", "")
+toPolyglossia (Lang "el" _ _ vars _ _)
+  | "polyton" `elem` vars                     = ("greek",   "variant=poly")
+toPolyglossia (Lang "en" _ (Just "AU") _ _ _) = ("english", "variant=australian")
+toPolyglossia (Lang "en" _ (Just "CA") _ _ _) = ("english", "variant=canadian")
+toPolyglossia (Lang "en" _ (Just "GB") _ _ _) = ("english", "variant=british")
+toPolyglossia (Lang "en" _ (Just "NZ") _ _ _) = ("english", "variant=newzealand")
+toPolyglossia (Lang "en" _ (Just "UK") _ _ _) = ("english", "variant=british")
+toPolyglossia (Lang "en" _ (Just "US") _ _ _) = ("english", "variant=american")
+toPolyglossia (Lang "grc" _ _ _ _ _)          = ("greek",   "variant=ancient")
+toPolyglossia (Lang "hsb" _ _ _ _ _)          = ("usorbian", "")
+toPolyglossia (Lang "la" _ _ vars _ _)
+  | "x-classic" `elem` vars                   = ("latin", "variant=classic")
+toPolyglossia (Lang "pt" _ "BR" _ _ _)        = ("portuguese", "variant=brazilian")
+toPolyglossia (Lang "sl" _ _ _ _ _)           = ("slovenian", "")
 toPolyglossia x                           = (commonFromBcp47 x, "")
 
--- Takes a list of the constituents of a BCP 47 language code and
+-- Takes a list of the constituents of a BCP47 language code and
 -- converts it to a Babel language string.
 -- http://mirrors.ctan.org/macros/latex/required/babel/base/babel.pdf
 -- List of supported languages (slightly outdated):
 -- http://tug.ctan.org/language/hyph-utf8/doc/generic/hyph-utf8/hyphenation.pdf
 toBabel :: Lang -> Text
-toBabel (Lang "de" _ "AT" vars)
+toBabel (Lang "de" _ (Just "AT") vars _ _)
   | "1901" `elem` vars                  = "austrian"
   | otherwise                           = "naustrian"
-toBabel (Lang "de" _ "CH" vars)
+toBabel (Lang "de" _ (Just "CH") vars _ _)
   | "1901" `elem` vars                  = "swissgerman"
   | otherwise                           = "nswissgerman"
-toBabel (Lang "de" _ _ vars)
+toBabel (Lang "de" _ _ vars _ _)
   | "1901" `elem` vars                  = "german"
   | otherwise                           = "ngerman"
-toBabel (Lang "dsb" _ _ _)              = "lowersorbian"
+toBabel (Lang "dsb" _ _ _ _ _)          = "lowersorbian"
 toBabel (Lang "el" _ _ vars)
   | "polyton" `elem` vars               = "polutonikogreek"
-toBabel (Lang "en" _ "AU" _)            = "australian"
-toBabel (Lang "en" _ "CA" _)            = "canadian"
-toBabel (Lang "en" _ "GB" _)            = "british"
-toBabel (Lang "en" _ "NZ" _)            = "newzealand"
-toBabel (Lang "en" _ "UK" _)            = "british"
-toBabel (Lang "en" _ "US" _)            = "american"
-toBabel (Lang "fr" _ "CA" _)            = "canadien"
-toBabel (Lang "fra" _ _ vars)
+toBabel (Lang "en" _ (Just "AU") _ _ _) = "australian"
+toBabel (Lang "en" _ (Just "CA") _ _ _) = "canadian"
+toBabel (Lang "en" _ (Just "GB") _ _ _) = "british"
+toBabel (Lang "en" _ (Just "NZ") _ _ _) = "newzealand"
+toBabel (Lang "en" _ (Just "UK") _ _ _) = "british"
+toBabel (Lang "en" _ (Just "US") _ _ _) = "american"
+toBabel (Lang "fr" _ (Just "CA") _ _ _) = "canadien"
+toBabel (Lang "fra" _ _ vars _ _)
   | "aca" `elem` vars                   = "acadian"
-toBabel (Lang "grc" _ _ _)              = "polutonikogreek"
-toBabel (Lang "hsb" _ _ _)              = "uppersorbian"
-toBabel (Lang "la" _ _ vars)
+toBabel (Lang "grc" _ _ _ _ _)          = "polutonikogreek"
+toBabel (Lang "hsb" _ _ _ _ _)          = "uppersorbian"
+toBabel (Lang "la" _ _ vars _ _)
   | "x-classic" `elem` vars             = "classiclatin"
-toBabel (Lang "pt" _ "BR" _)            = "brazilian"
-toBabel (Lang "sl" _ _ _)               = "slovene"
+toBabel (Lang "pt" _ (Just "BR") _ _ _) = "brazilian"
+toBabel (Lang "sl" _ _ _ _ _)           = "slovene"
 toBabel x                               = commonFromBcp47 x
 
--- Takes a list of the constituents of a BCP 47 language code
+-- Takes a list of the constituents of a BCP47 language code
 -- and converts it to a string shared by Babel and Polyglossia.
 -- https://tools.ietf.org/html/bcp47#section-2.1
 commonFromBcp47 :: Lang -> Text
-commonFromBcp47 (Lang "sr" "Cyrl" _ _)          = "serbianc"
-commonFromBcp47 (Lang "zh" "Latn" _ vars)
-  | "pinyin" `elem` vars                        = "pinyin"
-commonFromBcp47 (Lang l _ _ _) = fromIso l
+commonFromBcp47 (Lang "sr" (Just "Cyrl") _ _ _ _)      = "serbianc"
+commonFromBcp47 (Lang "zh" (Just "Latn") _ vars _ _)
+  | "pinyin" `elem` vars                               = "pinyin"
+commonFromBcp47 (Lang l _ _ _ _ _) = fromIso l
   where
     fromIso "af"  = "afrikaans"
     fromIso "am"  = "amharic"
diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs
index 91ecb310b..fcb47bd5a 100644
--- a/src/Text/Pandoc/Writers/Shared.hs
+++ b/src/Text/Pandoc/Writers/Shared.hs
@@ -20,6 +20,7 @@ module Text.Pandoc.Writers.Shared (
                      , setField
                      , resetField
                      , defField
+                     , getLang
                      , tagWithAttrs
                      , isDisplayMath
                      , fixDisplayMath
@@ -147,6 +148,19 @@ defField field val (Context m) =
   where
     f _newval oldval = oldval
 
+-- | Get the contents of the `lang` metadata field or variable.
+getLang :: WriterOptions -> Meta -> Maybe Text
+getLang opts meta =
+  case lookupContext "lang" (writerVariables opts) of
+        Just s -> Just s
+        _      ->
+          case lookupMeta "lang" meta of
+               Just (MetaBlocks [Para [Str s]])  -> Just s
+               Just (MetaBlocks [Plain [Str s]]) -> Just s
+               Just (MetaInlines [Str s])        -> Just s
+               Just (MetaString s)               -> Just s
+               _                                 -> Nothing
+
 -- | Produce an HTML tag with the given pandoc attributes.
 tagWithAttrs :: HasChars a => T.Text -> Attr -> Doc a
 tagWithAttrs tag (ident,classes,kvs) = hsep
-- 
cgit v1.2.3


From 6e45607f9948f45b2e94f54b4825b667ca0d5441 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 1 May 2021 13:17:45 -0700
Subject: Change reader types, allowing better tracking of source positions.

Previously, when multiple file arguments were provided, pandoc
simply concatenated them and passed the contents to the readers,
which took a Text argument.

As a result, the readers had no way of knowing which file
was the source of any particular bit of text.  This meant that
we couldn't report accurate source positions on errors or
include accurate source positions as attributes in the AST.
More seriously, it meant that we couldn't resolve resource
paths relative to the files containing them
(see e.g. #5501, #6632, #6384, #3752).

Add Text.Pandoc.Sources (exported module), with a `Sources` type
and a `ToSources` class.  A `Sources` wraps a list of `(SourcePos,
Text)` pairs. [API change] A parsec `Stream` instance is provided for
`Sources`.  The module also exports versions of parsec's `satisfy` and
other Char parsers that track source positions accurately from a
`Sources` stream (or any instance of the new `UpdateSourcePos` class).

Text.Pandoc.Parsing now exports these modified Char parsers instead of
the ones parsec provides.  Modified parsers to use a `Sources` as stream
[API change].

The readers that previously took a `Text` argument have been
modified to take any instance of `ToSources`. So, they may still
be used with a `Text`, but they can also be used with a `Sources`
object.

In Text.Pandoc.Error, modified the constructor PandocParsecError
to take a `Sources` rather than a `Text` as first argument,
so parse error locations can be accurately reported.

T.P.Error: showPos, do not print "-" as source name.
---
 .hlint.yaml                               |   1 +
 pandoc.cabal                              |   1 +
 src/Text/Pandoc/App.hs                    |   8 +-
 src/Text/Pandoc/App/Opt.hs                |   2 +-
 src/Text/Pandoc/Citeproc/BibTeX.hs        |  14 +-
 src/Text/Pandoc/Error.hs                  |  39 ++-
 src/Text/Pandoc/Logging.hs                |   8 +-
 src/Text/Pandoc/Parsing.hs                | 474 ++++++++++++++++++------------
 src/Text/Pandoc/Readers.hs                |  97 +++---
 src/Text/Pandoc/Readers/BibTeX.hs         |  13 +-
 src/Text/Pandoc/Readers/CSV.hs            |  14 +-
 src/Text/Pandoc/Readers/CommonMark.hs     |  70 +++--
 src/Text/Pandoc/Readers/Creole.hs         |  11 +-
 src/Text/Pandoc/Readers/CslJson.hs        |   9 +-
 src/Text/Pandoc/Readers/DocBook.hs        |  11 +-
 src/Text/Pandoc/Readers/DokuWiki.hs       |  17 +-
 src/Text/Pandoc/Readers/FB2.hs            |   9 +-
 src/Text/Pandoc/Readers/HTML.hs           |  33 ++-
 src/Text/Pandoc/Readers/Haddock.hs        |  12 +-
 src/Text/Pandoc/Readers/Ipynb.hs          |   8 +-
 src/Text/Pandoc/Readers/JATS.hs           |  11 +-
 src/Text/Pandoc/Readers/Jira.hs           |  16 +-
 src/Text/Pandoc/Readers/LaTeX.hs          |  23 +-
 src/Text/Pandoc/Readers/LaTeX/Citation.hs |   2 +-
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs  |  12 +-
 src/Text/Pandoc/Readers/LaTeX/Types.hs    |  15 +-
 src/Text/Pandoc/Readers/Man.hs            |  29 +-
 src/Text/Pandoc/Readers/Markdown.hs       |  49 +--
 src/Text/Pandoc/Readers/MediaWiki.hs      |  13 +-
 src/Text/Pandoc/Readers/Metadata.hs       |  26 +-
 src/Text/Pandoc/Readers/Muse.hs           |  17 +-
 src/Text/Pandoc/Readers/Native.hs         |  12 +-
 src/Text/Pandoc/Readers/OPML.hs           |  13 +-
 src/Text/Pandoc/Readers/Org.hs            |  11 +-
 src/Text/Pandoc/Readers/Org/Inlines.hs    |   3 +-
 src/Text/Pandoc/Readers/Org/Parsing.hs    |   2 +-
 src/Text/Pandoc/Readers/RST.hs            |  76 ++---
 src/Text/Pandoc/Readers/Roff.hs           |  34 +--
 src/Text/Pandoc/Readers/TWiki.hs          |  12 +-
 src/Text/Pandoc/Readers/Textile.hs        | 172 +++++------
 src/Text/Pandoc/Readers/TikiWiki.hs       |  12 +-
 src/Text/Pandoc/Readers/Txt2Tags.hs       |  12 +-
 src/Text/Pandoc/Readers/Vimwiki.hs        |  19 +-
 src/Text/Pandoc/Shared.hs                 |   1 +
 src/Text/Pandoc/Sources.hs                | 195 ++++++++++++
 test/Tests/Readers/Markdown.hs            |   4 +-
 46 files changed, 1025 insertions(+), 617 deletions(-)
 create mode 100644 src/Text/Pandoc/Sources.hs

(limited to 'src/Text/Pandoc/Shared.hs')

diff --git a/.hlint.yaml b/.hlint.yaml
index 350794803..ad0f7ddb9 100644
--- a/.hlint.yaml
+++ b/.hlint.yaml
@@ -9,6 +9,7 @@
 # Ignore some builtin hints
 #
 - ignore: {name: "Avoid lambda"}
+- ignore: {name: "Use bimap"}
 - ignore: {name: "Eta reduce"}
 - ignore: {name: "Evaluate"}
 - ignore: {name: "Reduce duplication"} # TODO: could be more fine-grained
diff --git a/pandoc.cabal b/pandoc.cabal
index 8ea3aa681..de7951c54 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -513,6 +513,7 @@ library
                    Text.Pandoc.Options,
                    Text.Pandoc.Extensions,
                    Text.Pandoc.Shared,
+                   Text.Pandoc.Sources,
                    Text.Pandoc.MediaBag,
                    Text.Pandoc.Error,
                    Text.Pandoc.Filter,
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index 96e4b5f47..98b072ffb 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -160,9 +160,11 @@ convertWithOpts opts = do
                                     else optTabStop opts)
 
 
-    let readSources :: [FilePath] -> PandocIO Text
-        readSources srcs = convertTabs . T.intercalate (T.pack "\n") <$>
-                              mapM readSource srcs
+    let readSources :: [FilePath] -> PandocIO [(FilePath, Text)]
+        readSources srcs =
+          mapM (\fp -> do
+                   t <- readSource fp
+                   return (if fp == "-" then "" else fp, convertTabs t)) srcs
 
 
     outputSettings <- optToOutputSettings opts
diff --git a/src/Text/Pandoc/App/Opt.hs b/src/Text/Pandoc/App/Opt.hs
index c72f63464..d54d932b7 100644
--- a/src/Text/Pandoc/App/Opt.hs
+++ b/src/Text/Pandoc/App/Opt.hs
@@ -687,7 +687,7 @@ yamlToMeta (Mapping _ _ m) =
   where
     pMetaString = pure . MetaString <$> P.manyChar P.anyChar
     runEverything p =
-      runPure (P.readWithM p (def :: P.ParserState) "")
+      runPure (P.readWithM p (def :: P.ParserState) ("" :: Text))
       >>= fmap (Meta . flip P.runF def)
 yamlToMeta _ = return mempty
 
diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs
index 510e56f9c..f6833000c 100644
--- a/src/Text/Pandoc/Citeproc/BibTeX.hs
+++ b/src/Text/Pandoc/Citeproc/BibTeX.hs
@@ -59,10 +59,11 @@ data Variant = Bibtex | Biblatex
   deriving (Show, Eq, Ord)
 
 -- | Parse BibTeX or BibLaTeX into a list of 'Reference's.
-readBibtexString :: Variant           -- ^ bibtex or biblatex
+readBibtexString :: ToSources a
+                 => Variant           -- ^ bibtex or biblatex
                  -> Locale            -- ^ Locale
                  -> (Text -> Bool)    -- ^ Filter on citation ids
-                 -> Text              -- ^ bibtex/biblatex text
+                 -> a                 -- ^ bibtex/biblatex text
                  -> Either ParseError [Reference Inlines]
 readBibtexString variant locale idpred contents = do
   case runParser (((resolveCrossRefs variant <$> bibEntries) <* eof) >>=
@@ -70,7 +71,7 @@ readBibtexString variant locale idpred contents = do
                       filter (\item -> idpred (identifier item) &&
                                         entryType item /= "xdata"))
            (fromMaybe defaultLang $ localeLanguage locale, Map.empty)
-           "" contents of
+           "" (toSources contents) of
           Left err -> Left err
           Right xs -> return xs
 
@@ -339,7 +340,7 @@ defaultLang = Lang "en" Nothing (Just "US") [] [] []
 -- a map of bibtex "string" macros
 type StringMap = Map.Map Text Text
 
-type BibParser = Parser Text (Lang, StringMap)
+type BibParser = Parser Sources (Lang, StringMap)
 
 data Item = Item{ identifier :: Text
                 , sourcePos  :: SourcePos
@@ -804,7 +805,7 @@ bibEntries = do
                        (bibComment <|> bibPreamble <|> bibString))
 
 bibSkip :: BibParser ()
-bibSkip = () <$ take1WhileP (/='@')
+bibSkip = skipMany1 (satisfy (/='@'))
 
 bibComment :: BibParser ()
 bibComment = do
@@ -829,6 +830,9 @@ bibString = do
   updateState (\(l,m) -> (l, Map.insert k v m))
   return ()
 
+take1WhileP :: Monad m => (Char -> Bool) -> ParserT Sources u m Text
+take1WhileP f = T.pack <$> many1 (satisfy f)
+
 inBraces :: BibParser Text
 inBraces = do
   char '{'
diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs
index 8102f04cc..81eb41f85 100644
--- a/src/Text/Pandoc/Error.hs
+++ b/src/Text/Pandoc/Error.hs
@@ -23,26 +23,27 @@ import Control.Exception (Exception, displayException)
 import Data.Typeable (Typeable)
 import Data.Word (Word8)
 import Data.Text (Text)
+import Data.List (sortOn)
 import qualified Data.Text as T
+import Data.Ord (Down(..))
 import GHC.Generics (Generic)
 import Network.HTTP.Client (HttpException)
 import System.Exit (ExitCode (..), exitWith)
 import System.IO (stderr)
 import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Pandoc.Sources (Sources(..))
 import Text.Printf (printf)
 import Text.Parsec.Error
 import Text.Parsec.Pos hiding (Line)
 import Text.Pandoc.Shared (tshow)
 import Citeproc (CiteprocError, prettyCiteprocError)
 
-type Input = Text
-
 data PandocError = PandocIOError Text IOError
                  | PandocHttpError Text HttpException
                  | PandocShouldNeverHappenError Text
                  | PandocSomeError Text
                  | PandocParseError Text
-                 | PandocParsecError Input ParseError
+                 | PandocParsecError Sources ParseError
                  | PandocMakePDFError Text
                  | PandocOptionError Text
                  | PandocSyntaxMapError Text
@@ -81,22 +82,28 @@ renderError e =
       "Please report this to pandoc's developers: " <> s
     PandocSomeError s -> s
     PandocParseError s -> s
-    PandocParsecError input err' ->
+    PandocParsecError (Sources inputs) err' ->
         let errPos = errorPos err'
             errLine = sourceLine errPos
             errColumn = sourceColumn errPos
-            ls = T.lines input <> [""]
-            errorInFile = if length ls > errLine - 1
-                            then T.concat ["\n", ls !! (errLine - 1)
-                                          ,"\n", T.replicate (errColumn - 1) " "
-                                          ,"^"]
-                        else ""
-        in  "\nError at " <> tshow  err' <>
-                     -- if error comes from a chunk or included file,
-                     -- then we won't get the right text this way:
-                     if sourceName errPos == "source"
-                        then errorInFile
-                        else ""
+            errFile = sourceName errPos
+            errorInFile =
+              case sortOn (Down . sourceLine . fst)
+                      [ (pos,t)
+                        | (pos,t) <- inputs
+                        , sourceName pos == errFile
+                        , sourceLine pos <= errLine
+                      ] of
+                []  -> ""
+                ((pos,txt):_) ->
+                  let ls = T.lines txt <> [""]
+                      ln = errLine - sourceLine pos
+                   in if length ls > ln - 1
+                         then T.concat ["\n", ls !! (ln - 1)
+                                       ,"\n", T.replicate (errColumn - 1) " "
+                                       ,"^"]
+                         else ""
+        in  "\nError at " <> tshow  err' <> errorInFile
     PandocMakePDFError s -> s
     PandocOptionError s -> s
     PandocSyntaxMapError s -> s
diff --git a/src/Text/Pandoc/Logging.hs b/src/Text/Pandoc/Logging.hs
index efd2188f1..8c7292b69 100644
--- a/src/Text/Pandoc/Logging.hs
+++ b/src/Text/Pandoc/Logging.hs
@@ -241,9 +241,11 @@ instance ToJSON LogMessage where
 showPos :: SourcePos -> Text
 showPos pos = Text.pack $ sn ++ "line " ++
      show (sourceLine pos) ++ " column " ++ show (sourceColumn pos)
-  where sn = if sourceName pos == "source" || sourceName pos == ""
-                then ""
-                else sourceName pos ++ " "
+  where
+    sn' = sourceName pos
+    sn = if sn' == "source" || sn' == "" || sn' == "-"
+            then ""
+            else sn' ++ " "
 
 encodeLogMessages :: [LogMessage] -> BL.ByteString
 encodeLogMessages ms =
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 37ab0adaa..11c4c7a62 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -5,7 +5,6 @@
 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
 {-# LANGUAGE IncoherentInstances        #-}
 {-# LANGUAGE MultiParamTypeClasses      #-}
-{-# LANGUAGE ViewPatterns               #-}
 {-# LANGUAGE OverloadedStrings          #-}
 {- |
    Module      : Text.Pandoc.Parsing
@@ -19,8 +18,7 @@
 A utility library with parsers used in pandoc readers.
 -}
 
-module Text.Pandoc.Parsing ( take1WhileP,
-                             takeP,
+module Text.Pandoc.Parsing ( module Text.Pandoc.Sources,
                              countChar,
                              textStr,
                              anyLine,
@@ -134,22 +132,10 @@ module Text.Pandoc.Parsing ( take1WhileP,
                              getInput,
                              setInput,
                              unexpected,
-                             char,
-                             letter,
-                             digit,
-                             alphaNum,
                              skipMany,
                              skipMany1,
-                             spaces,
-                             space,
-                             anyChar,
-                             satisfy,
-                             newline,
-                             string,
                              count,
                              eof,
-                             noneOf,
-                             oneOf,
                              lookAhead,
                              notFollowedBy,
                              many,
@@ -174,6 +160,8 @@ module Text.Pandoc.Parsing ( take1WhileP,
                              SourcePos,
                              getPosition,
                              setPosition,
+                             sourceName,
+                             setSourceName,
                              sourceColumn,
                              sourceLine,
                              setSourceColumn,
@@ -189,16 +177,25 @@ module Text.Pandoc.Parsing ( take1WhileP,
 where
 
 import Control.Monad.Identity
+    ( guard,
+      join,
+      unless,
+      when,
+      void,
+      liftM2,
+      liftM,
+      Identity(..),
+      MonadPlus(mzero) )
 import Control.Monad.Reader
+    ( asks, runReader, MonadReader(ask), Reader, ReaderT(ReaderT) )
 import Data.Char (chr, isAlphaNum, isAscii, isAsciiUpper, isAsciiLower,
                   isPunctuation, isSpace, ord, toLower, toUpper)
-import Data.Default
+import Data.Default ( Default(..) )
 import Data.Functor (($>))
 import Data.List (intercalate, transpose)
 import qualified Data.Map as M
 import Data.Maybe (fromMaybe)
 import qualified Data.Set as Set
-import Data.String
 import Data.Text (Text)
 import qualified Data.Text as T
 import Text.HTML.TagSoup.Entity (lookupEntity)
@@ -207,22 +204,108 @@ import Text.Pandoc.Builder (Blocks, HasMeta (..), Inlines, trimInlines)
 import qualified Text.Pandoc.Builder as B
 import Text.Pandoc.Class.PandocMonad (PandocMonad, readFileFromDirs, report)
 import Text.Pandoc.Definition
+    ( Target,
+      nullMeta,
+      nullAttr,
+      Meta,
+      ColWidth(ColWidthDefault, ColWidth),
+      TableFoot(TableFoot),
+      TableBody(TableBody),
+      Attr,
+      TableHead(TableHead),
+      Row(..),
+      Alignment(..),
+      Inline(Str),
+      ListNumberDelim(..),
+      ListAttributes,
+      ListNumberStyle(..) )
 import Text.Pandoc.Logging
+    ( LogMessage(CouldNotLoadIncludeFile, DuplicateIdentifier) )
 import Text.Pandoc.Options
+    ( extensionEnabled,
+      Extension(Ext_old_dashes, Ext_tex_math_dollars,
+                Ext_tex_math_single_backslash, Ext_tex_math_double_backslash,
+                Ext_auto_identifiers, Ext_ascii_identifiers, Ext_smart),
+      ReaderOptions(readerTabStop, readerColumns, readerExtensions) )
 import Text.Pandoc.Readers.LaTeX.Types (Macro)
 import Text.Pandoc.Shared
+    ( uniqueIdent,
+      tshow,
+      mapLeft,
+      compactify,
+      trim,
+      trimr,
+      splitTextByIndices,
+      safeRead,
+      trimMath,
+      schemes,
+      escapeURI )
+import Text.Pandoc.Sources
 import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
 import Text.Pandoc.XML (fromEntities)
-import Text.Parsec hiding (token)
-import Text.Parsec.Pos (initialPos, newPos, updatePosString)
-
-import Control.Monad.Except
+import Text.Parsec
+    ( between,
+      setSourceName,
+      Parsec,
+      Column,
+      Line,
+      incSourceLine,
+      incSourceColumn,
+      setSourceLine,
+      setSourceColumn,
+      sourceLine,
+      sourceColumn,
+      sourceName,
+      setSourceName,
+      setPosition,
+      getPosition,
+      updateState,
+      setState,
+      getState,
+      optionMaybe,
+      optional,
+      option,
+      endBy1,
+      endBy,
+      sepEndBy1,
+      sepEndBy,
+      sepBy1,
+      sepBy,
+      try,
+      choice,
+      (<?>),
+      (<|>),
+      manyTill,
+      many1,
+      many,
+      notFollowedBy,
+      lookAhead,
+      eof,
+      count,
+      skipMany1,
+      skipMany,
+      unexpected,
+      setInput,
+      getInput,
+      anyToken,
+      tokenPrim,
+      parse,
+      runParserT,
+      runParser,
+      ParseError,
+      ParsecT,
+      SourcePos,
+      Stream(..) )
+import Text.Parsec.Pos (initialPos, newPos)
+import Control.Monad.Except ( MonadError(throwError) )
 import Text.Pandoc.Error
+    ( PandocError(PandocParseError, PandocParsecError) )
 
 type Parser t s = Parsec t s
 
 type ParserT = ParsecT
 
+
 -- | Reader monad wrapping the parser state. This is used to possibly delay
 -- evaluation until all relevant information has been parsed and made available
 -- in the parser state.
@@ -251,70 +334,48 @@ instance (Semigroup a, Monoid a) => Monoid (Future s a) where
   mappend = (<>)
 
 -- | Like @count@, but packs its result
-countChar :: (Stream s m Char, Monad m)
+countChar :: (Stream s m Char, UpdateSourcePos s Char, Monad m)
           => Int
           -> ParsecT s st m Char
           -> ParsecT s st m Text
 countChar n = fmap T.pack . count n
 
 -- | Like @string@, but uses @Text@.
-textStr :: Stream s m Char => Text -> ParsecT s u m Text
+textStr :: (Stream s m Char, UpdateSourcePos s Char)
+        => Text -> ParsecT s u m Text
 textStr t = string (T.unpack t) $> t
 
--- | Parse characters while a predicate is true.
-take1WhileP :: Monad m
-           => (Char -> Bool)
-           -> ParserT Text st m Text
-take1WhileP f = do
-  -- needed to persuade parsec that this won't match an empty string:
-  c <- satisfy f
-  inp <- getInput
-  pos <- getPosition
-  let (t, rest) = T.span f inp
-  setInput rest
-  setPosition $
-    if f '\t' || f '\n'
-       then updatePosString pos $ T.unpack t
-       else incSourceColumn pos (T.length t)
-  return $ T.singleton c <> t
-
--- Parse n characters of input (or the rest of the input if
--- there aren't n characters).
-takeP :: Monad m => Int -> ParserT Text st m Text
-takeP n = do
-  guard (n > 0)
-  -- faster than 'count n anyChar'
-  inp <- getInput
-  pos <- getPosition
-  let (xs, rest) = T.splitAt n inp
-  -- needed to persuade parsec that this won't match an empty string:
-  anyChar
-  setInput rest
-  setPosition $ updatePosString pos $ T.unpack xs
-  return xs
-
--- | Parse any line of text
-anyLine :: Monad m => ParserT Text st m Text
+
+-- | Parse any line of text, returning the contents without the
+-- final newline.
+anyLine :: Monad m => ParserT Sources st m Text
 anyLine = do
   -- This is much faster than:
   -- manyTill anyChar newline
   inp <- getInput
-  pos <- getPosition
-  case T.break (=='\n') inp of
-       (this, T.uncons -> Just ('\n', rest)) -> do
-         -- needed to persuade parsec that this won't match an empty string:
-         anyChar
-         setInput rest
-         setPosition $ incSourceLine (setSourceColumn pos 1) 1
-         return this
-       _ -> mzero
+  case inp of
+    Sources [] -> mzero
+    Sources ((fp,t):inps) ->
+      -- we assume that lines don't span different input files
+      case T.break (=='\n') t of
+           (this, rest)
+             | T.null rest
+             , not (null inps) ->
+                -- line may span different input files, so do it
+                 -- character by character
+                 T.pack <$> manyTill anyChar newline
+             | otherwise -> do --  either end of inputs or newline in rest
+                 setInput $ Sources ((fp, rest):inps)
+                 char '\n' -- needed so parsec knows we won't match empty string
+                           -- and so source pos is updated
+                 return this
 
 -- | Parse any line, include the final newline in the output
-anyLineNewline :: Monad m => ParserT Text st m Text
+anyLineNewline :: Monad m => ParserT Sources st m Text
 anyLineNewline = (<> "\n") <$> anyLine
 
 -- | Parse indent by specified number of spaces (or equiv. tabs)
-indentWith :: Stream s m Char
+indentWith :: (Stream s m Char, UpdateSourcePos s Char)
            => HasReaderOptions st
            => Int -> ParserT s st m Text
 indentWith num = do
@@ -399,11 +460,13 @@ notFollowedBy' p  = try $ join $  do  a <- try p
                                   return (return ())
 -- (This version due to Andrew Pimlott on the Haskell mailing list.)
 
-oneOfStrings' :: Stream s m Char => (Char -> Char -> Bool) -> [Text] -> ParserT s st m Text
+oneOfStrings' :: (Stream s m Char, UpdateSourcePos s Char)
+              => (Char -> Char -> Bool) -> [Text] -> ParserT s st m Text
 oneOfStrings' f = fmap T.pack . oneOfStrings'' f . fmap T.unpack
 
 -- TODO: This should be re-implemented in a Text-aware way
-oneOfStrings'' :: Stream s m Char => (Char -> Char -> Bool) -> [String] -> ParserT s st m String
+oneOfStrings'' :: (Stream s m Char, UpdateSourcePos s Char)
+               => (Char -> Char -> Bool) -> [String] -> ParserT s st m String
 oneOfStrings'' _ []   = Prelude.fail "no strings"
 oneOfStrings'' matches strs = try $ do
   c <- anyChar
@@ -418,14 +481,16 @@ oneOfStrings'' matches strs = try $ do
 -- | Parses one of a list of strings.  If the list contains
 -- two strings one of which is a prefix of the other, the longer
 -- string will be matched if possible.
-oneOfStrings :: Stream s m Char => [Text] -> ParserT s st m Text
+oneOfStrings :: (Stream s m Char, UpdateSourcePos s Char)
+             => [Text] -> ParserT s st m Text
 oneOfStrings = oneOfStrings' (==)
 
 -- | Parses one of a list of strings (tried in order), case insensitive.
 
 -- TODO: This will not be accurate with general Unicode (neither
 -- Text.toLower nor Text.toCaseFold can be implemented with a map)
-oneOfStringsCI :: Stream s m Char => [Text] -> ParserT s st m Text
+oneOfStringsCI :: (Stream s m Char, UpdateSourcePos s Char)
+               => [Text] -> ParserT s st m Text
 oneOfStringsCI = oneOfStrings' ciMatch
   where ciMatch x y = toLower' x == toLower' y
         -- this optimizes toLower by checking common ASCII case
@@ -436,11 +501,13 @@ oneOfStringsCI = oneOfStrings' ciMatch
                    | otherwise = toLower c
 
 -- | Parses a space or tab.
-spaceChar :: Stream s m Char => ParserT s st m Char
+spaceChar :: (Stream s m Char, UpdateSourcePos s Char)
+          => ParserT s st m Char
 spaceChar = satisfy $ \c -> c == ' ' || c == '\t'
 
 -- | Parses a nonspace, nonnewline character.
-nonspaceChar :: Stream s m Char => ParserT s st m Char
+nonspaceChar :: (Stream s m Char, UpdateSourcePos s Char)
+             => ParserT s st m Char
 nonspaceChar = satisfy (not . isSpaceChar)
 
 isSpaceChar :: Char -> Bool
@@ -451,21 +518,24 @@ isSpaceChar '\r' = True
 isSpaceChar _    = False
 
 -- | Skips zero or more spaces or tabs.
-skipSpaces :: Stream s m Char => ParserT s st m ()
+skipSpaces :: (Stream s m Char, UpdateSourcePos s Char)
+           => ParserT s st m ()
 skipSpaces = skipMany spaceChar
 
 -- | Skips zero or more spaces or tabs, then reads a newline.
-blankline :: Stream s m Char => ParserT s st m Char
+blankline :: (Stream s m Char, UpdateSourcePos s Char)
+          => ParserT s st m Char
 blankline = try $ skipSpaces >> newline
 
 -- | Parses one or more blank lines and returns a string of newlines.
-blanklines :: Stream s m Char => ParserT s st m Text
+blanklines :: (Stream s m Char, UpdateSourcePos s Char)
+           => ParserT s st m Text
 blanklines = T.pack <$> many1 blankline
 
 -- | Gobble n spaces; if tabs are encountered, expand them
 -- and gobble some or all of their spaces, leaving the rest.
 gobbleSpaces :: (HasReaderOptions st, Monad m)
-             => Int -> ParserT Text st m ()
+             => Int -> ParserT Sources st m ()
 gobbleSpaces 0 = return ()
 gobbleSpaces n
   | n < 0     = error "gobbleSpaces called with negative number"
@@ -473,18 +543,26 @@ gobbleSpaces n
       char ' ' <|> eatOneSpaceOfTab
       gobbleSpaces (n - 1)
 
-eatOneSpaceOfTab :: (HasReaderOptions st, Monad m) => ParserT Text st m Char
+eatOneSpaceOfTab :: (HasReaderOptions st, Monad m) => ParserT Sources st m Char
 eatOneSpaceOfTab = do
-  char '\t'
+  lookAhead (char '\t')
+  pos <- getPosition
   tabstop <- getOption readerTabStop
+  -- replace the tab on the input stream with spaces
+  let numSpaces = tabstop - ((sourceColumn pos - 1) `mod` tabstop)
   inp <- getInput
-  setInput $ T.replicate (tabstop - 1) " " <> inp
-  return ' '
+  setInput $
+    case inp of
+      Sources [] -> error "eatOneSpaceOfTab - empty Sources list"
+      Sources ((fp,t):rest) ->
+        -- drop the tab and add spaces
+        Sources ((fp, T.replicate numSpaces " " <> T.drop 1 t):rest)
+  char ' '
 
 -- | Gobble up to n spaces; if tabs are encountered, expand them
 -- and gobble some or all of their spaces, leaving the rest.
 gobbleAtMostSpaces :: (HasReaderOptions st, Monad m)
-                   => Int -> ParserT Text st m Int
+                   => Int -> ParserT Sources st m Int
 gobbleAtMostSpaces 0 = return 0
 gobbleAtMostSpaces n
   | n < 0     = error "gobbleAtMostSpaces called with negative number"
@@ -493,7 +571,8 @@ gobbleAtMostSpaces n
       (+ 1) <$> gobbleAtMostSpaces (n - 1)
 
 -- | Parses material enclosed between start and end parsers.
-enclosed :: (Show end, Stream s  m Char) => ParserT s st m t   -- ^ start parser
+enclosed :: (Show end, Stream s m Char, UpdateSourcePos s Char)
+         => ParserT s st m t   -- ^ start parser
          -> ParserT s st m end  -- ^ end parser
          -> ParserT s st m a    -- ^ content parser (to be used repeatedly)
          -> ParserT s st m [a]
@@ -501,39 +580,41 @@ enclosed start end parser = try $
   start >> notFollowedBy space >> many1Till parser end
 
 -- | Parse string, case insensitive.
-stringAnyCase :: Stream s m Char => Text -> ParserT s st m Text
+stringAnyCase :: (Stream s m Char, UpdateSourcePos s Char)
+              => Text -> ParserT s st m Text
 stringAnyCase = fmap T.pack . stringAnyCase' . T.unpack
 
-stringAnyCase' :: Stream s m Char => String -> ParserT s st m String
+stringAnyCase' :: (Stream s m Char, UpdateSourcePos s Char)
+               => String -> ParserT s st m String
 stringAnyCase' [] = string ""
 stringAnyCase' (x:xs) = do
   firstChar <- char (toUpper x) <|> char (toLower x)
   rest <- stringAnyCase' xs
   return (firstChar:rest)
 
+-- TODO rewrite by just adding to Sources stream?
 -- | Parse contents of 'str' using 'parser' and return result.
-parseFromString :: (Stream s m Char, IsString s)
-                => ParserT s st m r
+parseFromString :: Monad m
+                => ParserT Sources st m r
                 -> Text
-                -> ParserT s st m r
+                -> ParserT Sources st m r
 parseFromString parser str = do
   oldPos <- getPosition
-  setPosition $ initialPos " chunk"
+  setPosition $ initialPos "chunk"
   oldInput <- getInput
-  setInput $ fromString $ T.unpack str
+  setInput $ toSources str
   result <- parser
   spaces
-  eof
   setInput oldInput
   setPosition oldPos
   return result
 
 -- | Like 'parseFromString' but specialized for 'ParserState'.
 -- This resets 'stateLastStrPos', which is almost always what we want.
-parseFromString' :: (Stream s m Char, IsString s, HasLastStrPosition u)
-                 => ParserT s u m a
+parseFromString' :: (Monad m, HasLastStrPosition u)
+                 => ParserT Sources u m a
                  -> Text
-                 -> ParserT s u m a
+                 -> ParserT Sources u m a
 parseFromString' parser str = do
   oldLastStrPos <- getLastStrPos <$> getState
   updateState $ setLastStrPos Nothing
@@ -542,7 +623,7 @@ parseFromString' parser str = do
   return res
 
 -- | Parse raw line block up to and including blank lines.
-lineClump :: Monad m => ParserT Text st m Text
+lineClump :: Monad m => ParserT Sources st m Text
 lineClump = blanklines
           <|> (T.unlines <$> many1 (notFollowedBy blankline >> anyLine))
 
@@ -551,7 +632,7 @@ lineClump = blanklines
 -- pairs of open and close, which must be different. For example,
 -- @charsInBalanced '(' ')' anyChar@ will parse "(hello (there))"
 -- and return "hello (there)".
-charsInBalanced :: Stream s m Char => Char -> Char -> ParserT s st m Char
+charsInBalanced :: (Stream s m Char, UpdateSourcePos s Char) => Char -> Char -> ParserT s st m Char
                 -> ParserT s st m Text
 charsInBalanced open close parser = try $ do
   char open
@@ -570,7 +651,7 @@ charsInBalanced open close parser = try $ do
 -- Auxiliary functions for romanNumeral:
 
 -- | Parses a roman numeral (uppercase or lowercase), returns number.
-romanNumeral :: Stream s m Char => Bool                  -- ^ Uppercase if true
+romanNumeral :: (Stream s m Char, UpdateSourcePos s Char) => Bool                  -- ^ Uppercase if true
              -> ParserT s st m Int
 romanNumeral upperCase = do
     let rchar uc = char $ if upperCase then uc else toLower uc
@@ -606,7 +687,7 @@ romanNumeral upperCase = do
 
 -- | Parses an email address; returns original and corresponding
 -- escaped mailto: URI.
-emailAddress :: Stream s m Char => ParserT s st m (Text, Text)
+emailAddress :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (Text, Text)
 emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain)
  where toResult mbox dom = let full = fromEntities $ T.pack $ mbox ++ '@':dom
                            in  (full, escapeURI $ "mailto:" <> full)
@@ -630,11 +711,11 @@ emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain)
        isEmailPunct c    = T.any (== c) "!\"#$%&'*+-/=?^_{|}~;"
 
 
-uriScheme :: Stream s m Char => ParserT s st m Text
+uriScheme :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Text
 uriScheme = oneOfStringsCI (Set.toList schemes)
 
 -- | Parses a URI. Returns pair of original and URI-escaped version.
-uri :: Stream s m Char => ParserT s st m (Text, Text)
+uri :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (Text, Text)
 uri = try $ do
   scheme <- uriScheme
   char ':'
@@ -677,7 +758,7 @@ uri = try $ do
     uriChunkBetween l r = try $ do chunk <- between (char l) (char r) uriChunk
                                    return (T.pack $ [l] ++ chunk ++ [r])
 
-mathInlineWith :: Stream s m Char  => Text -> Text -> ParserT s st m Text
+mathInlineWith :: (Stream s m Char, UpdateSourcePos s Char)  => Text -> Text -> ParserT s st m Text
 mathInlineWith op cl = try $ do
   textStr op
   when (op == "$") $ notFollowedBy space
@@ -698,10 +779,10 @@ mathInlineWith op cl = try $ do
   notFollowedBy digit  -- to prevent capture of $5
   return $ trimMath $ T.concat words'
  where
-  inBalancedBraces :: Stream s m Char => Int -> Text -> ParserT s st m Text
+  inBalancedBraces :: (Stream s m Char, UpdateSourcePos s Char) => Int -> Text -> ParserT s st m Text
   inBalancedBraces n = fmap T.pack . inBalancedBraces' n . T.unpack
 
-  inBalancedBraces' :: Stream s m Char => Int -> String -> ParserT s st m String
+  inBalancedBraces' :: (Stream s m Char, UpdateSourcePos s Char) => Int -> String -> ParserT s st m String
   inBalancedBraces' 0 "" = do
     c <- anyChar
     if c == '{'
@@ -718,13 +799,13 @@ mathInlineWith op cl = try $ do
          '{' -> inBalancedBraces' (numOpen + 1) (c:xs)
          _   -> inBalancedBraces' numOpen (c:xs)
 
-mathDisplayWith :: Stream s m Char => Text -> Text -> ParserT s st m Text
+mathDisplayWith :: (Stream s m Char, UpdateSourcePos s Char) => Text -> Text -> ParserT s st m Text
 mathDisplayWith op cl = try $ fmap T.pack $ do
   textStr op
   many1Till (satisfy (/= '\n') <|> (newline <* notFollowedBy' blankline))
             (try $ textStr cl)
 
-mathDisplay :: (HasReaderOptions st, Stream s m Char)
+mathDisplay :: (HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char)
             => ParserT s st m Text
 mathDisplay =
       (guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$")
@@ -733,7 +814,7 @@ mathDisplay =
   <|> (guardEnabled Ext_tex_math_double_backslash >>
        mathDisplayWith "\\\\[" "\\\\]")
 
-mathInline :: (HasReaderOptions st , Stream s m Char)
+mathInline :: (HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char)
            => ParserT s st m Text
 mathInline =
       (guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$")
@@ -746,7 +827,7 @@ mathInline =
 -- displacement (the difference between the source column at the end
 -- and the source column at the beginning). Vertical displacement
 -- (source row) is ignored.
-withHorizDisplacement :: Stream s m Char
+withHorizDisplacement :: (Stream s m Char, UpdateSourcePos s Char)
                       => ParserT s st m a  -- ^ Parser to apply
                       -> ParserT s st m (a, Int) -- ^ (result, displacement)
 withHorizDisplacement parser = do
@@ -758,30 +839,37 @@ withHorizDisplacement parser = do
 -- | Applies a parser and returns the raw string that was parsed,
 -- along with the value produced by the parser.
 withRaw :: Monad m
-        => ParsecT Text st m a
-        -> ParsecT Text st m (a, Text)
+        => ParsecT Sources st m a
+        -> ParsecT Sources st m (a, Text)
 withRaw parser = do
-  pos1 <- getPosition
-  inp <- getInput
+  inps1 <- getInput
   result <- parser
-  pos2 <- getPosition
-  let (l1,c1) = (sourceLine pos1, sourceColumn pos1)
-  let (l2,c2) = (sourceLine pos2, sourceColumn pos2)
-  let inplines = take ((l2 - l1) + 1) $ T.lines inp
-  let raw = case inplines of
-                []  -> ""
-                [l] -> T.take (c2 - c1) l
-                ls  -> T.unlines (init ls) <> T.take (c2 - 1) (last ls)
-  return (result, raw)
+  inps2 <- getInput
+  -- 'raw' is the difference between inps1 and inps2
+  return (result, sourcesDifference inps1 inps2)
+
+sourcesDifference :: Sources -> Sources -> Text
+sourcesDifference (Sources is1) (Sources is2) = go is1 is2
+ where
+   go inps1 inps2 =
+    case (inps1, inps2) of
+      ([], _) -> mempty
+      (_, []) -> mconcat $ map snd inps1
+      ((p1,t1):rest1, (p2, t2):rest2)
+        | p1 == p2
+        , t1 == t2  -> go rest1 rest2
+        | p1 == p2
+        , t1 /= t2  -> fromMaybe mempty $ T.stripSuffix t2 t1
+        | otherwise -> t1 <> go rest1 inps2
 
 -- | Parses backslash, then applies character parser.
-escaped :: Stream s m Char
+escaped :: (Stream s m Char, UpdateSourcePos s Char)
         => ParserT s st m Char  -- ^ Parser for character to escape
         -> ParserT s st m Char
 escaped parser = try $ char '\\' >> parser
 
 -- | Parse character entity.
-characterReference :: Stream s m Char => ParserT s st m Char
+characterReference :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Char
 characterReference = try $ do
   char '&'
   ent <- many1Till nonspaceChar (char ';')
@@ -794,19 +882,19 @@ characterReference = try $ do
        _            -> Prelude.fail "entity not found"
 
 -- | Parses an uppercase roman numeral and returns (UpperRoman, number).
-upperRoman :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+upperRoman :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
 upperRoman = do
   num <- romanNumeral True
   return (UpperRoman, num)
 
 -- | Parses a lowercase roman numeral and returns (LowerRoman, number).
-lowerRoman :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+lowerRoman :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
 lowerRoman = do
   num <- romanNumeral False
   return (LowerRoman, num)
 
 -- | Parses a decimal numeral and returns (Decimal, number).
-decimal :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+decimal :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
 decimal = do
   num <- many1 digit
   return (Decimal, fromMaybe 1 $ safeRead $ T.pack num)
@@ -815,7 +903,7 @@ decimal = do
 -- returns (DefaultStyle, [next example number]).  The next
 -- example number is incremented in parser state, and the label
 -- (if present) is added to the label table.
-exampleNum :: Stream s m Char
+exampleNum :: (Stream s m Char, UpdateSourcePos s Char)
            => ParserT s ParserState m (ListNumberStyle, Int)
 exampleNum = do
   char '@'
@@ -834,37 +922,37 @@ exampleNum = do
   return (Example, num)
 
 -- | Parses a '#' returns (DefaultStyle, 1).
-defaultNum :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+defaultNum :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
 defaultNum = do
   char '#'
   return (DefaultStyle, 1)
 
 -- | Parses a lowercase letter and returns (LowerAlpha, number).
-lowerAlpha :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+lowerAlpha :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
 lowerAlpha = do
   ch <- satisfy isAsciiLower
   return (LowerAlpha, ord ch - ord 'a' + 1)
 
 -- | Parses an uppercase letter and returns (UpperAlpha, number).
-upperAlpha :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+upperAlpha :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
 upperAlpha = do
   ch <- satisfy isAsciiUpper
   return (UpperAlpha, ord ch - ord 'A' + 1)
 
 -- | Parses a roman numeral i or I
-romanOne :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+romanOne :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
 romanOne = (char 'i' >> return (LowerRoman, 1)) <|>
            (char 'I' >> return (UpperRoman, 1))
 
 -- | Parses an ordered list marker and returns list attributes.
-anyOrderedListMarker :: Stream s m Char => ParserT s ParserState m ListAttributes
+anyOrderedListMarker :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s ParserState m ListAttributes
 anyOrderedListMarker = choice
   [delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens],
                            numParser <- [decimal, exampleNum, defaultNum, romanOne,
                            lowerAlpha, lowerRoman, upperAlpha, upperRoman]]
 
 -- | Parses a list number (num) followed by a period, returns list attributes.
-inPeriod :: Stream s m Char
+inPeriod :: (Stream s m Char, UpdateSourcePos s Char)
          => ParserT s st m (ListNumberStyle, Int)
          -> ParserT s st m ListAttributes
 inPeriod num = try $ do
@@ -876,7 +964,7 @@ inPeriod num = try $ do
   return (start, style, delim)
 
 -- | Parses a list number (num) followed by a paren, returns list attributes.
-inOneParen :: Stream s m Char
+inOneParen :: (Stream s m Char, UpdateSourcePos s Char)
            => ParserT s st m (ListNumberStyle, Int)
            -> ParserT s st m ListAttributes
 inOneParen num = try $ do
@@ -885,7 +973,7 @@ inOneParen num = try $ do
   return (start, style, OneParen)
 
 -- | Parses a list number (num) enclosed in parens, returns list attributes.
-inTwoParens :: Stream s m Char
+inTwoParens :: (Stream s m Char, UpdateSourcePos s Char)
             => ParserT s st m (ListNumberStyle, Int)
             -> ParserT s st m ListAttributes
 inTwoParens num = try $ do
@@ -896,7 +984,7 @@ inTwoParens num = try $ do
 
 -- | Parses an ordered list marker with a given style and delimiter,
 -- returns number.
-orderedListMarker :: Stream s m Char
+orderedListMarker :: (Stream s m Char, UpdateSourcePos s Char)
                   => ListNumberStyle
                   -> ListNumberDelim
                   -> ParserT s ParserState m Int
@@ -919,10 +1007,10 @@ orderedListMarker style delim = do
   return start
 
 -- | Parses a character reference and returns a Str element.
-charRef :: Stream s m Char => ParserT s st m Inline
+charRef :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inline
 charRef = Str . T.singleton <$> characterReference
 
-lineBlockLine :: Monad m => ParserT Text st m Text
+lineBlockLine :: Monad m => ParserT Sources st m Text
 lineBlockLine = try $ do
   char '|'
   char ' '
@@ -932,11 +1020,11 @@ lineBlockLine = try $ do
   continuations <- many (try $ char ' ' >> anyLine)
   return $ white <> T.unwords (line : continuations)
 
-blankLineBlockLine :: Stream s m Char => ParserT s st m Char
+blankLineBlockLine :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Char
 blankLineBlockLine = try (char '|' >> blankline)
 
 -- | Parses an RST-style line block and returns a list of strings.
-lineBlockLines :: Monad m => ParserT Text st m [Text]
+lineBlockLines :: Monad m => ParserT Sources st m [Text]
 lineBlockLines = try $ do
   lines' <- many1 (lineBlockLine <|> (T.singleton <$> blankLineBlockLine))
   skipMany blankline
@@ -944,7 +1032,8 @@ lineBlockLines = try $ do
 
 -- | Parse a table using 'headerParser', 'rowParser',
 -- 'lineParser', and 'footerParser'.
-tableWith :: (Stream s m Char, HasReaderOptions st, Monad mf)
+tableWith :: (Stream s m Char, UpdateSourcePos s Char,
+              HasReaderOptions st, Monad mf)
           => ParserT s st m (mf [Blocks], [Alignment], [Int])
           -> ([Int] -> ParserT s st m (mf [Blocks]))
           -> ParserT s st m sep
@@ -964,7 +1053,8 @@ tableWith headerParser rowParser lineParser footerParser = try $ do
 
 type TableComponents mf = ([Alignment], [Double], mf [Row], mf [Row])
 
-tableWith' :: (Stream s m Char, HasReaderOptions st, Monad mf)
+tableWith' :: (Stream s m Char, UpdateSourcePos s Char,
+               HasReaderOptions st, Monad mf)
            => ParserT s st m (mf [Blocks], [Alignment], [Int])
            -> ([Int] -> ParserT s st m (mf [Blocks]))
            -> ParserT s st m sep
@@ -1013,20 +1103,19 @@ widthsFromIndices numColumns' indices =
 -- (which may be grid), then the rows,
 -- which may be grid, separated by blank lines, and
 -- ending with a footer (dashed line followed by blank line).
-gridTableWith :: (Stream s m Char, HasReaderOptions st, HasLastStrPosition st,
-                  Monad mf, IsString s)
-              => ParserT s st m (mf Blocks)  -- ^ Block list parser
+gridTableWith :: (Monad m, HasReaderOptions st, HasLastStrPosition st, Monad mf)
+              => ParserT Sources st m (mf Blocks)  -- ^ Block list parser
               -> Bool                        -- ^ Headerless table
-              -> ParserT s st m (mf Blocks)
+              -> ParserT Sources st m (mf Blocks)
 gridTableWith blocks headless =
   tableWith (gridTableHeader headless blocks) (gridTableRow blocks)
             (gridTableSep '-') gridTableFooter
 
-gridTableWith' :: (Stream s m Char, HasReaderOptions st, HasLastStrPosition st,
-                   Monad mf, IsString s)
-               => ParserT s st m (mf Blocks)  -- ^ Block list parser
+gridTableWith' :: (Monad m, HasReaderOptions st, HasLastStrPosition st,
+                   Monad mf)
+               => ParserT Sources st m (mf Blocks)  -- ^ Block list parser
                -> Bool                        -- ^ Headerless table
-               -> ParserT s st m (TableComponents mf)
+               -> ParserT Sources st m (TableComponents mf)
 gridTableWith' blocks headless =
   tableWith' (gridTableHeader headless blocks) (gridTableRow blocks)
              (gridTableSep '-') gridTableFooter
@@ -1035,7 +1124,7 @@ gridTableSplitLine :: [Int] -> Text -> [Text]
 gridTableSplitLine indices line = map removeFinalBar $ tail $
   splitTextByIndices (init indices) $ trimr line
 
-gridPart :: Stream s m Char => Char -> ParserT s st m ((Int, Int), Alignment)
+gridPart :: Monad m => Char -> ParserT Sources st m ((Int, Int), Alignment)
 gridPart ch = do
   leftColon <- option False (True <$ char ':')
   dashes <- many1 (char ch)
@@ -1050,7 +1139,7 @@ gridPart ch = do
                        (False, False) -> AlignDefault
   return ((lengthDashes, lengthDashes + 1), alignment)
 
-gridDashedLines :: Stream s m Char => Char -> ParserT s st m [((Int, Int), Alignment)]
+gridDashedLines :: Monad m => Char -> ParserT Sources st m [((Int, Int), Alignment)]
 gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline
 
 removeFinalBar :: Text -> Text
@@ -1059,14 +1148,14 @@ removeFinalBar = T.dropWhileEnd go . T.dropWhileEnd (=='|')
     go c = T.any (== c) " \t"
 
 -- | Separator between rows of grid table.
-gridTableSep :: Stream s m Char => Char -> ParserT s st m Char
+gridTableSep :: Monad m => Char -> ParserT Sources st m Char
 gridTableSep ch = try $ gridDashedLines ch >> return '\n'
 
 -- | Parse header for a grid table.
-gridTableHeader :: (Stream s m Char, Monad mf, IsString s, HasLastStrPosition st)
+gridTableHeader :: (Monad m, Monad mf, HasLastStrPosition st)
                 => Bool -- ^ Headerless table
-                -> ParserT s st m (mf Blocks)
-                -> ParserT s st m (mf [Blocks], [Alignment], [Int])
+                -> ParserT Sources st m (mf Blocks)
+                -> ParserT Sources st m (mf [Blocks], [Alignment], [Int])
 gridTableHeader True _ = do
   optional blanklines
   dashes <- gridDashedLines '-'
@@ -1089,17 +1178,17 @@ gridTableHeader False blocks = try $ do
   heads <- sequence <$> mapM (parseFromString' blocks . trim) rawHeads
   return (heads, aligns, indices)
 
-gridTableRawLine :: Stream s m Char => [Int] -> ParserT s st m [Text]
+gridTableRawLine :: (Stream s m Char, UpdateSourcePos s Char) => [Int] -> ParserT s st m [Text]
 gridTableRawLine indices = do
   char '|'
   line <- many1Till anyChar newline
   return (gridTableSplitLine indices $ T.pack line)
 
 -- | Parse row of grid table.
-gridTableRow :: (Stream s m Char, Monad mf, IsString s, HasLastStrPosition st)
-             => ParserT s st m (mf Blocks)
+gridTableRow :: (Monad m, Monad mf, HasLastStrPosition st)
+             => ParserT Sources st m (mf Blocks)
              -> [Int]
-             -> ParserT s st m (mf [Blocks])
+             -> ParserT Sources st m (mf [Blocks])
 gridTableRow blocks indices = do
   colLines <- many1 (gridTableRawLine indices)
   let cols = map ((<> "\n") . T.unlines . removeOneLeadingSpace) $
@@ -1120,34 +1209,38 @@ removeOneLeadingSpace xs =
            Just (c, _) -> c == ' '
 
 -- | Parse footer for a grid table.
-gridTableFooter :: Stream s m Char => ParserT s st m ()
+gridTableFooter :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m ()
 gridTableFooter = optional blanklines
 
 ---
 
 -- | Removes the ParsecT layer from the monad transformer stack
-readWithM :: Monad m
-          => ParserT Text st m a -- ^ parser
-          -> st                  -- ^ initial state
-          -> Text                -- ^ input
+readWithM :: (Monad m, ToSources t)
+          => ParserT Sources st m a  -- ^ parser
+          -> st                      -- ^ initial state
+          -> t                       -- ^ input
           -> m (Either PandocError a)
 readWithM parser state input =
-    mapLeft (PandocParsecError input) <$> runParserT parser state "source" input
+    mapLeft (PandocParsecError sources)
+      <$> runParserT parser state (initialSourceName sources) sources
+ where
+   sources = toSources input
 
 -- | Parse a string with a given parser and state
-readWith :: Parser Text st a
+readWith :: ToSources t
+         => Parser Sources st a
          -> st
-         -> Text
+         -> t
          -> Either PandocError a
 readWith p t inp = runIdentity $ readWithM p t inp
 
 -- | Parse a string with @parser@ (for testing).
 testStringWith :: Show a
-               => ParserT Text ParserState Identity a
+               => ParserT Sources ParserState Identity a
                -> Text
                -> IO ()
 testStringWith parser str = UTF8.putStrLn $ tshow $
-                            readWith parser defaultParserState str
+                            readWith parser defaultParserState (toSources str)
 
 -- | Parsing options.
 data ParserState = ParserState
@@ -1394,19 +1487,23 @@ registerHeader (ident,classes,kvs) header' = do
           updateState $ updateIdentifierList $ Set.insert ident
         return (ident,classes,kvs)
 
-smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st,
+                     HasQuoteContext st m,
+                     Stream s m Char, UpdateSourcePos s Char)
                  => ParserT s st m Inlines
                  -> ParserT s st m Inlines
 smartPunctuation inlineParser = do
   guardEnabled Ext_smart
   choice [ quoted inlineParser, apostrophe, doubleCloseQuote, dash, ellipses ]
 
-quoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+quoted :: (HasLastStrPosition st, HasQuoteContext st m,
+           Stream s m Char, UpdateSourcePos s Char)
        => ParserT s st m Inlines
        -> ParserT s st m Inlines
 quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser
 
-singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m,
+                 Stream s m Char, UpdateSourcePos s Char)
              => ParserT s st m Inlines
              -> ParserT s st m Inlines
 singleQuoted inlineParser = do
@@ -1416,7 +1513,8 @@ singleQuoted inlineParser = do
      (withQuoteContext InSingleQuote (many1Till inlineParser singleQuoteEnd)))
    <|> pure "\8217"
 
-doubleQuoted :: (HasQuoteContext st m, HasLastStrPosition st, Stream s m Char)
+doubleQuoted :: (HasQuoteContext st m, HasLastStrPosition st,
+                 Stream s m Char, UpdateSourcePos s Char)
              => ParserT s st m Inlines
              -> ParserT s st m Inlines
 doubleQuoted inlineParser = do
@@ -1433,13 +1531,14 @@ failIfInQuoteContext context = do
   context' <- getQuoteContext
   when (context' == context) $ Prelude.fail "already inside quotes"
 
-charOrRef :: Stream s m Char => [Char] -> ParserT s st m Char
+charOrRef :: (Stream s m Char, UpdateSourcePos s Char) => [Char] -> ParserT s st m Char
 charOrRef cs =
   oneOf cs <|> try (do c <- characterReference
                        guard (c `elem` cs)
                        return c)
 
-singleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+singleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m,
+                     Stream s m Char, UpdateSourcePos s Char)
                  => ParserT s st m ()
 singleQuoteStart = do
   failIfInQuoteContext InSingleQuote
@@ -1449,7 +1548,7 @@ singleQuoteStart = do
     charOrRef "'\8216\145"
     void $ lookAhead (satisfy (not . isSpaceChar))
 
-singleQuoteEnd :: Stream s m Char
+singleQuoteEnd :: (Stream s m Char, UpdateSourcePos s Char)
                => ParserT s st m ()
 singleQuoteEnd = try $ do
   charOrRef "'\8217\146"
@@ -1457,7 +1556,7 @@ singleQuoteEnd = try $ do
 
 doubleQuoteStart :: (HasLastStrPosition st,
                      HasQuoteContext st m,
-                     Stream s m Char)
+                     Stream s m Char, UpdateSourcePos s Char)
                  => ParserT s st m ()
 doubleQuoteStart = do
   failIfInQuoteContext InDoubleQuote
@@ -1465,21 +1564,21 @@ doubleQuoteStart = do
   try $ do charOrRef "\"\8220\147"
            void $ lookAhead (satisfy (not . isSpaceChar))
 
-doubleQuoteEnd :: Stream s m Char
+doubleQuoteEnd :: (Stream s m Char, UpdateSourcePos s Char)
                => ParserT s st m ()
 doubleQuoteEnd = void (charOrRef "\"\8221\148")
 
-apostrophe :: Stream s m Char => ParserT s st m Inlines
+apostrophe :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines
 apostrophe = (char '\'' <|> char '\8217') >> return (B.str "\8217")
 
-doubleCloseQuote :: Stream s m Char => ParserT s st m Inlines
+doubleCloseQuote :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines
 doubleCloseQuote = B.str "\8221" <$ char '"'
 
-ellipses :: Stream s m Char
+ellipses :: (Stream s m Char, UpdateSourcePos s Char)
          => ParserT s st m Inlines
 ellipses = try (string "..." >> return (B.str "\8230"))
 
-dash :: (HasReaderOptions st, Stream s m Char)
+dash :: (HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char)
      => ParserT s st m Inlines
 dash = try $ do
   oldDashes <- extensionEnabled Ext_old_dashes <$> getOption readerExtensions
@@ -1506,7 +1605,7 @@ nested p = do
   updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
   return res
 
-citeKey :: (Stream s m Char, HasLastStrPosition st)
+citeKey :: (Stream s m Char, UpdateSourcePos s Char, HasLastStrPosition st)
         => ParserT s st m (Bool, Text)
 citeKey = try $ do
   guard =<< notAfterString
@@ -1575,10 +1674,11 @@ insertIncludedFile :: (PandocMonad m, HasIncludeFiles st)
 insertIncludedFile blocks totoks dirs f =
   runIdentity <$> insertIncludedFile' (Identity <$> blocks) totoks dirs f
 
+-- TODO: replace this with something using addToSources.
 -- | Parse content of include file as future blocks. Circular includes result in
 -- an @PandocParseError@.
 insertIncludedFileF :: (PandocMonad m, HasIncludeFiles st)
-                    => ParserT Text st m (Future st Blocks)
+                    => ParserT Sources st m (Future st Blocks)
                     -> [FilePath] -> FilePath
-                    -> ParserT Text st m (Future st Blocks)
-insertIncludedFileF p = insertIncludedFile' p id
+                    -> ParserT Sources st m (Future st Blocks)
+insertIncludedFileF p = insertIncludedFile' p (\t -> Sources [(initialPos "",t)])
diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs
index 7ae9db34f..5106f8058 100644
--- a/src/Text/Pandoc/Readers.hs
+++ b/src/Text/Pandoc/Readers.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE MonoLocalBinds      #-}
+{-# LANGUAGE RankNTypes          #-}
 {-# LANGUAGE ScopedTypeVariables #-}
 {-# LANGUAGE OverloadedStrings   #-}
 {- |
@@ -72,6 +73,7 @@ import Text.Pandoc.Error
 import Text.Pandoc.Extensions
 import Text.Pandoc.Options
 import Text.Pandoc.Readers.CommonMark
+import Text.Pandoc.Readers.Markdown
 import Text.Pandoc.Readers.Creole
 import Text.Pandoc.Readers.DocBook
 import Text.Pandoc.Readers.Docx
@@ -84,7 +86,6 @@ import Text.Pandoc.Readers.HTML (readHtml)
 import Text.Pandoc.Readers.JATS (readJATS)
 import Text.Pandoc.Readers.Jira (readJira)
 import Text.Pandoc.Readers.LaTeX
-import Text.Pandoc.Readers.Markdown
 import Text.Pandoc.Readers.MediaWiki
 import Text.Pandoc.Readers.Muse
 import Text.Pandoc.Readers.Native
@@ -102,50 +103,52 @@ import Text.Pandoc.Readers.CSV
 import Text.Pandoc.Readers.CslJson
 import Text.Pandoc.Readers.BibTeX
 import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 
-data Reader m = TextReader (ReaderOptions -> Text -> m Pandoc)
+data Reader m = TextReader (forall a . ToSources a =>
+                                ReaderOptions -> a -> m Pandoc)
               | ByteStringReader (ReaderOptions -> BL.ByteString -> m Pandoc)
 
 -- | Association list of formats and readers.
 readers :: PandocMonad m => [(Text, Reader m)]
-readers = [ ("native"       , TextReader readNative)
-           ,("json"         , TextReader readJSON)
-           ,("markdown"     , TextReader readMarkdown)
-           ,("markdown_strict" , TextReader readMarkdown)
-           ,("markdown_phpextra" , TextReader readMarkdown)
-           ,("markdown_github" , TextReader readMarkdown)
-           ,("markdown_mmd",  TextReader readMarkdown)
-           ,("commonmark"   , TextReader readCommonMark)
-           ,("commonmark_x" , TextReader readCommonMark)
-           ,("creole"       , TextReader readCreole)
-           ,("dokuwiki"     , TextReader readDokuWiki)
-           ,("gfm"          , TextReader readCommonMark)
-           ,("rst"          , TextReader readRST)
-           ,("mediawiki"    , TextReader readMediaWiki)
-           ,("vimwiki"      , TextReader readVimwiki)
-           ,("docbook"      , TextReader readDocBook)
-           ,("opml"         , TextReader readOPML)
-           ,("org"          , TextReader readOrg)
-           ,("textile"      , TextReader readTextile) -- TODO : textile+lhs
-           ,("html"         , TextReader readHtml)
-           ,("jats"         , TextReader readJATS)
-           ,("jira"         , TextReader readJira)
-           ,("latex"        , TextReader readLaTeX)
-           ,("haddock"      , TextReader readHaddock)
-           ,("twiki"        , TextReader readTWiki)
-           ,("tikiwiki"     , TextReader readTikiWiki)
-           ,("docx"         , ByteStringReader readDocx)
-           ,("odt"          , ByteStringReader readOdt)
-           ,("t2t"          , TextReader readTxt2Tags)
-           ,("epub"         , ByteStringReader readEPUB)
-           ,("muse"         , TextReader readMuse)
-           ,("man"          , TextReader readMan)
-           ,("fb2"          , TextReader readFB2)
-           ,("ipynb"        , TextReader readIpynb)
-           ,("csv"          , TextReader readCSV)
-           ,("csljson"      , TextReader readCslJson)
-           ,("bibtex"       , TextReader readBibTeX)
-           ,("biblatex"     , TextReader readBibLaTeX)
+readers = [("native"       , TextReader readNative)
+          ,("json"         , TextReader readJSON)
+          ,("markdown"     , TextReader readMarkdown)
+          ,("markdown_strict" , TextReader readMarkdown)
+          ,("markdown_phpextra" , TextReader readMarkdown)
+          ,("markdown_github" , TextReader readMarkdown)
+          ,("markdown_mmd",  TextReader readMarkdown)
+          ,("commonmark"   , TextReader readCommonMark)
+          ,("commonmark_x" , TextReader readCommonMark)
+          ,("creole"       , TextReader readCreole)
+          ,("dokuwiki"     , TextReader readDokuWiki)
+          ,("gfm"          , TextReader readCommonMark)
+          ,("rst"          , TextReader readRST)
+          ,("mediawiki"    , TextReader readMediaWiki)
+          ,("vimwiki"      , TextReader readVimwiki)
+          ,("docbook"      , TextReader readDocBook)
+          ,("opml"         , TextReader readOPML)
+          ,("org"          , TextReader readOrg)
+          ,("textile"      , TextReader readTextile) -- TODO : textile+lhs
+          ,("html"         , TextReader readHtml)
+          ,("jats"         , TextReader readJATS)
+          ,("jira"         , TextReader readJira)
+          ,("latex"        , TextReader readLaTeX)
+          ,("haddock"      , TextReader readHaddock)
+          ,("twiki"        , TextReader readTWiki)
+          ,("tikiwiki"     , TextReader readTikiWiki)
+          ,("docx"         , ByteStringReader readDocx)
+          ,("odt"          , ByteStringReader readOdt)
+          ,("t2t"          , TextReader readTxt2Tags)
+          ,("epub"         , ByteStringReader readEPUB)
+          ,("muse"         , TextReader readMuse)
+          ,("man"          , TextReader readMan)
+          ,("fb2"          , TextReader readFB2)
+          ,("ipynb"        , TextReader readIpynb)
+          ,("csv"          , TextReader readCSV)
+          ,("csljson"      , TextReader readCslJson)
+          ,("bibtex"       , TextReader readBibTeX)
+          ,("biblatex"     , TextReader readBibLaTeX)
            ]
 
 -- | Retrieve reader, extensions based on formatSpec (format+extensions).
@@ -173,9 +176,13 @@ getReader s =
                      return (r, exts)
 
 -- | Read pandoc document from JSON format.
-readJSON :: PandocMonad m
-         => ReaderOptions -> Text -> m Pandoc
-readJSON _ t =
-  case eitherDecode' . BL.fromStrict . UTF8.fromText $ t of
+readJSON :: (PandocMonad m, ToSources a)
+         => ReaderOptions
+         -> a
+         -> m Pandoc
+readJSON _ s =
+  case eitherDecode' . BL.fromStrict . UTF8.fromText
+                     . sourcesToText . toSources $ s of
        Right doc -> return doc
-       Left e    -> throwError $ PandocParseError ("JSON parse error: " <> T.pack e)
+       Left e    -> throwError $ PandocParseError ("JSON parse error: "
+                                                   <> T.pack e)
diff --git a/src/Text/Pandoc/Readers/BibTeX.hs b/src/Text/Pandoc/Readers/BibTeX.hs
index b82a81350..318afda85 100644
--- a/src/Text/Pandoc/Readers/BibTeX.hs
+++ b/src/Text/Pandoc/Readers/BibTeX.hs
@@ -23,30 +23,33 @@ where
 import Text.Pandoc.Options
 import Text.Pandoc.Definition
 import Text.Pandoc.Builder (setMeta, cite, str)
-import Data.Text (Text)
 import Citeproc (Lang(..), parseLang)
 import Citeproc.Locale (getLocale)
 import Text.Pandoc.Error (PandocError(..))
 import Text.Pandoc.Class (PandocMonad, lookupEnv)
 import Text.Pandoc.Citeproc.BibTeX as BibTeX
 import Text.Pandoc.Citeproc.MetaValue (referenceToMetaValue)
+import Text.Pandoc.Sources (ToSources(..))
 import Control.Monad.Except (throwError)
 
 -- | Read BibTeX from an input string and return a Pandoc document.
 -- The document will have only metadata, with an empty body.
 -- The metadata will contain a `references` field with the
 -- bibliography entries, and a `nocite` field with the wildcard `[@*]`.
-readBibTeX :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readBibTeX :: (PandocMonad m, ToSources a)
+           => ReaderOptions -> a -> m Pandoc
 readBibTeX = readBibTeX' BibTeX.Bibtex
 
 -- | Read BibLaTeX from an input string and return a Pandoc document.
 -- The document will have only metadata, with an empty body.
 -- The metadata will contain a `references` field with the
 -- bibliography entries, and a `nocite` field with the wildcard `[@*]`.
-readBibLaTeX :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readBibLaTeX :: (PandocMonad m, ToSources a)
+             => ReaderOptions -> a -> m Pandoc
 readBibLaTeX = readBibTeX' BibTeX.Biblatex
 
-readBibTeX' :: PandocMonad m => Variant -> ReaderOptions -> Text -> m Pandoc
+readBibTeX' :: (PandocMonad m, ToSources a)
+            => Variant -> ReaderOptions -> a -> m Pandoc
 readBibTeX' variant _opts t = do
   mblangEnv <- lookupEnv "LANG"
   let defaultLang = Lang "en" Nothing (Just "US") [] [] []
@@ -60,7 +63,7 @@ readBibTeX' variant _opts t = do
                    Left _  -> throwError $ PandocCiteprocError e
                Right l -> return l
   case BibTeX.readBibtexString variant locale (const True) t of
-    Left e -> throwError $ PandocParsecError t e
+    Left e -> throwError $ PandocParsecError (toSources t) e
     Right refs -> return $ setMeta "references"
                               (map referenceToMetaValue refs)
                          . setMeta "nocite"
diff --git a/src/Text/Pandoc/Readers/CSV.hs b/src/Text/Pandoc/Readers/CSV.hs
index 2958d6180..eca8f9425 100644
--- a/src/Text/Pandoc/Readers/CSV.hs
+++ b/src/Text/Pandoc/Readers/CSV.hs
@@ -13,23 +13,23 @@
 Conversion from CSV to a 'Pandoc' table.
 -}
 module Text.Pandoc.Readers.CSV ( readCSV ) where
-import Data.Text (Text)
 import qualified Data.Text as T
 import Text.Pandoc.CSV (parseCSV, defaultCSVOptions)
 import Text.Pandoc.Definition
 import qualified Text.Pandoc.Builder as B
 import Text.Pandoc.Class (PandocMonad)
-import Text.Pandoc.Shared (crFilter)
 import Text.Pandoc.Error
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 import Text.Pandoc.Options (ReaderOptions)
 import Control.Monad.Except (throwError)
 
-readCSV :: PandocMonad m
+readCSV :: (PandocMonad m, ToSources a)
         => ReaderOptions -- ^ Reader options
-        -> Text          -- ^ Text to parse (assuming @'\n'@ line endings)
+        -> a
         -> m Pandoc
-readCSV _opts s =
-  case parseCSV defaultCSVOptions (crFilter s) of
+readCSV _opts s = do
+  let txt = sourcesToText $ toSources s
+  case parseCSV defaultCSVOptions txt of
     Right (r:rs) -> return $ B.doc $ B.table capt
                                              (zip aligns widths)
                                              (TableHead nullAttr hdrs)
@@ -45,4 +45,4 @@ readCSV _opts s =
              aligns = replicate numcols AlignDefault
              widths = replicate numcols ColWidthDefault
     Right []     -> return $ B.doc mempty
-    Left e       -> throwError $ PandocParsecError s e
+    Left e       -> throwError $ PandocParsecError (toSources [("",txt)]) e
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
index 244f77940..b099a9b50 100644
--- a/src/Text/Pandoc/Readers/CommonMark.hs
+++ b/src/Text/Pandoc/Readers/CommonMark.hs
@@ -30,45 +30,55 @@ import Text.Pandoc.Readers.Metadata (yamlMetaBlock)
 import Control.Monad.Except
 import Data.Functor.Identity (runIdentity)
 import Data.Typeable
-import Text.Pandoc.Parsing (runParserT, getPosition, sourceLine,
-                            runF, defaultParserState, take1WhileP, option)
+import Text.Pandoc.Parsing (runParserT, getPosition,
+                            runF, defaultParserState, option, many1, anyChar,
+                            Sources(..), ToSources(..), ParserT, Future,
+                            sourceName)
 import qualified Data.Text as T
 
 -- | Parse a CommonMark formatted string into a 'Pandoc' structure.
-readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readCommonMark :: (PandocMonad m, ToSources a)
+               => ReaderOptions -> a -> m Pandoc
 readCommonMark opts s
-  | isEnabled Ext_yaml_metadata_block opts
-  , "---" `T.isPrefixOf` s = do
-       let metaValueParser = do
-             inp <- option "" $ take1WhileP (const True)
-             case runIdentity
-                    (commonmarkWith (specFor opts) "metadata value" inp) of
-                    Left _ -> mzero
-                    Right (Cm bls :: Cm () Blocks)
-                             -> return $ return $ B.toMetaValue bls
-       res <- runParserT (do meta <- yamlMetaBlock metaValueParser
-                             pos <- getPosition
-                             return (meta, pos))
-                         defaultParserState "YAML metadata" s
-       case res of
-         Left _ -> readCommonMarkBody opts s
-         Right (meta, pos) -> do
-           let dropLines 0 = id
-               dropLines n = dropLines (n - 1) . T.drop 1 . T.dropWhile (/='\n')
-           let metaLines = sourceLine pos - 1
-           let body = T.replicate metaLines "\n" <> dropLines metaLines s
-           Pandoc _ bs <- readCommonMarkBody opts body
-           return $ Pandoc (runF meta defaultParserState) bs
-  | otherwise = readCommonMarkBody opts s
+  | isEnabled Ext_yaml_metadata_block opts = do
+    let sources = toSources s
+    let toks = concatMap sourceToToks (unSources sources)
+    res <- runParserT (do meta <- yamlMetaBlock (metaValueParser opts)
+                          pos <- getPosition
+                          return (meta, pos))
+                      defaultParserState "YAML metadata" (toSources s)
+    case res of
+      Left _ -> readCommonMarkBody opts sources toks
+      Right (meta, pos) -> do
+        -- strip off metadata section and parse body
+        let body = dropWhile (\t -> tokPos t < pos) toks
+        Pandoc _ bs <- readCommonMarkBody opts sources body
+        return $ Pandoc (runF meta defaultParserState) bs
+  | otherwise = do
+    let sources = toSources s
+    let toks = concatMap sourceToToks (unSources sources)
+    readCommonMarkBody opts sources toks
 
-readCommonMarkBody :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
-readCommonMarkBody opts s
+sourceToToks :: (SourcePos, Text) -> [Tok]
+sourceToToks (pos, s) = tokenize (sourceName pos) s
+
+metaValueParser :: Monad m
+                => ReaderOptions -> ParserT Sources st m (Future st MetaValue)
+metaValueParser opts = do
+  inp <- option "" $ T.pack <$> many1 anyChar
+  let toks = concatMap sourceToToks (unSources (toSources inp))
+  case runIdentity (parseCommonmarkWith (specFor opts) toks) of
+     Left _ -> mzero
+     Right (Cm bls :: Cm () Blocks) -> return $ return $ B.toMetaValue bls
+
+readCommonMarkBody :: PandocMonad m => ReaderOptions -> Sources -> [Tok] -> m Pandoc
+readCommonMarkBody opts s toks
   | isEnabled Ext_sourcepos opts =
-    case runIdentity (commonmarkWith (specFor opts) "" s) of
+    case runIdentity (parseCommonmarkWith (specFor opts) toks) of
       Left err -> throwError $ PandocParsecError s err
       Right (Cm bls :: Cm SourceRange Blocks) -> return $ B.doc bls
   | otherwise =
-    case runIdentity (commonmarkWith (specFor opts) "" s) of
+    case runIdentity (parseCommonmarkWith (specFor opts) toks) of
       Left err -> throwError $ PandocParsecError s err
       Right (Cm bls :: Cm () Blocks) -> return $ B.doc bls
 
diff --git a/src/Text/Pandoc/Readers/Creole.hs b/src/Text/Pandoc/Readers/Creole.hs
index 2658dfea2..ad848ada7 100644
--- a/src/Text/Pandoc/Readers/Creole.hs
+++ b/src/Text/Pandoc/Readers/Creole.hs
@@ -23,21 +23,20 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad (..))
 import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (enclosed)
-import Text.Pandoc.Shared (crFilter)
-
 
 -- | Read creole from an input string and return a Pandoc document.
-readCreole :: PandocMonad m
+readCreole :: (PandocMonad m, ToSources a)
           => ReaderOptions
-          -> Text
+          -> a
           -> m Pandoc
 readCreole opts s = do
-  res <- readWithM parseCreole def{ stateOptions = opts } $ crFilter s <> "\n\n"
+  let sources = ensureFinalNewlines 2 (toSources s)
+  res <- readWithM parseCreole def{ stateOptions = opts } sources
   case res of
        Left e  -> throwError e
        Right d -> return d
 
-type CRLParser = ParserT Text ParserState
+type CRLParser = ParserT Sources ParserState
 
 --
 -- Utility functions
diff --git a/src/Text/Pandoc/Readers/CslJson.hs b/src/Text/Pandoc/Readers/CslJson.hs
index 30bb19483..a0af5c325 100644
--- a/src/Text/Pandoc/Readers/CslJson.hs
+++ b/src/Text/Pandoc/Readers/CslJson.hs
@@ -24,21 +24,22 @@ import Text.Pandoc.Options
 import Text.Pandoc.Definition
 import Text.Pandoc.Builder (setMeta, cite, str)
 import qualified Text.Pandoc.UTF8 as UTF8
-import Data.Text (Text)
 import qualified Data.Text as T
 import Text.Pandoc.Error (PandocError(..))
 import Text.Pandoc.Class (PandocMonad)
 import Text.Pandoc.Citeproc.CslJson (cslJsonToReferences)
 import Text.Pandoc.Citeproc.MetaValue (referenceToMetaValue)
 import Control.Monad.Except (throwError)
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 
 -- | Read CSL JSON from an input string and return a Pandoc document.
 -- The document will have only metadata, with an empty body.
 -- The metadata will contain a `references` field with the
 -- bibliography entries, and a `nocite` field with the wildcard `[@*]`.
-readCslJson :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
-readCslJson _opts t =
-  case cslJsonToReferences (UTF8.fromText t) of
+readCslJson :: (PandocMonad m, ToSources a)
+            => ReaderOptions -> a -> m Pandoc
+readCslJson _opts x =
+  case cslJsonToReferences (UTF8.fromText $ sourcesToText $ toSources x) of
     Left e -> throwError $ PandocParseError $ T.pack e
     Right refs -> return $ setMeta "references"
                               (map referenceToMetaValue refs)
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index ac3caa2c0..3db459cfd 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -30,7 +30,8 @@ import Text.Pandoc.Builder
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
 import Text.Pandoc.Options
 import Text.Pandoc.Logging (LogMessage(..))
-import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces)
+import Text.Pandoc.Shared (safeRead, extractSpaces)
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 import Text.TeXMath (readMathML, writeTeX)
 import Text.Pandoc.XML.Light
 
@@ -539,11 +540,15 @@ instance Default DBState where
                , dbContent = [] }
 
 
-readDocBook :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readDocBook :: (PandocMonad m, ToSources a)
+            => ReaderOptions
+            -> a
+            -> m Pandoc
 readDocBook _ inp = do
+  let sources = toSources inp
   tree <- either (throwError . PandocXMLError "") return $
             parseXMLContents
-              (TL.fromStrict . handleInstructions $ crFilter inp)
+              (TL.fromStrict . handleInstructions . sourcesToText $ sources)
   (bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock tree
   return $ Pandoc (dbMeta st') (toList . mconcat $ bs)
 
diff --git a/src/Text/Pandoc/Readers/DokuWiki.hs b/src/Text/Pandoc/Readers/DokuWiki.hs
index dedc1f03f..db98ac8de 100644
--- a/src/Text/Pandoc/Readers/DokuWiki.hs
+++ b/src/Text/Pandoc/Readers/DokuWiki.hs
@@ -29,26 +29,27 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Error (PandocError (PandocParsecError))
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (enclosed, nested)
-import Text.Pandoc.Shared (crFilter, trim, stringify, tshow)
+import Text.Pandoc.Shared (trim, stringify, tshow)
 
 -- | Read DokuWiki from an input string and return a Pandoc document.
-readDokuWiki :: PandocMonad m
+readDokuWiki :: (PandocMonad m, ToSources a)
              => ReaderOptions
-             -> Text
+             -> a
              -> m Pandoc
 readDokuWiki opts s = do
-  let input = crFilter s
-  res <- runParserT parseDokuWiki def {stateOptions = opts } "source" input
+  let sources = toSources s
+  res <- runParserT parseDokuWiki def {stateOptions = opts }
+           (initialSourceName sources) sources
   case res of
-       Left e  -> throwError $ PandocParsecError input e
+       Left e  -> throwError $ PandocParsecError sources e
        Right d -> return d
 
-type DWParser = ParserT Text ParserState
+type DWParser = ParserT Sources ParserState
 
 -- * Utility functions
 
 -- | Parse end-of-line, which can be either a newline or end-of-file.
-eol :: Stream s m Char => ParserT s st m ()
+eol :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m ()
 eol = void newline <|> eof
 
 nested :: PandocMonad m => DWParser m a -> DWParser m a
diff --git a/src/Text/Pandoc/Readers/FB2.hs b/src/Text/Pandoc/Readers/FB2.hs
index 66e390bd7..84e5278db 100644
--- a/src/Text/Pandoc/Readers/FB2.hs
+++ b/src/Text/Pandoc/Readers/FB2.hs
@@ -40,9 +40,9 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad, insertMedia, report)
 import Text.Pandoc.Error
 import Text.Pandoc.Logging
 import Text.Pandoc.Options
-import Text.Pandoc.Shared (crFilter)
 import Text.Pandoc.XML.Light
 import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 
 type FB2 m = StateT FB2State m
 
@@ -63,9 +63,12 @@ instance HasMeta FB2State where
   setMeta field v s = s {fb2Meta = setMeta field v (fb2Meta s)}
   deleteMeta field s = s {fb2Meta = deleteMeta field (fb2Meta s)}
 
-readFB2 :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readFB2 :: (PandocMonad m, ToSources a)
+        => ReaderOptions
+        -> a
+        -> m Pandoc
 readFB2 _ inp =
-  case parseXMLElement $ TL.fromStrict $ crFilter inp of
+  case parseXMLElement $ TL.fromStrict $ sourcesToText $ toSources inp of
     Left msg -> throwError $ PandocXMLError "" msg
     Right el ->  do
       (bs, st) <- runStateT (parseRootElement el) def
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index c3e68afd8..f5c8a2277 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -62,21 +62,21 @@ import Text.Pandoc.Options (
     extensionEnabled)
 import Text.Pandoc.Parsing hiding ((<|>))
 import Text.Pandoc.Shared (
-    addMetaField, blocksToInlines', crFilter, escapeURI, extractSpaces,
+    addMetaField, blocksToInlines', escapeURI, extractSpaces,
     htmlSpanLikeElements, renderTags', safeRead, tshow)
 import Text.Pandoc.Walk
 import Text.Parsec.Error
 import Text.TeXMath (readMathML, writeTeX)
 
 -- | Convert HTML-formatted string to 'Pandoc' document.
-readHtml :: PandocMonad m
+readHtml :: (PandocMonad m, ToSources a)
          => ReaderOptions -- ^ Reader options
-         -> Text        -- ^ String to parse (assumes @'\n'@ line endings)
+         -> a             -- ^ Input to parse
          -> m Pandoc
 readHtml opts inp = do
   let tags = stripPrefixes $ canonicalizeTags $
              parseTagsOptions parseOptions{ optTagPosition = True }
-             (crFilter inp)
+             (sourcesToText $ toSources inp)
       parseDoc = do
         blocks <- fixPlains False . mconcat <$> manyTill block eof
         meta <- stateMeta . parserState <$> getState
@@ -830,17 +830,19 @@ pInlinesInTags tagtype f = extractSpaces f <$> pInTags tagtype inline
 
 pTagText :: PandocMonad m => TagParser m Inlines
 pTagText = try $ do
+  pos <- getPosition
   (TagText str) <- pSatisfy isTagText
   st <- getState
   qu <- ask
   parsed <- lift $ lift $
-            flip runReaderT qu $ runParserT (many pTagContents) st "text" str
+            flip runReaderT qu $ runParserT (many pTagContents) st "text"
+               (Sources [(pos, str)])
   case parsed of
        Left _        -> throwError $ PandocParseError $
                         "Could not parse `" <> str <> "'"
        Right result  -> return $ mconcat result
 
-type InlinesParser m = HTMLParser m Text
+type InlinesParser m = HTMLParser m Sources
 
 pTagContents :: PandocMonad m => InlinesParser m Inlines
 pTagContents =
@@ -970,13 +972,14 @@ isCommentTag = tagComment (const True)
 -- | Matches a stretch of HTML in balanced tags.
 htmlInBalanced :: Monad m
                => (Tag Text -> Bool)
-               -> ParserT Text st m Text
+               -> ParserT Sources st m Text
 htmlInBalanced f = try $ do
   lookAhead (char '<')
-  inp <- getInput
-  let ts = canonicalizeTags $
-        parseTagsOptions parseOptions{ optTagWarning = True,
-                                       optTagPosition = True } inp
+  sources <- getInput
+  let ts = canonicalizeTags
+        $ parseTagsOptions parseOptions{ optTagWarning = True,
+                                         optTagPosition = True }
+        $ sourcesToText sources
   case ts of
     (TagPosition sr sc : t@(TagOpen tn _) : rest) -> do
        guard $ f t
@@ -1018,15 +1021,17 @@ hasTagWarning _                = False
 -- | Matches a tag meeting a certain condition.
 htmlTag :: (HasReaderOptions st, Monad m)
         => (Tag Text -> Bool)
-        -> ParserT Text st m (Tag Text, Text)
+        -> ParserT Sources st m (Tag Text, Text)
 htmlTag f = try $ do
   lookAhead (char '<')
   startpos <- getPosition
-  inp <- getInput
+  sources <- getInput
+  let inp = sourcesToText sources
   let ts = canonicalizeTags $ parseTagsOptions
                                parseOptions{ optTagWarning = False
                                            , optTagPosition = True }
-                               (inp <> " ") -- add space to ensure that
+                               (inp <> " ")
+                               -- add space to ensure that
                                -- we get a TagPosition after the tag
   (next, ln, col) <- case ts of
                       (TagPosition{} : next : TagPosition ln col : _)
diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs
index 48454e353..35eaac0a9 100644
--- a/src/Text/Pandoc/Readers/Haddock.hs
+++ b/src/Text/Pandoc/Readers/Haddock.hs
@@ -19,7 +19,7 @@ import Control.Monad.Except (throwError)
 import Data.List (intersperse)
 import Data.List.NonEmpty (nonEmpty)
 import Data.Maybe (fromMaybe)
-import Data.Text (Text, unpack)
+import Data.Text (unpack)
 import qualified Data.Text as T
 import Documentation.Haddock.Parser
 import Documentation.Haddock.Types as H
@@ -29,15 +29,17 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import Text.Pandoc.Definition
 import Text.Pandoc.Error
 import Text.Pandoc.Options
-import Text.Pandoc.Shared (crFilter, splitTextBy, trim)
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
+import Text.Pandoc.Shared (splitTextBy, trim)
 
 
 -- | Parse Haddock markup and return a 'Pandoc' document.
-readHaddock :: PandocMonad m
+readHaddock :: (PandocMonad m, ToSources a)
             => ReaderOptions
-            -> Text
+            -> a
             -> m Pandoc
-readHaddock opts s = case readHaddockEither opts (unpack (crFilter s)) of
+readHaddock opts s = case readHaddockEither opts
+                           (unpack . sourcesToText . toSources $ s) of
   Right result -> return result
   Left e       -> throwError e
 
diff --git a/src/Text/Pandoc/Readers/Ipynb.hs b/src/Text/Pandoc/Readers/Ipynb.hs
index 70296bb6b..cd1093109 100644
--- a/src/Text/Pandoc/Readers/Ipynb.hs
+++ b/src/Text/Pandoc/Readers/Ipynb.hs
@@ -39,10 +39,12 @@ import Data.Aeson as Aeson
 import Control.Monad.Except (throwError)
 import Text.Pandoc.Readers.Markdown (readMarkdown)
 import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 
-readIpynb :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
-readIpynb opts t = do
-  let src = BL.fromStrict (TE.encodeUtf8 t)
+readIpynb :: (PandocMonad m, ToSources a)
+          => ReaderOptions -> a -> m Pandoc
+readIpynb opts x = do
+  let src = BL.fromStrict . TE.encodeUtf8 . sourcesToText $ toSources x
   case eitherDecode src of
     Right (notebook4 :: Notebook NbV4) -> notebookToPandoc opts notebook4
     Left _ ->
diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs
index c068f3774..9cdbf1611 100644
--- a/src/Text/Pandoc/Readers/JATS.hs
+++ b/src/Text/Pandoc/Readers/JATS.hs
@@ -29,11 +29,12 @@ import Text.HTML.TagSoup.Entity (lookupEntity)
 import Text.Pandoc.Builder
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import Text.Pandoc.Options
-import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces)
+import Text.Pandoc.Shared (safeRead, extractSpaces)
 import Text.TeXMath (readMathML, writeTeX)
 import Text.Pandoc.XML.Light
 import qualified Data.Set as S (fromList, member)
 import Data.Set ((\\))
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 
 type JATS m = StateT JATSState m
 
@@ -52,10 +53,14 @@ instance Default JATSState where
                  , jatsContent = [] }
 
 
-readJATS :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readJATS :: (PandocMonad m, ToSources a)
+         => ReaderOptions
+         -> a
+         -> m Pandoc
 readJATS _ inp = do
+  let sources = toSources inp
   tree <- either (throwError . PandocXMLError "") return $
-            parseXMLContents (TL.fromStrict $ crFilter inp)
+            parseXMLContents (TL.fromStrict . sourcesToText $ sources)
   (bs, st') <- flip runStateT (def{ jatsContent = tree }) $ mapM parseBlock tree
   return $ Pandoc (jatsMeta st') (toList . mconcat $ bs)
 
diff --git a/src/Text/Pandoc/Readers/Jira.hs b/src/Text/Pandoc/Readers/Jira.hs
index 89aecbf56..a3b415f09 100644
--- a/src/Text/Pandoc/Readers/Jira.hs
+++ b/src/Text/Pandoc/Readers/Jira.hs
@@ -20,18 +20,20 @@ import Text.Pandoc.Builder hiding (cell)
 import Text.Pandoc.Error (PandocError (PandocParseError))
 import Text.Pandoc.Options (ReaderOptions)
 import Text.Pandoc.Shared (stringify)
-
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 import qualified Text.Jira.Markup as Jira
 
 -- | Read Jira wiki markup.
-readJira :: PandocMonad m
+readJira :: (PandocMonad m, ToSources a)
          => ReaderOptions
-         -> Text
+         -> a
          -> m Pandoc
-readJira _opts s = case parse s of
-  Right d -> return $ jiraToPandoc d
-  Left e  -> throwError . PandocParseError $
-             "Jira parse error" `append` pack (show e)
+readJira _opts inp = do
+  let sources = toSources inp
+  case parse (sourcesToText sources) of
+    Right d -> return $ jiraToPandoc d
+    Left e  -> throwError . PandocParseError $
+               "Jira parse error" `append` pack (show e)
 
 jiraToPandoc :: Jira.Doc -> Pandoc
 jiraToPandoc (Jira.Doc blks) = doc $ foldMap jiraToPandocBlocks blks
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 9ad168293..f90d562ae 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -77,16 +77,17 @@ import Data.List.NonEmpty (nonEmpty)
 -- import Debug.Trace (traceShowId)
 
 -- | Parse LaTeX from string and return 'Pandoc' document.
-readLaTeX :: PandocMonad m
+readLaTeX :: (PandocMonad m, ToSources a)
           => ReaderOptions -- ^ Reader options
-          -> Text        -- ^ String to parse (assumes @'\n'@ line endings)
+          -> a             -- ^ Input to parse
           -> m Pandoc
 readLaTeX opts ltx = do
+  let sources = toSources ltx
   parsed <- runParserT parseLaTeX def{ sOptions = opts } "source"
-               (tokenize "source" (crFilter ltx))
+               (tokenizeSources sources)
   case parsed of
     Right result -> return result
-    Left e       -> throwError $ PandocParsecError ltx e
+    Left e       -> throwError $ PandocParsecError sources e
 
 parseLaTeX :: PandocMonad m => LP m Pandoc
 parseLaTeX = do
@@ -132,11 +133,11 @@ resolveRefs _ x = x
 
 
 rawLaTeXBlock :: (PandocMonad m, HasMacros s, HasReaderOptions s)
-              => ParserT Text s m Text
+              => ParserT Sources s m Text
 rawLaTeXBlock = do
   lookAhead (try (char '\\' >> letter))
   inp <- getInput
-  let toks = tokenize "source" inp
+  let toks = tokenizeSources inp
   snd <$> (rawLaTeXParser toks False (macroDef (const mempty)) blocks
       <|> rawLaTeXParser toks True
              (do choice (map controlSeq
@@ -163,11 +164,11 @@ beginOrEndCommand = try $ do
                     (txt <> untokenize rawargs)
 
 rawLaTeXInline :: (PandocMonad m, HasMacros s, HasReaderOptions s)
-               => ParserT Text s m Text
+               => ParserT Sources s m Text
 rawLaTeXInline = do
   lookAhead (try (char '\\' >> letter))
   inp <- getInput
-  let toks = tokenize "source" inp
+  let toks = tokenizeSources inp
   raw <- snd <$>
           (   rawLaTeXParser toks True
               (mempty <$ (controlSeq "input" >> skipMany rawopt >> braced))
@@ -178,11 +179,11 @@ rawLaTeXInline = do
   finalbraces <- mconcat <$> many (try (string "{}")) -- see #5439
   return $ raw <> T.pack finalbraces
 
-inlineCommand :: PandocMonad m => ParserT Text ParserState m Inlines
+inlineCommand :: PandocMonad m => ParserT Sources ParserState m Inlines
 inlineCommand = do
   lookAhead (try (char '\\' >> letter))
   inp <- getInput
-  let toks = tokenize "source" inp
+  let toks = tokenizeSources inp
   fst <$> rawLaTeXParser toks True (inlineEnvironment <|> inlineCommand')
           inlines
 
@@ -641,7 +642,7 @@ opt = do
   parsed <- runParserT (mconcat <$> many inline) st "bracketed option" toks
   case parsed of
     Right result -> return result
-    Left e       -> throwError $ PandocParsecError (untokenize toks) e
+    Left e       -> throwError $ PandocParsecError (toSources toks) e
 
 -- block elements:
 
diff --git a/src/Text/Pandoc/Readers/LaTeX/Citation.hs b/src/Text/Pandoc/Readers/LaTeX/Citation.hs
index 655823dab..af97125c6 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Citation.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Citation.hs
@@ -120,7 +120,7 @@ simpleCiteArgs inline = try $ do
       runParserT (mconcat <$> many inline) st "bracketed option" toks
     case parsed of
       Right result -> return result
-      Left e       -> throwError $ PandocParsecError (untokenize toks) e
+      Left e       -> throwError $ PandocParsecError (toSources toks) e
 
 
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index db58b333d..35ce3509d 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -27,6 +27,7 @@ module Text.Pandoc.Readers.LaTeX.Parsing
   , rawLaTeXParser
   , applyMacros
   , tokenize
+  , tokenizeSources
   , untokenize
   , untoken
   , totoks
@@ -248,7 +249,7 @@ withVerbatimMode parser = do
 
 rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s)
                => [Tok] -> Bool -> LP m a -> LP m a
-               -> ParserT Text s m (a, Text)
+               -> ParserT Sources s m (a, Text)
 rawLaTeXParser toks retokenize parser valParser = do
   pstate <- getState
   let lstate = def{ sOptions = extractReaderOptions pstate }
@@ -268,7 +269,7 @@ rawLaTeXParser toks retokenize parser valParser = do
               Left _    -> mzero
               Right ((val, raw), st) -> do
                 updateState (updateMacros (sMacros st <>))
-                _ <- takeP (T.length (untokenize toks'))
+                void $ count (T.length (untokenize toks')) anyChar
                 let result = untokenize raw
                 -- ensure we end with space if input did, see #4442
                 let result' =
@@ -281,7 +282,7 @@ rawLaTeXParser toks retokenize parser valParser = do
                 return (val, result')
 
 applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s)
-            => Text -> ParserT Text s m Text
+            => Text -> ParserT Sources s m Text
 applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|>
    do let retokenize = untokenize <$> many (satisfyTok (const True))
       pstate <- getState
@@ -301,6 +302,11 @@ QuickCheck property:
 >   let t = T.pack s in untokenize (tokenize "random" t) == t
 -}
 
+tokenizeSources :: Sources -> [Tok]
+tokenizeSources = concatMap tokenizeSource . unSources
+ where
+   tokenizeSource (pos, t) = totoks pos t
+
 tokenize :: SourceName -> Text -> [Tok]
 tokenize sourcename = totoks (initialPos sourcename)
 
diff --git a/src/Text/Pandoc/Readers/LaTeX/Types.hs b/src/Text/Pandoc/Readers/LaTeX/Types.hs
index f8c214318..c20b72bc5 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Types.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Types.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE FlexibleInstances #-}
 {- |
    Module      : Text.Pandoc.Readers.LaTeX.Types
    Copyright   : Copyright (C) 2017-2021 John MacFarlane
@@ -18,7 +19,9 @@ module Text.Pandoc.Readers.LaTeX.Types ( Tok(..)
                                        )
 where
 import Data.Text (Text)
-import Text.Parsec.Pos (SourcePos)
+import Text.Parsec.Pos (SourcePos, sourceName)
+import Text.Pandoc.Sources
+import Data.List (groupBy)
 
 data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment |
                Esc1    | Esc2   | Arg Int
@@ -27,6 +30,16 @@ data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment |
 data Tok = Tok SourcePos TokType Text
      deriving (Eq, Ord, Show)
 
+instance ToSources [Tok] where
+  toSources = Sources
+    . map (\ts -> case ts of
+                    Tok p _ _ : _ -> (p, mconcat $ map tokToText ts)
+                    _ -> error "toSources [Tok] encountered empty group")
+    . groupBy (\(Tok p1 _ _) (Tok p2 _ _) -> sourceName p1 == sourceName p2)
+
+tokToText :: Tok -> Text
+tokToText (Tok _ _ t) = t
+
 data ExpansionPoint = ExpandWhenDefined | ExpandWhenUsed
      deriving (Eq, Ord, Show)
 
diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs
index 21b8feaab..1141af66f 100644
--- a/src/Text/Pandoc/Readers/Man.hs
+++ b/src/Text/Pandoc/Readers/Man.hs
@@ -20,7 +20,7 @@ import Control.Monad (liftM, mzero, guard, void)
 import Control.Monad.Trans (lift)
 import Control.Monad.Except (throwError)
 import Data.Maybe (catMaybes, isJust)
-import Data.List (intersperse, intercalate)
+import Data.List (intersperse)
 import qualified Data.Text as T
 import Text.Pandoc.Builder as B
 import Text.Pandoc.Class.PandocMonad (PandocMonad(..), report)
@@ -29,9 +29,8 @@ import Text.Pandoc.Logging (LogMessage(..))
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing
 import Text.Pandoc.Walk (query)
-import Text.Pandoc.Shared (crFilter, mapLeft)
+import Text.Pandoc.Shared (mapLeft)
 import Text.Pandoc.Readers.Roff  -- TODO explicit imports
-import Text.Parsec hiding (tokenPrim)
 import qualified Text.Parsec as Parsec
 import Text.Parsec.Pos (updatePosString)
 import qualified Data.Foldable as Foldable
@@ -50,13 +49,20 @@ type ManParser m = ParserT [RoffToken] ManState m
 
 
 -- | Read man (troff) from an input string and return a Pandoc document.
-readMan :: PandocMonad m => ReaderOptions -> T.Text -> m Pandoc
-readMan opts txt = do
-  tokenz <- lexRoff (initialPos "input") (crFilter txt)
+readMan :: (PandocMonad m, ToSources a)
+        => ReaderOptions
+        -> a
+        -> m Pandoc
+readMan opts s = do
+  let Sources inps = toSources s
+  tokenz <- mconcat <$> mapM (uncurry lexRoff) inps
   let state = def {readerOptions = opts} :: ManState
+  let fixError (PandocParsecError _ e) = PandocParsecError (Sources inps) e
+      fixError e = e
   eitherdoc <- readWithMTokens parseMan state
      (Foldable.toList . unRoffTokens $ tokenz)
-  either throwError return eitherdoc
+  either (throwError . fixError) return eitherdoc
+
 
 readWithMTokens :: PandocMonad m
         => ParserT [RoffToken] ManState m a  -- ^ parser
@@ -64,9 +70,10 @@ readWithMTokens :: PandocMonad m
         -> [RoffToken]                       -- ^ input
         -> m (Either PandocError a)
 readWithMTokens parser state input =
-  let leftF = PandocParsecError . T.pack . intercalate "\n" $ show <$> input
+  let leftF = PandocParsecError mempty
   in mapLeft leftF `liftM` runParserT parser state "source" input
 
+
 parseMan :: PandocMonad m => ManParser m Pandoc
 parseMan = do
   bs <- many parseBlock <* eof
@@ -89,7 +96,7 @@ parseBlock = choice [ parseList
 
 parseTable :: PandocMonad m => ManParser m Blocks
 parseTable = do
-  modifyState $ \st -> st { tableCellsPlain = True }
+  updateState $ \st -> st { tableCellsPlain = True }
   let isTbl Tbl{} = True
       isTbl _     = False
   Tbl _opts rows pos <- msatisfy isTbl
@@ -135,7 +142,7 @@ parseTable = do
         case res' of
           Left _  -> Prelude.fail "Could not parse table cell"
           Right x -> do
-            modifyState $ \s -> s{ tableCellsPlain = False }
+            updateState $ \s -> s{ tableCellsPlain = False }
             return x
       Right x -> return x
 
@@ -222,7 +229,7 @@ parseTitle = do
                       setMeta "section" (linePartsToInlines y)
          [x]       -> setMeta "title" (linePartsToInlines x)
          []        -> id
-  modifyState $ \st -> st{ metadata = adjustMeta $ metadata st }
+  updateState $ \st -> st{ metadata = adjustMeta $ metadata st }
   return mempty
 
 linePartsToInlines :: [LinePart] -> Inlines
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index ba8ed147e..69dd51bc4 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -47,19 +47,20 @@ import Text.Pandoc.Readers.LaTeX (applyMacros, rawLaTeXBlock, rawLaTeXInline)
 import Text.Pandoc.Shared
 import Text.Pandoc.XML (fromEntities)
 import Text.Pandoc.Readers.Metadata (yamlBsToMeta, yamlBsToRefs, yamlMetaBlock)
+-- import Debug.Trace (traceShowId)
 
-type MarkdownParser m = ParserT Text ParserState m
+type MarkdownParser m = ParserT Sources ParserState m
 
 type F = Future ParserState
 
 -- | Read markdown from an input string and return a Pandoc document.
-readMarkdown :: PandocMonad m
+readMarkdown :: (PandocMonad m, ToSources a)
              => ReaderOptions -- ^ Reader options
-             -> Text      -- ^ String to parse (assuming @'\n'@ line endings)
+             -> a             -- ^ Input
              -> m Pandoc
 readMarkdown opts s = do
   parsed <- readWithM parseMarkdown def{ stateOptions = opts }
-               (crFilter s <> "\n\n")
+                (ensureFinalNewlines 3 (toSources s))
   case parsed of
     Right result -> return result
     Left e       -> throwError e
@@ -80,7 +81,7 @@ yamlToMeta opts mbfp bstr = do
         meta <- yamlBsToMeta (fmap B.toMetaValue <$> parseBlocks) bstr
         setPosition oldPos
         return $ runF meta defaultParserState
-  parsed <- readWithM parser def{ stateOptions = opts } ""
+  parsed <- readWithM parser def{ stateOptions = opts } ("" :: Text)
   case parsed of
     Right result -> return result
     Left e       -> throwError e
@@ -103,7 +104,7 @@ yamlToRefs idpred opts mbfp bstr = do
         refs <- yamlBsToRefs (fmap B.toMetaValue <$> parseBlocks) idpred bstr
         setPosition oldPos
         return $ runF refs defaultParserState
-  parsed <- readWithM parser def{ stateOptions = opts } ""
+  parsed <- readWithM parser def{ stateOptions = opts } ("" :: Text)
   case parsed of
     Right result -> return result
     Left e       -> throwError e
@@ -146,14 +147,14 @@ inList = do
   ctx <- stateParserContext <$> getState
   guard (ctx == ListItemState)
 
-spnl :: PandocMonad m => ParserT Text st m ()
+spnl :: PandocMonad m => ParserT Sources st m ()
 spnl = try $ do
   skipSpaces
   optional newline
   skipSpaces
   notFollowedBy (char '\n')
 
-spnl' :: PandocMonad m => ParserT Text st m Text
+spnl' :: PandocMonad m => ParserT Sources st m Text
 spnl' = try $ do
   xs <- many spaceChar
   ys <- option "" $ try $ (:) <$> newline
@@ -568,7 +569,7 @@ registerImplicitHeader raw attr@(ident, _, _)
 -- hrule block
 --
 
-hrule :: PandocMonad m => ParserT Text st m (F Blocks)
+hrule :: PandocMonad m => ParserT Sources st m (F Blocks)
 hrule = try $ do
   skipSpaces
   start <- satisfy isHruleChar
@@ -588,7 +589,7 @@ indentedLine = indentSpaces >> anyLineNewline
 blockDelimiter :: PandocMonad m
                => (Char -> Bool)
                -> Maybe Int
-               -> ParserT Text ParserState m Int
+               -> ParserT Sources ParserState m Int
 blockDelimiter f len = try $ do
   skipNonindentSpaces
   c <- lookAhead (satisfy f)
@@ -732,7 +733,7 @@ lhsCodeBlockBirdWith c = try $ do
   blanklines
   return $ T.intercalate "\n" lns'
 
-birdTrackLine :: PandocMonad m => Char -> ParserT Text st m Text
+birdTrackLine :: PandocMonad m => Char -> ParserT Sources st m Text
 birdTrackLine c = try $ do
   char c
   -- allow html tags on left margin:
@@ -1025,7 +1026,7 @@ para = try $ do
   option (B.plain <$> result)
     $ try $ do
             newline
-            (blanklines >> return mempty)
+            (mempty <$ blanklines)
               <|> (guardDisabled Ext_blank_before_blockquote >> () <$ lookAhead blockQuote)
               <|> (guardEnabled Ext_backtick_code_blocks >> () <$ lookAhead codeBlockFenced)
               <|> (guardDisabled Ext_blank_before_header >> () <$ lookAhead header)
@@ -1170,7 +1171,7 @@ lineBlock = do
 -- and the length including trailing space.
 dashedLine :: PandocMonad m
            => Char
-           -> ParserT Text st m (Int, Int)
+           -> ParserT Sources st m (Int, Int)
 dashedLine ch = do
   dashes <- many1 (char ch)
   sp     <- many spaceChar
@@ -1239,7 +1240,7 @@ rawTableLine :: PandocMonad m
              -> MarkdownParser m [Text]
 rawTableLine indices = do
   notFollowedBy' (blanklines' <|> tableFooter)
-  line <- take1WhileP (/='\n') <* newline
+  line <- anyLine
   return $ map trim $ tail $
            splitTextByIndices (init indices) line
 
@@ -1390,7 +1391,7 @@ pipeTableCell =
       return $ B.plain <$> result)
     <|> return mempty
 
-pipeTableHeaderPart :: PandocMonad m => ParserT Text st m (Alignment, Int)
+pipeTableHeaderPart :: PandocMonad m => ParserT Sources st m (Alignment, Int)
 pipeTableHeaderPart = try $ do
   skipMany spaceChar
   left <- optionMaybe (char ':')
@@ -1406,10 +1407,14 @@ pipeTableHeaderPart = try $ do
       (Just _,Just _)   -> AlignCenter, len)
 
 -- Succeed only if current line contains a pipe.
-scanForPipe :: PandocMonad m => ParserT Text st m ()
+scanForPipe :: PandocMonad m => ParserT Sources st m ()
 scanForPipe = do
-  inp <- getInput
-  case T.break (\c -> c == '\n' || c == '|') inp of
+  Sources inps <- getInput
+  let ln = case inps of
+             [] -> ""
+             ((_,t):(_,t'):_) | T.null t -> t'
+             ((_,t):_) -> t
+  case T.break (\c -> c == '\n' || c == '|') ln of
        (_, T.uncons -> Just ('|', _)) -> return ()
        _                              -> mzero
 
@@ -1703,13 +1708,13 @@ whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace"
   where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak)
         regsp = skipMany spaceChar >> return B.space
 
-nonEndline :: PandocMonad m => ParserT Text st m Char
+nonEndline :: PandocMonad m => ParserT Sources st m Char
 nonEndline = satisfy (/='\n')
 
 str :: PandocMonad m => MarkdownParser m (F Inlines)
 str = do
   result <- mconcat <$> many1
-             ( take1WhileP isAlphaNum
+             ( T.pack <$> (many1 alphaNum)
               <|> "." <$ try (char '.' <* notFollowedBy (char '.')) )
   updateLastStrPos
   (do guardEnabled Ext_smart
@@ -1962,7 +1967,7 @@ rawLaTeXInline' = do
     s <- rawLaTeXInline
     return $ return $ B.rawInline "tex" s -- "tex" because it might be context
 
-rawConTeXtEnvironment :: PandocMonad m => ParserT Text st m Text
+rawConTeXtEnvironment :: PandocMonad m => ParserT Sources st m Text
 rawConTeXtEnvironment = try $ do
   string "\\start"
   completion <- inBrackets (letter <|> digit <|> spaceChar)
@@ -1971,7 +1976,7 @@ rawConTeXtEnvironment = try $ do
                        (try $ string "\\stop" >> textStr completion)
   return $ "\\start" <> completion <> T.concat contents <> "\\stop" <> completion
 
-inBrackets :: PandocMonad m => ParserT Text st m Char -> ParserT Text st m Text
+inBrackets :: PandocMonad m => ParserT Sources st m Char -> ParserT Sources st m Text
 inBrackets parser = do
   char '['
   contents <- manyChar parser
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index 9f4d5e170..825e4a2eb 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -36,17 +36,18 @@ import Text.Pandoc.Logging
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (nested)
 import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isCommentTag)
-import Text.Pandoc.Shared (crFilter, safeRead, stringify, stripTrailingNewlines,
+import Text.Pandoc.Shared (safeRead, stringify, stripTrailingNewlines,
                            trim, splitTextBy, tshow)
 import Text.Pandoc.Walk (walk)
 import Text.Pandoc.XML (fromEntities)
 
 -- | Read mediawiki from an input string and return a Pandoc document.
-readMediaWiki :: PandocMonad m
-              => ReaderOptions -- ^ Reader options
-              -> Text          -- ^ String to parse (assuming @'\n'@ line endings)
+readMediaWiki :: (PandocMonad m, ToSources a)
+              => ReaderOptions
+              -> a
               -> m Pandoc
 readMediaWiki opts s = do
+  let sources = toSources s
   parsed <- readWithM parseMediaWiki MWState{ mwOptions = opts
                                             , mwMaxNestingLevel = 4
                                             , mwNextLinkNumber  = 1
@@ -55,7 +56,7 @@ readMediaWiki opts s = do
                                             , mwLogMessages = []
                                             , mwInTT = False
                                             }
-            (crFilter s <> "\n")
+            sources
   case parsed of
     Right result -> return result
     Left e       -> throwError e
@@ -69,7 +70,7 @@ data MWState = MWState { mwOptions         :: ReaderOptions
                        , mwInTT            :: Bool
                        }
 
-type MWParser m = ParserT Text MWState m
+type MWParser m = ParserT Sources MWState m
 
 instance HasReaderOptions MWState where
   extractReaderOptions = mwOptions
diff --git a/src/Text/Pandoc/Readers/Metadata.hs b/src/Text/Pandoc/Readers/Metadata.hs
index cb141cba5..bbcfe62ea 100644
--- a/src/Text/Pandoc/Readers/Metadata.hs
+++ b/src/Text/Pandoc/Readers/Metadata.hs
@@ -35,9 +35,9 @@ import qualified Data.Text.Lazy as TL
 import qualified Text.Pandoc.UTF8 as UTF8
 
 yamlBsToMeta :: (PandocMonad m, HasLastStrPosition st)
-             => ParserT Text st m (Future st MetaValue)
+             => ParserT Sources st m (Future st MetaValue)
              -> BL.ByteString
-             -> ParserT Text st m (Future st Meta)
+             -> ParserT Sources st m (Future st Meta)
 yamlBsToMeta pMetaValue bstr = do
   case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of
        Right (YAML.Doc (YAML.Mapping _ _ o):_)
@@ -67,10 +67,10 @@ lookupYAML _ _ = Nothing
 
 -- Returns filtered list of references.
 yamlBsToRefs :: (PandocMonad m, HasLastStrPosition st)
-             => ParserT Text st m (Future st MetaValue)
+             => ParserT Sources st m (Future st MetaValue)
              -> (Text -> Bool) -- ^ Filter for id
              -> BL.ByteString
-             -> ParserT Text st m (Future st [MetaValue])
+             -> ParserT Sources st m (Future st [MetaValue])
 yamlBsToRefs pMetaValue idpred bstr =
   case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of
        Right (YAML.Doc o@YAML.Mapping{}:_)
@@ -108,9 +108,9 @@ nodeToKey (YAML.Scalar _ (YAML.SUnknown _ t)) = Just t
 nodeToKey _                                   = Nothing
 
 normalizeMetaValue :: (PandocMonad m, HasLastStrPosition st)
-                   => ParserT Text st m (Future st MetaValue)
+                   => ParserT Sources st m (Future st MetaValue)
                    -> Text
-                   -> ParserT Text st m (Future st MetaValue)
+                   -> ParserT Sources st m (Future st MetaValue)
 normalizeMetaValue pMetaValue x =
    -- Note: a standard quoted or unquoted YAML value will
    -- not end in a newline, but a "block" set off with
@@ -133,9 +133,9 @@ checkBoolean t
   | otherwise = Nothing
 
 yamlToMetaValue :: (PandocMonad m, HasLastStrPosition st)
-                => ParserT Text st m (Future st MetaValue)
+                => ParserT Sources st m (Future st MetaValue)
                 -> YAML.Node YE.Pos
-                -> ParserT Text st m (Future st MetaValue)
+                -> ParserT Sources st m (Future st MetaValue)
 yamlToMetaValue pMetaValue (YAML.Scalar _ x) =
   case x of
        YAML.SStr t       -> normalizeMetaValue pMetaValue t
@@ -156,9 +156,9 @@ yamlToMetaValue pMetaValue (YAML.Mapping _ _ o) =
 yamlToMetaValue _ _ = return $ return $ MetaString ""
 
 yamlMap :: (PandocMonad m, HasLastStrPosition st)
-        => ParserT Text st m (Future st MetaValue)
+        => ParserT Sources st m (Future st MetaValue)
         -> M.Map (YAML.Node YE.Pos) (YAML.Node YE.Pos)
-        -> ParserT Text st m (Future st (M.Map Text MetaValue))
+        -> ParserT Sources st m (Future st (M.Map Text MetaValue))
 yamlMap pMetaValue o = do
     kvs <- forM (M.toList o) $ \(key, v) -> do
              k <- maybe (throwError $ PandocParseError
@@ -177,8 +177,8 @@ yamlMap pMetaValue o = do
 
 -- | Parse a YAML metadata block using the supplied 'MetaValue' parser.
 yamlMetaBlock :: (HasLastStrPosition st, PandocMonad m)
-              => ParserT Text st m (Future st MetaValue)
-              -> ParserT Text st m (Future st Meta)
+              => ParserT Sources st m (Future st MetaValue)
+              -> ParserT Sources st m (Future st Meta)
 yamlMetaBlock parser = try $ do
   string "---"
   blankline
@@ -189,5 +189,5 @@ yamlMetaBlock parser = try $ do
   optional blanklines
   yamlBsToMeta parser $ UTF8.fromTextLazy $ TL.fromStrict rawYaml
 
-stopLine :: Monad m => ParserT Text st m ()
+stopLine :: Monad m => ParserT Sources st m ()
 stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs
index 698bfd3d7..a0d4534f1 100644
--- a/src/Text/Pandoc/Readers/Muse.hs
+++ b/src/Text/Pandoc/Readers/Muse.hs
@@ -37,18 +37,19 @@ import Text.Pandoc.Error (PandocError (PandocParsecError))
 import Text.Pandoc.Logging
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing
-import Text.Pandoc.Shared (crFilter, trimr, tshow)
+import Text.Pandoc.Shared (trimr, tshow)
 
 -- | Read Muse from an input string and return a Pandoc document.
-readMuse :: PandocMonad m
+readMuse :: (PandocMonad m, ToSources a)
          => ReaderOptions
-         -> Text
+         -> a
          -> m Pandoc
 readMuse opts s = do
-  let input = crFilter s
-  res <- flip runReaderT def $ runParserT parseMuse def{ museOptions = opts } "source" input
+  let sources = toSources s
+  res <- flip runReaderT def $ runParserT parseMuse def{ museOptions = opts }
+              (initialSourceName sources) sources
   case res of
-       Left e  -> throwError $ PandocParsecError input e
+       Left e  -> throwError $ PandocParsecError sources e
        Right d -> return d
 
 type F = Future MuseState
@@ -82,7 +83,7 @@ instance Default MuseEnv where
                 , museInPara = False
                 }
 
-type MuseParser m = ParserT Text MuseState (ReaderT MuseEnv m)
+type MuseParser m = ParserT Sources MuseState (ReaderT MuseEnv m)
 
 instance HasReaderOptions MuseState where
   extractReaderOptions = museOptions
@@ -155,7 +156,7 @@ firstColumn = getPosition >>= \pos -> guard (sourceColumn pos == 1)
 -- * Parsers
 
 -- | Parse end-of-line, which can be either a newline or end-of-file.
-eol :: Stream s m Char => ParserT s st m ()
+eol :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m ()
 eol = void newline <|> eof
 
 getIndent :: PandocMonad m
diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs
index 9c8bc0374..58f235e81 100644
--- a/src/Text/Pandoc/Readers/Native.hs
+++ b/src/Text/Pandoc/Readers/Native.hs
@@ -21,6 +21,7 @@ import Control.Monad.Except (throwError)
 import Data.Text (Text)
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import Text.Pandoc.Error
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 
 -- | Read native formatted text and return a Pandoc document.
 -- The input may be a full pandoc document, a block list, a block,
@@ -32,14 +33,15 @@ import Text.Pandoc.Error
 --
 -- > Pandoc nullMeta [Plain [Str "hi"]]
 --
-readNative :: PandocMonad m
+readNative :: (PandocMonad m, ToSources a)
            => ReaderOptions
-           -> Text       -- ^ String to parse (assuming @'\n'@ line endings)
+           -> a
            -> m Pandoc
 readNative _ s =
-  case maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead s) of
-    Right doc -> return doc
-    Left _    -> throwError $ PandocParseError "couldn't read native"
+  let t = sourcesToText . toSources $ s
+  in  case maybe (Pandoc nullMeta <$> readBlocks t) Right (safeRead t) of
+        Right doc -> return doc
+        Left _    -> throwError $ PandocParseError "couldn't read native"
 
 readBlocks :: Text -> Either PandocError [Block]
 readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead s)
diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs
index 5f2ddb876..668c9ca11 100644
--- a/src/Text/Pandoc/Readers/OPML.hs
+++ b/src/Text/Pandoc/Readers/OPML.hs
@@ -24,7 +24,8 @@ import Text.Pandoc.Options
 import Text.Pandoc.Error (PandocError(..))
 import Text.Pandoc.Readers.HTML (readHtml)
 import Text.Pandoc.Readers.Markdown (readMarkdown)
-import Text.Pandoc.Shared (crFilter, blocksToInlines')
+import Text.Pandoc.Shared (blocksToInlines')
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 import Text.Pandoc.XML.Light
 import Control.Monad.Except (throwError)
 
@@ -46,10 +47,14 @@ instance Default OPMLState where
                  , opmlOptions = def
                  }
 
-readOPML :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readOPML :: (PandocMonad m, ToSources a)
+         => ReaderOptions
+         -> a
+         -> m Pandoc
 readOPML opts inp  = do
-  (bs, st') <- runStateT
-                 (case parseXMLContents (TL.fromStrict (crFilter inp)) of
+  let sources = toSources inp
+  (bs, st') <-
+    runStateT (case parseXMLContents (TL.fromStrict . sourcesToText $ sources) of
                      Left msg -> throwError $ PandocXMLError "" msg
                      Right ns -> mapM parseBlock ns)
                  def{ opmlOptions = opts }
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index afeb27a87..8823befdd 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -18,22 +18,19 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing (reportLogMessages)
-import Text.Pandoc.Shared (crFilter)
-
+import Text.Pandoc.Sources (ToSources(..), ensureFinalNewlines)
 import Control.Monad.Except (throwError)
 import Control.Monad.Reader (runReaderT)
 
-import Data.Text (Text)
-
 -- | Parse org-mode string and return a Pandoc document.
-readOrg :: PandocMonad m
+readOrg :: (PandocMonad m, ToSources a)
         => ReaderOptions -- ^ Reader options
-        -> Text          -- ^ String to parse (assuming @'\n'@ line endings)
+        -> a
         -> m Pandoc
 readOrg opts s = do
   parsed <- flip runReaderT def $
             readWithM parseOrg (optionsToParserState opts)
-            (crFilter s <> "\n\n")
+            (ensureFinalNewlines 2 (toSources s))
   case parsed of
     Right result -> return result
     Left  e      -> throwError e
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
index 519a6ce04..054f2611a 100644
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -29,6 +29,7 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline)
 import Text.TeXMath (DisplayType (..), readTeX, writePandoc)
+import Text.Pandoc.Sources (ToSources(..))
 import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
 
 import Control.Monad (guard, mplus, mzero, unless, void, when)
@@ -802,7 +803,7 @@ inlineLaTeX = try $ do
    parseAsInlineLaTeX :: PandocMonad m
                       => Text -> TeXExport -> OrgParser m (Maybe Inlines)
    parseAsInlineLaTeX cs = \case
-     TeXExport -> maybeRight <$> runParserT inlineCommand state "" cs
+     TeXExport -> maybeRight <$> runParserT inlineCommand state "" (toSources cs)
      TeXIgnore -> return (Just mempty)
      TeXVerbatim -> return (Just $ B.str cs)
 
diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs
index 6ed24a602..c7ea02815 100644
--- a/src/Text/Pandoc/Readers/Org/Parsing.hs
+++ b/src/Text/Pandoc/Readers/Org/Parsing.hs
@@ -114,7 +114,7 @@ import Control.Monad (guard)
 import Control.Monad.Reader (ReaderT)
 
 -- | The parser used to read org files.
-type OrgParser m = ParserT Text OrgParserState (ReaderT OrgParserLocal m)
+type OrgParser m = ParserT Sources OrgParserState (ReaderT OrgParserLocal m)
 
 --
 -- Adaptions and specializations of parsing utilities
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index ac4c0b6cb..a3fcf028c 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -38,25 +38,24 @@ import Text.Pandoc.Options
 import Text.Pandoc.Parsing
 import Text.Pandoc.Shared
 import qualified Text.Pandoc.UTF8 as UTF8
-import Text.Printf (printf)
 import Data.Time.Format
 
 -- TODO:
 -- [ ] .. parsed-literal
 
 -- | Parse reStructuredText string and return Pandoc document.
-readRST :: PandocMonad m
+readRST :: (PandocMonad m, ToSources a)
         => ReaderOptions -- ^ Reader options
-        -> Text          -- ^ Text to parse (assuming @'\n'@ line endings)
+        -> a
         -> m Pandoc
 readRST opts s = do
   parsed <- readWithM parseRST def{ stateOptions = opts }
-               (crFilter s <> "\n\n")
+               (ensureFinalNewlines 2 (toSources s))
   case parsed of
     Right result -> return result
     Left e       -> throwError e
 
-type RSTParser m = ParserT Text ParserState m
+type RSTParser m = ParserT Sources ParserState m
 
 --
 -- Constants and data structure definitions
@@ -151,11 +150,19 @@ parseRST = do
   startPos <- getPosition
   -- go through once just to get list of reference keys and notes
   -- docMinusKeys is the raw document with blanks where the keys were...
-  docMinusKeys <- T.concat <$>
-                  manyTill (referenceKey <|> anchorDef <|>
-                            noteBlock <|> citationBlock <|>
-                            (snd <$> withRaw comment) <|>
-                            headerBlock <|> lineClump) eof
+  let chunk = referenceKey
+              <|> anchorDef
+              <|> noteBlock
+              <|> citationBlock
+              <|> (snd <$> withRaw comment)
+              <|> headerBlock
+              <|> lineClump
+  docMinusKeys <- Sources <$>
+                  manyTill (do pos <- getPosition
+                               t <- chunk
+                               return (pos, t)) eof
+  -- UGLY: we collapse source position information.
+  -- TODO: fix the parser to use the F monad instead of two passes
   setInput docMinusKeys
   setPosition startPos
   st' <- getState
@@ -348,7 +355,7 @@ singleHeader' = try $ do
 -- hrule block
 --
 
-hrule :: Monad m => ParserT Text st m Blocks
+hrule :: Monad m => ParserT Sources st m Blocks
 hrule = try $ do
   chr <- oneOf underlineChars
   count 3 (char chr)
@@ -363,7 +370,7 @@ hrule = try $ do
 
 -- read a line indented by a given string
 indentedLine :: (HasReaderOptions st, Monad m)
-             => Int -> ParserT Text st m Text
+             => Int -> ParserT Sources st m Text
 indentedLine indents = try $ do
   lookAhead spaceChar
   gobbleAtMostSpaces indents
@@ -372,7 +379,7 @@ indentedLine indents = try $ do
 -- one or more indented lines, possibly separated by blank lines.
 -- any amount of indentation will work.
 indentedBlock :: (HasReaderOptions st, Monad m)
-              => ParserT Text st m Text
+              => ParserT Sources st m Text
 indentedBlock = try $ do
   indents <- length <$> lookAhead (many1 spaceChar)
   lns <- many1 $ try $ do b <- option "" blanklines
@@ -381,20 +388,20 @@ indentedBlock = try $ do
   optional blanklines
   return $ T.unlines lns
 
-quotedBlock :: Monad m => ParserT Text st m Text
+quotedBlock :: Monad m => ParserT Sources st m Text
 quotedBlock = try $ do
     quote <- lookAhead $ oneOf "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
     lns <- many1 $ lookAhead (char quote) >> anyLine
     optional blanklines
     return $ T.unlines lns
 
-codeBlockStart :: Monad m => ParserT Text st m Char
+codeBlockStart :: Monad m => ParserT Sources st m Char
 codeBlockStart = string "::" >> blankline >> blankline
 
-codeBlock :: Monad m => ParserT Text ParserState m Blocks
+codeBlock :: Monad m => ParserT Sources ParserState m Blocks
 codeBlock = try $ codeBlockStart >> codeBlockBody
 
-codeBlockBody :: Monad m => ParserT Text ParserState m Blocks
+codeBlockBody :: Monad m => ParserT Sources ParserState m Blocks
 codeBlockBody = do
   lang <- stateRstHighlight <$> getState
   try $ B.codeBlockWith ("", maybeToList lang, []) . stripTrailingNewlines <$>
@@ -410,14 +417,14 @@ lhsCodeBlock = try $ do
   return $ B.codeBlockWith ("", ["haskell","literate"], [])
          $ T.intercalate "\n" lns
 
-latexCodeBlock :: Monad m => ParserT Text st m [Text]
+latexCodeBlock :: Monad m => ParserT Sources st m [Text]
 latexCodeBlock = try $ do
   try (latexBlockLine "\\begin{code}")
   many1Till anyLine (try $ latexBlockLine "\\end{code}")
  where
   latexBlockLine s = skipMany spaceChar >> string s >> blankline
 
-birdCodeBlock :: Monad m => ParserT Text st m [Text]
+birdCodeBlock :: Monad m => ParserT Sources st m [Text]
 birdCodeBlock = filterSpace <$> many1 birdTrackLine
   where filterSpace lns =
             -- if (as is normal) there is always a space after >, drop it
@@ -425,7 +432,7 @@ birdCodeBlock = filterSpace <$> many1 birdTrackLine
                then map (T.drop 1) lns
                else lns
 
-birdTrackLine :: Monad m => ParserT Text st m Text
+birdTrackLine :: Monad m => ParserT Sources st m Text
 birdTrackLine = char '>' >> anyLine
 
 --
@@ -456,7 +463,6 @@ includeDirective top fields body = do
   let (startLine :: Maybe Int) = lookup "start-line" fields >>= safeRead
   let (endLine :: Maybe Int) = lookup "end-line" fields >>= safeRead
   oldPos <- getPosition
-  oldInput <- getInput
   containers <- stateContainers <$> getState
   when (f `elem` containers) $
     throwError $ PandocParseError $ "Include file loop at " <> tshow oldPos
@@ -494,15 +500,11 @@ includeDirective top fields body = do
        Nothing   -> case lookup "literal" fields of
                          Just _  -> return $ B.rawBlock "rst" contents'
                          Nothing -> do
-                           setPosition $ newPos (T.unpack f) 1 1
-                           setInput $ contents' <> "\n"
-                           bs <- optional blanklines >>
-                                  (mconcat <$> many block)
-                           setInput oldInput
-                           setPosition oldPos
+                           addToSources (initialPos (T.unpack f))
+                              (contents' <> "\n")
                            updateState $ \s -> s{ stateContainers =
                                          tail $ stateContainers s }
-                           return bs
+                           return mempty
 
 
 --
@@ -526,7 +528,7 @@ definitionList :: PandocMonad m => RSTParser m Blocks
 definitionList = B.definitionList <$> many1 definitionListItem
 
 -- parses bullet list start and returns its length (inc. following whitespace)
-bulletListStart :: Monad m => ParserT Text st m Int
+bulletListStart :: Monad m => ParserT Sources st m Int
 bulletListStart = try $ do
   notFollowedBy' hrule  -- because hrules start out just like lists
   marker <- oneOf bulletListMarkers
@@ -1103,7 +1105,7 @@ quotedReferenceName = try $ do
 -- plus isolated (no two adjacent) internal hyphens, underscores,
 -- periods, colons and plus signs; no whitespace or other characters
 -- are allowed.
-simpleReferenceName :: Monad m => ParserT Text st m Text
+simpleReferenceName :: Monad m => ParserT Sources st m Text
 simpleReferenceName = do
   x <- alphaNum
   xs <- many $  alphaNum
@@ -1122,7 +1124,7 @@ referenceKey = do
   -- return enough blanks to replace key
   return $ T.replicate (sourceLine endPos - sourceLine startPos) "\n"
 
-targetURI :: Monad m => ParserT Text st m Text
+targetURI :: Monad m => ParserT Sources st m Text
 targetURI = do
   skipSpaces
   optional $ try $ newline >> notFollowedBy blankline
@@ -1160,8 +1162,10 @@ anonymousKey :: Monad m => RSTParser m ()
 anonymousKey = try $ do
   oneOfStrings [".. __:", "__"]
   src <- targetURI
-  pos <- getPosition
-  let key = toKey $ "_" <> T.pack (printf "%09d" (sourceLine pos))
+  -- we need to ensure that the keys are ordered by occurrence in
+  -- the document.
+  numKeys <- M.size . stateKeys <$> getState
+  let key = toKey $ "_" <> T.pack (show numKeys)
   updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $
                           stateKeys s }
 
@@ -1250,13 +1254,13 @@ headerBlock = do
 -- Grid tables TODO:
 --  - column spans
 
-dashedLine :: Monad m => Char -> ParserT Text st m (Int, Int)
+dashedLine :: Monad m => Char -> ParserT Sources st m (Int, Int)
 dashedLine ch = do
   dashes <- many1 (char ch)
   sp     <- many (char ' ')
   return (length dashes, length $ dashes ++ sp)
 
-simpleDashedLines :: Monad m => Char -> ParserT Text st m [(Int,Int)]
+simpleDashedLines :: Monad m => Char -> ParserT Sources st m [(Int,Int)]
 simpleDashedLines ch = try $ many1 (dashedLine ch)
 
 -- Parse a table row separator
@@ -1382,7 +1386,7 @@ hyphens = do
   -- don't want to treat endline after hyphen or dash as a space
   return $ B.str result
 
-escapedChar :: Monad m => ParserT Text st m Inlines
+escapedChar :: Monad m => ParserT Sources st m Inlines
 escapedChar = do c <- escaped anyChar
                  return $ if c == ' ' || c == '\n' || c == '\r'
                              -- '\ ' is null in RST
diff --git a/src/Text/Pandoc/Readers/Roff.hs b/src/Text/Pandoc/Readers/Roff.hs
index 509ce1377..47f16ef4b 100644
--- a/src/Text/Pandoc/Readers/Roff.hs
+++ b/src/Text/Pandoc/Readers/Roff.hs
@@ -42,7 +42,6 @@ import Text.Pandoc.Logging (LogMessage(..))
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing
 import Text.Pandoc.Shared (safeRead)
-import Text.Parsec hiding (tokenPrim)
 import Text.Pandoc.RoffChar (characterCodes, combiningAccents)
 import qualified Data.Sequence as Seq
 import qualified Data.Foldable as Foldable
@@ -122,16 +121,16 @@ instance Default RoffState where
                   , afterConditional = False
                   }
 
-type RoffLexer m = ParserT T.Text RoffState m
+type RoffLexer m = ParserT Sources RoffState m
 
 --
 -- Lexer: T.Text -> RoffToken
 --
 
-eofline :: Stream s m Char => ParsecT s u m ()
+eofline :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s u m ()
 eofline = void newline <|> eof <|> () <$ lookAhead (string "\\}")
 
-spacetab :: Stream s m Char => ParsecT s u m Char
+spacetab :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s u m Char
 spacetab = char ' ' <|> char '\t'
 
 characterCodeMap :: M.Map T.Text Char
@@ -303,8 +302,7 @@ expandString = try $ do
   char '*'
   cs <- escapeArg <|> countChar 1 anyChar
   s <- linePartsToText <$> resolveText cs pos
-  getInput >>= setInput . (s <>)
-  return ()
+  addToInput s
 
 -- Parses: '..'
 quoteArg :: PandocMonad m => RoffLexer m T.Text
@@ -316,7 +314,7 @@ escFont = do
   font' <- if T.null font || font == "P"
               then prevFont <$> getState
               else return $ foldr processFontLetter defaultFontSpec $ T.unpack font
-  modifyState $ \st -> st{ prevFont = currentFont st
+  updateState $ \st -> st{ prevFont = currentFont st
                          , currentFont = font' }
   return [Font font']
   where
@@ -372,8 +370,8 @@ lexTable pos = do
   spaces
   opts <- try tableOptions <|> [] <$ optional (char ';')
   case lookup "tab" opts of
-    Just (T.uncons -> Just (c, _)) -> modifyState $ \st -> st{ tableTabChar = c }
-    _                              -> modifyState $ \st -> st{ tableTabChar = '\t' }
+    Just (T.uncons -> Just (c, _)) -> updateState $ \st -> st{ tableTabChar = c }
+    _                              -> updateState $ \st -> st{ tableTabChar = '\t' }
   spaces
   skipMany lexComment
   spaces
@@ -489,18 +487,18 @@ lexConditional mname = do
   ifPart <- do
       optional $ try $ char '\\' >> newline
       lexGroup
-       <|> do modifyState $ \s -> s{ afterConditional = True }
+       <|> do updateState $ \s -> s{ afterConditional = True }
               t <- manToken
-              modifyState $ \s -> s{ afterConditional = False }
+              updateState $ \s -> s{ afterConditional = False }
               return t
   case mbtest of
     Nothing    -> do
-      putState st  -- reset state, so we don't record macros in skipped section
+      setState st  -- reset state, so we don't record macros in skipped section
       report $ SkippedContent (T.cons '.' mname) pos
       return mempty
     Just True  -> return ifPart
     Just False -> do
-      putState st
+      setState st
       return mempty
 
 expression :: PandocMonad m => RoffLexer m (Maybe Bool)
@@ -515,7 +513,7 @@ expression = do
       _    -> Nothing
   where
     returnValue v = do
-      modifyState $ \st -> st{ lastExpression = v }
+      updateState $ \st -> st{ lastExpression = v }
       return v
 
 lexGroup :: PandocMonad m => RoffLexer m RoffTokens
@@ -536,7 +534,7 @@ lexIncludeFile args = do
       result <- readFileFromDirs dirs $ T.unpack fp
       case result of
         Nothing  -> report $ CouldNotLoadIncludeFile fp pos
-        Just s   -> getInput >>= setInput . (s <>)
+        Just s   -> addToInput s
       return mempty
     []    -> return mempty
 
@@ -564,13 +562,13 @@ lexStringDef args = do -- string definition
      (x:ys) -> do
        let ts = singleTok $ TextLine (intercalate [RoffStr " " ] ys)
        let stringName = linePartsToText x
-       modifyState $ \st ->
+       updateState $ \st ->
          st{ customMacros = M.insert stringName ts (customMacros st) }
    return mempty
 
 lexMacroDef :: PandocMonad m => [Arg] -> RoffLexer m RoffTokens
 lexMacroDef args = do -- macro definition
-   modifyState $ \st -> st{ roffMode = CopyMode }
+   updateState $ \st -> st{ roffMode = CopyMode }
    (macroName, stopMacro) <-
      case args of
        (x : y : _) -> return (linePartsToText x, linePartsToText y)
@@ -584,7 +582,7 @@ lexMacroDef args = do -- macro definition
          _ <- lexArgs
          return ()
    ts <- mconcat <$> manyTill manToken stop
-   modifyState $ \st ->
+   updateState $ \st ->
      st{ customMacros = M.insert macroName ts (customMacros st)
        , roffMode = NormalMode }
    return mempty
diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs
index c4d7bcc93..276d28aaa 100644
--- a/src/Text/Pandoc/Readers/TWiki.hs
+++ b/src/Text/Pandoc/Readers/TWiki.hs
@@ -28,22 +28,22 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (enclosed, nested)
 import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag)
-import Text.Pandoc.Shared (crFilter, tshow)
+import Text.Pandoc.Shared (tshow)
 import Text.Pandoc.XML (fromEntities)
 
 -- | Read twiki from an input string and return a Pandoc document.
-readTWiki :: PandocMonad m
+readTWiki :: (PandocMonad m, ToSources a)
           => ReaderOptions
-          -> Text
+          -> a
           -> m Pandoc
 readTWiki opts s = do
-  res <- readWithM parseTWiki def{ stateOptions = opts }
-             (crFilter s <> "\n\n")
+  let sources = ensureFinalNewlines 2 (toSources s)
+  res <- readWithM parseTWiki def{ stateOptions = opts } sources
   case res of
        Left e  -> throwError e
        Right d -> return d
 
-type TWParser = ParserT Text ParserState
+type TWParser = ParserT Sources ParserState
 
 --
 -- utility functions
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 8d7900de4..981878206 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -53,30 +53,34 @@ import Text.Pandoc.Options
 import Text.Pandoc.Parsing
 import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isInlineTag)
 import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline)
-import Text.Pandoc.Shared (crFilter, trim, tshow)
+import Text.Pandoc.Shared (trim, tshow)
 
 -- | Parse a Textile text and return a Pandoc document.
-readTextile :: PandocMonad m
+readTextile :: (PandocMonad m, ToSources a)
             => ReaderOptions -- ^ Reader options
-            -> Text          -- ^ String to parse (assuming @'\n'@ line endings)
+            -> a
             -> m Pandoc
 readTextile opts s = do
-  parsed <- readWithM parseTextile def{ stateOptions = opts }
-                (crFilter s <> "\n\n")
+  let sources = ensureFinalNewlines 2 (toSources s)
+  parsed <- readWithM parseTextile def{ stateOptions = opts } sources
   case parsed of
      Right result -> return result
      Left e       -> throwError e
 
+type TextileParser = ParserT Sources ParserState
 
 -- | Generate a Pandoc ADT from a textile document
-parseTextile :: PandocMonad m => ParserT Text ParserState m Pandoc
+parseTextile :: PandocMonad m => TextileParser m Pandoc
 parseTextile = do
   many blankline
   startPos <- getPosition
   -- go through once just to get list of reference keys and notes
   -- docMinusKeys is the raw document with blanks where the keys/notes were...
-  let firstPassParser = noteBlock <|> lineClump
-  manyTill firstPassParser eof >>= setInput . T.concat
+  let firstPassParser = do
+        pos <- getPosition
+        t <- noteBlock <|> lineClump
+        return (pos, t)
+  manyTill firstPassParser eof >>= setInput . Sources
   setPosition startPos
   st' <- getState
   let reversedNotes = stateNotes st'
@@ -84,10 +88,10 @@ parseTextile = do
   -- now parse it for real...
   Pandoc nullMeta . B.toList <$> parseBlocks -- FIXME
 
-noteMarker :: PandocMonad m => ParserT Text ParserState m Text
+noteMarker :: PandocMonad m => TextileParser m Text
 noteMarker = skipMany spaceChar >> string "fn" >> T.pack <$> manyTill digit (char '.')
 
-noteBlock :: PandocMonad m => ParserT Text ParserState m Text
+noteBlock :: PandocMonad m => TextileParser m Text
 noteBlock = try $ do
   startPos <- getPosition
   ref <- noteMarker
@@ -102,11 +106,11 @@ noteBlock = try $ do
   return $ T.replicate (sourceLine endPos - sourceLine startPos) "\n"
 
 -- | Parse document blocks
-parseBlocks :: PandocMonad m => ParserT Text ParserState m Blocks
+parseBlocks :: PandocMonad m => TextileParser m Blocks
 parseBlocks = mconcat <$> manyTill block eof
 
 -- | Block parsers list tried in definition order
-blockParsers :: PandocMonad m => [ParserT Text ParserState m Blocks]
+blockParsers :: PandocMonad m => [TextileParser m Blocks]
 blockParsers = [ codeBlock
                , header
                , blockQuote
@@ -121,22 +125,22 @@ blockParsers = [ codeBlock
                ]
 
 -- | Any block in the order of definition of blockParsers
-block :: PandocMonad m => ParserT Text ParserState m Blocks
+block :: PandocMonad m => TextileParser m Blocks
 block = do
   res <- choice blockParsers <?> "block"
   trace (T.take 60 $ tshow $ B.toList res)
   return res
 
-commentBlock :: PandocMonad m => ParserT Text ParserState m Blocks
+commentBlock :: PandocMonad m => TextileParser m Blocks
 commentBlock = try $ do
   string "###."
   manyTill anyLine blanklines
   return mempty
 
-codeBlock :: PandocMonad m => ParserT Text ParserState m Blocks
+codeBlock :: PandocMonad m => TextileParser m Blocks
 codeBlock = codeBlockTextile <|> codeBlockHtml
 
-codeBlockTextile :: PandocMonad m => ParserT Text ParserState m Blocks
+codeBlockTextile :: PandocMonad m => TextileParser m Blocks
 codeBlockTextile = try $ do
   string "bc." <|> string "pre."
   extended <- option False (True <$ char '.')
@@ -156,7 +160,7 @@ trimTrailingNewlines :: Text -> Text
 trimTrailingNewlines = T.dropWhileEnd (=='\n')
 
 -- | Code Blocks in Textile are between <pre> and </pre>
-codeBlockHtml :: PandocMonad m => ParserT Text ParserState m Blocks
+codeBlockHtml :: PandocMonad m => TextileParser m Blocks
 codeBlockHtml = try $ do
   (t@(TagOpen _ attrs),_) <- htmlTag (tagOpen (=="pre") (const True))
   result' <- T.pack <$> manyTill anyChar (htmlTag (tagClose (=="pre")))
@@ -174,7 +178,7 @@ codeBlockHtml = try $ do
   return $ B.codeBlockWith (ident,classes,kvs) result'''
 
 -- | Header of the form "hN. content" with N in 1..6
-header :: PandocMonad m => ParserT Text ParserState m Blocks
+header :: PandocMonad m => TextileParser m Blocks
 header = try $ do
   char 'h'
   level <- digitToInt <$> oneOf "123456"
@@ -186,14 +190,14 @@ header = try $ do
   return $ B.headerWith attr' level name
 
 -- | Blockquote of the form "bq. content"
-blockQuote :: PandocMonad m => ParserT Text ParserState m Blocks
+blockQuote :: PandocMonad m => TextileParser m Blocks
 blockQuote = try $ do
   string "bq" >> attributes >> char '.' >> whitespace
   B.blockQuote <$> para
 
 -- Horizontal rule
 
-hrule :: PandocMonad m => ParserT Text st m Blocks
+hrule :: PandocMonad m => TextileParser m Blocks
 hrule = try $ do
   skipSpaces
   start <- oneOf "-*"
@@ -208,39 +212,39 @@ hrule = try $ do
 -- | Can be a bullet list or an ordered list. This implementation is
 -- strict in the nesting, sublist must start at exactly "parent depth
 -- plus one"
-anyList :: PandocMonad m => ParserT Text ParserState m Blocks
+anyList :: PandocMonad m => TextileParser m Blocks
 anyList = try $ anyListAtDepth 1 <* blanklines
 
 -- | This allow one type of list to be nested into an other type,
 -- provided correct nesting
-anyListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+anyListAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
 anyListAtDepth depth = choice [ bulletListAtDepth depth,
                                 orderedListAtDepth depth,
                                 definitionList ]
 
 -- | Bullet List of given depth, depth being the number of leading '*'
-bulletListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+bulletListAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
 bulletListAtDepth depth = try $ B.bulletList  <$> many1 (bulletListItemAtDepth depth)
 
 -- | Bullet List Item of given depth, depth being the number of
 -- leading '*'
-bulletListItemAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+bulletListItemAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
 bulletListItemAtDepth = genericListItemAtDepth '*'
 
 -- | Ordered List of given depth, depth being the number of
 -- leading '#'
-orderedListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+orderedListAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
 orderedListAtDepth depth = try $ do
   items <- many1 (orderedListItemAtDepth depth)
   return $ B.orderedList items
 
 -- | Ordered List Item of given depth, depth being the number of
 -- leading '#'
-orderedListItemAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+orderedListItemAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
 orderedListItemAtDepth = genericListItemAtDepth '#'
 
 -- | Common implementation of list items
-genericListItemAtDepth :: PandocMonad m => Char -> Int -> ParserT Text ParserState m Blocks
+genericListItemAtDepth :: PandocMonad m => Char -> Int -> TextileParser m Blocks
 genericListItemAtDepth c depth = try $ do
   count depth (char c) >> attributes >> whitespace
   contents <- mconcat <$> many ((B.plain . mconcat <$> many1 inline) <|>
@@ -250,25 +254,25 @@ genericListItemAtDepth c depth = try $ do
   return $ contents <> sublist
 
 -- | A definition list is a set of consecutive definition items
-definitionList :: PandocMonad m => ParserT Text ParserState m Blocks
+definitionList :: PandocMonad m => TextileParser m Blocks
 definitionList = try $ B.definitionList <$> many1 definitionListItem
 
 -- | List start character.
-listStart :: PandocMonad m => ParserT Text ParserState m ()
+listStart :: PandocMonad m => TextileParser m ()
 listStart = genericListStart '*'
         <|> () <$ genericListStart '#'
         <|> () <$ definitionListStart
 
-genericListStart :: PandocMonad m => Char -> ParserT Text st m ()
+genericListStart :: PandocMonad m => Char -> TextileParser m ()
 genericListStart c = () <$ try (many1 (char c) >> whitespace)
 
-basicDLStart :: PandocMonad m => ParserT Text ParserState m ()
+basicDLStart :: PandocMonad m => TextileParser m ()
 basicDLStart = do
   char '-'
   whitespace
   notFollowedBy newline
 
-definitionListStart :: PandocMonad m => ParserT Text ParserState m Inlines
+definitionListStart :: PandocMonad m => TextileParser m Inlines
 definitionListStart = try $ do
   basicDLStart
   trimInlines . mconcat <$>
@@ -281,15 +285,15 @@ definitionListStart = try $ do
 -- the term defined, then spaces and ":=". The definition follows, on
 -- the same single line, or spaned on multiple line, after a line
 -- break.
-definitionListItem :: PandocMonad m => ParserT Text ParserState m (Inlines, [Blocks])
+definitionListItem :: PandocMonad m => TextileParser m (Inlines, [Blocks])
 definitionListItem = try $ do
   term <- mconcat . intersperse B.linebreak <$> many1 definitionListStart
   def' <- string ":=" *> optional whitespace *> (multilineDef <|> inlineDef)
   return (term, def')
-  where inlineDef :: PandocMonad m => ParserT Text ParserState m [Blocks]
+  where inlineDef :: PandocMonad m => TextileParser m [Blocks]
         inlineDef = liftM (\d -> [B.plain d])
                     $ optional whitespace >> (trimInlines . mconcat <$> many inline) <* newline
-        multilineDef :: PandocMonad m => ParserT Text ParserState m [Blocks]
+        multilineDef :: PandocMonad m => TextileParser m [Blocks]
         multilineDef = try $ do
           optional whitespace >> newline
           s <- T.pack <$> many1Till anyChar (try (string "=:" >> newline))
@@ -300,7 +304,7 @@ definitionListItem = try $ do
 -- raw content
 
 -- | A raw Html Block, optionally followed by blanklines
-rawHtmlBlock :: PandocMonad m => ParserT Text ParserState m Blocks
+rawHtmlBlock :: PandocMonad m => TextileParser m Blocks
 rawHtmlBlock = try $ do
   skipMany spaceChar
   (_,b) <- htmlTag isBlockTag
@@ -308,14 +312,14 @@ rawHtmlBlock = try $ do
   return $ B.rawBlock "html" b
 
 -- | Raw block of LaTeX content
-rawLaTeXBlock' :: PandocMonad m => ParserT Text ParserState m Blocks
+rawLaTeXBlock' :: PandocMonad m => TextileParser m Blocks
 rawLaTeXBlock' = do
   guardEnabled Ext_raw_tex
   B.rawBlock "latex" <$> (rawLaTeXBlock <* spaces)
 
 
 -- | In textile, paragraphs are separated by blank lines.
-para :: PandocMonad m => ParserT Text ParserState m Blocks
+para :: PandocMonad m => TextileParser m Blocks
 para = B.para . trimInlines . mconcat <$> many1 inline
 
 -- Tables
@@ -326,7 +330,7 @@ toAlignment '>' = AlignRight
 toAlignment '=' = AlignCenter
 toAlignment _   = AlignDefault
 
-cellAttributes :: PandocMonad m => ParserT Text ParserState m (Bool, Alignment)
+cellAttributes :: PandocMonad m => TextileParser m (Bool, Alignment)
 cellAttributes = try $ do
   isHeader <- option False (True <$ char '_')
   -- we just ignore colspan and rowspan markers:
@@ -339,7 +343,7 @@ cellAttributes = try $ do
   return (isHeader, alignment)
 
 -- | A table cell spans until a pipe |
-tableCell :: PandocMonad m => ParserT Text ParserState m ((Bool, Alignment), Blocks)
+tableCell :: PandocMonad m => TextileParser m ((Bool, Alignment), Blocks)
 tableCell = try $ do
   char '|'
   (isHeader, alignment) <- option (False, AlignDefault) cellAttributes
@@ -350,7 +354,7 @@ tableCell = try $ do
   return ((isHeader, alignment), B.plain content)
 
 -- | A table row is made of many table cells
-tableRow :: PandocMonad m => ParserT Text ParserState m [((Bool, Alignment), Blocks)]
+tableRow :: PandocMonad m => TextileParser m [((Bool, Alignment), Blocks)]
 tableRow = try $ do
   -- skip optional row attributes
   optional $ try $ do
@@ -360,7 +364,7 @@ tableRow = try $ do
   many1 tableCell <* char '|' <* blankline
 
 -- | A table with an optional header.
-table :: PandocMonad m => ParserT Text ParserState m Blocks
+table :: PandocMonad m => TextileParser m Blocks
 table = try $ do
   -- ignore table attributes
   caption <- option mempty $ try $ do
@@ -388,7 +392,7 @@ table = try $ do
     (TableFoot nullAttr [])
 
 -- | Ignore markers for cols, thead, tfoot.
-ignorableRow :: PandocMonad m => ParserT Text ParserState m ()
+ignorableRow :: PandocMonad m => TextileParser m ()
 ignorableRow = try $ do
   char '|'
   oneOf ":^-~"
@@ -397,7 +401,7 @@ ignorableRow = try $ do
   _ <- anyLine
   return ()
 
-explicitBlockStart :: PandocMonad m => Text -> ParserT Text ParserState m ()
+explicitBlockStart :: PandocMonad m => Text -> TextileParser m ()
 explicitBlockStart name = try $ do
   string (T.unpack name)
   attributes
@@ -409,8 +413,8 @@ explicitBlockStart name = try $ do
 -- However, they can be used to set HTML/CSS attributes when needed.
 maybeExplicitBlock :: PandocMonad m
                    => Text  -- ^ block tag name
-                   -> ParserT Text ParserState m Blocks -- ^ implicit block
-                   -> ParserT Text ParserState m Blocks
+                   -> TextileParser m Blocks -- ^ implicit block
+                   -> TextileParser m Blocks
 maybeExplicitBlock name blk = try $ do
   optional $ explicitBlockStart name
   blk
@@ -423,11 +427,11 @@ maybeExplicitBlock name blk = try $ do
 
 
 -- | Any inline element
-inline :: PandocMonad m => ParserT Text ParserState m Inlines
+inline :: PandocMonad m => TextileParser m Inlines
 inline = choice inlineParsers <?> "inline"
 
 -- | Inline parsers tried in order
-inlineParsers :: PandocMonad m => [ParserT Text ParserState m Inlines]
+inlineParsers :: PandocMonad m => [TextileParser m Inlines]
 inlineParsers = [ str
                 , whitespace
                 , endline
@@ -447,7 +451,7 @@ inlineParsers = [ str
                 ]
 
 -- | Inline markups
-inlineMarkup :: PandocMonad m => ParserT Text ParserState m Inlines
+inlineMarkup :: PandocMonad m => TextileParser m Inlines
 inlineMarkup = choice [ simpleInline (string "??") (B.cite [])
                       , simpleInline (string "**") B.strong
                       , simpleInline (string "__") B.emph
@@ -461,29 +465,29 @@ inlineMarkup = choice [ simpleInline (string "??") (B.cite [])
                       ]
 
 -- | Trademark, registered, copyright
-mark :: PandocMonad m => ParserT Text st m Inlines
+mark :: PandocMonad m => TextileParser m Inlines
 mark = try $ char '(' >> (try tm <|> try reg <|> copy)
 
-reg :: PandocMonad m => ParserT Text st m Inlines
+reg :: PandocMonad m => TextileParser m Inlines
 reg = do
   oneOf "Rr"
   char ')'
   return $ B.str "\174"
 
-tm :: PandocMonad m => ParserT Text st m Inlines
+tm :: PandocMonad m => TextileParser m Inlines
 tm = do
   oneOf "Tt"
   oneOf "Mm"
   char ')'
   return $ B.str "\8482"
 
-copy :: PandocMonad m => ParserT Text st m Inlines
+copy :: PandocMonad m => TextileParser m Inlines
 copy = do
   oneOf "Cc"
   char ')'
   return $ B.str "\169"
 
-note :: PandocMonad m => ParserT Text ParserState m Inlines
+note :: PandocMonad m => TextileParser m Inlines
 note = try $ do
   ref <- char '[' *> many1 digit <* char ']'
   notes <- stateNotes <$> getState
@@ -507,13 +511,13 @@ wordBoundaries :: [Char]
 wordBoundaries = markupChars <> stringBreakers
 
 -- | Parse a hyphened sequence of words
-hyphenedWords :: PandocMonad m => ParserT Text ParserState m Text
+hyphenedWords :: PandocMonad m => TextileParser m Text
 hyphenedWords = do
   x <- wordChunk
   xs <-  many (try $ char '-' >> wordChunk)
   return $ T.intercalate "-" (x:xs)
 
-wordChunk :: PandocMonad m => ParserT Text ParserState m Text
+wordChunk :: PandocMonad m => TextileParser m Text
 wordChunk = try $ do
   hd <- noneOf wordBoundaries
   tl <- many ( noneOf wordBoundaries <|>
@@ -522,7 +526,7 @@ wordChunk = try $ do
   return $ T.pack $ hd:tl
 
 -- | Any string
-str :: PandocMonad m => ParserT Text ParserState m Inlines
+str :: PandocMonad m => TextileParser m Inlines
 str = do
   baseStr <- hyphenedWords
   -- RedCloth compliance : if parsed word is uppercase and immediately
@@ -535,11 +539,11 @@ str = do
   return $ B.str fullStr
 
 -- | Some number of space chars
-whitespace :: PandocMonad m => ParserT Text st m Inlines
+whitespace :: PandocMonad m => TextileParser m Inlines
 whitespace = many1 spaceChar >> return B.space <?> "whitespace"
 
 -- | In Textile, an isolated endline character is a line break
-endline :: PandocMonad m => ParserT Text ParserState m Inlines
+endline :: PandocMonad m => TextileParser m Inlines
 endline = try $ do
   newline
   notFollowedBy blankline
@@ -547,18 +551,18 @@ endline = try $ do
   notFollowedBy rawHtmlBlock
   return B.linebreak
 
-rawHtmlInline :: PandocMonad m => ParserT Text ParserState m Inlines
+rawHtmlInline :: PandocMonad m => TextileParser m Inlines
 rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag
 
 -- | Raw LaTeX Inline
-rawLaTeXInline' :: PandocMonad m => ParserT Text ParserState m Inlines
+rawLaTeXInline' :: PandocMonad m => TextileParser m Inlines
 rawLaTeXInline' = try $ do
   guardEnabled Ext_raw_tex
   B.rawInline "latex" <$> rawLaTeXInline
 
 -- | Textile standard link syntax is "label":target. But we
 -- can also have ["label":target].
-link :: PandocMonad m => ParserT Text ParserState m Inlines
+link :: PandocMonad m => TextileParser m Inlines
 link = try $ do
   bracketed <- (True <$ char '[') <|> return False
   char '"' *> notFollowedBy (oneOf " \t\n\r")
@@ -578,7 +582,7 @@ link = try $ do
               else B.spanWith attr $ B.link url "" name'
 
 -- | image embedding
-image :: PandocMonad m => ParserT Text ParserState m Inlines
+image :: PandocMonad m => TextileParser m Inlines
 image = try $ do
   char '!' >> notFollowedBy space
   (ident, cls, kvs) <- attributes
@@ -590,51 +594,51 @@ image = try $ do
   char '!'
   return $ B.imageWith attr src alt (B.str alt)
 
-escapedInline :: PandocMonad m => ParserT Text ParserState m Inlines
+escapedInline :: PandocMonad m => TextileParser m Inlines
 escapedInline = escapedEqs <|> escapedTag
 
-escapedEqs :: PandocMonad m => ParserT Text ParserState m Inlines
+escapedEqs :: PandocMonad m => TextileParser m Inlines
 escapedEqs = B.str . T.pack <$>
   try (string "==" *> manyTill anyChar' (try $ string "=="))
 
 -- | literal text escaped btw <notextile> tags
-escapedTag :: PandocMonad m => ParserT Text ParserState m Inlines
+escapedTag :: PandocMonad m => TextileParser m Inlines
 escapedTag = B.str . T.pack <$>
   try (string "<notextile>" *>
          manyTill anyChar' (try $ string "</notextile>"))
 
 -- | Any special symbol defined in wordBoundaries
-symbol :: PandocMonad m => ParserT Text ParserState m Inlines
+symbol :: PandocMonad m => TextileParser m Inlines
 symbol = B.str . T.singleton <$> (notFollowedBy newline *>
                                   notFollowedBy rawHtmlBlock *>
                                   oneOf wordBoundaries)
 
 -- | Inline code
-code :: PandocMonad m => ParserT Text ParserState m Inlines
+code :: PandocMonad m => TextileParser m Inlines
 code = code1 <|> code2
 
 -- any character except a newline before a blank line
-anyChar' :: PandocMonad m => ParserT Text ParserState m Char
+anyChar' :: PandocMonad m => TextileParser m Char
 anyChar' =
   satisfy (/='\n') <|>
   try (char '\n' <* notFollowedBy blankline)
 
-code1 :: PandocMonad m => ParserT Text ParserState m Inlines
+code1 :: PandocMonad m => TextileParser m Inlines
 code1 = B.code . T.pack <$> surrounded (char '@') anyChar'
 
-code2 :: PandocMonad m => ParserT Text ParserState m Inlines
+code2 :: PandocMonad m => TextileParser m Inlines
 code2 = do
   htmlTag (tagOpen (=="tt") null)
   B.code . T.pack <$> manyTill anyChar' (try $ htmlTag $ tagClose (=="tt"))
 
 -- | Html / CSS attributes
-attributes :: PandocMonad m => ParserT Text ParserState m Attr
+attributes :: PandocMonad m => TextileParser m Attr
 attributes = foldl' (flip ($)) ("",[],[]) <$>
   try (do special <- option id specialAttribute
           attrs <- many attribute
           return (special : attrs))
 
-specialAttribute :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+specialAttribute :: PandocMonad m => TextileParser m (Attr -> Attr)
 specialAttribute = do
   alignStr <- ("center" <$ char '=') <|>
     ("justify" <$ try (string "<>")) <|>
@@ -643,11 +647,11 @@ specialAttribute = do
   notFollowedBy spaceChar
   return $ addStyle $ T.pack $ "text-align:" ++ alignStr
 
-attribute :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+attribute :: PandocMonad m => TextileParser m (Attr -> Attr)
 attribute = try $
   (classIdAttr <|> styleAttr <|> langAttr) <* notFollowedBy spaceChar
 
-classIdAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+classIdAttr :: PandocMonad m => TextileParser m (Attr -> Attr)
 classIdAttr = try $ do -- (class class #id)
   char '('
   ws <- T.words `fmap` T.pack <$> manyTill anyChar' (char ')')
@@ -659,7 +663,7 @@ classIdAttr = try $ do -- (class class #id)
        classes'
          -> return $ \(_,_,keyvals) -> ("",classes',keyvals)
 
-styleAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+styleAttr :: PandocMonad m => TextileParser m (Attr -> Attr)
 styleAttr = do
   style <- try $ enclosed (char '{') (char '}') anyChar'
   return $ addStyle $ T.pack style
@@ -670,23 +674,23 @@ addStyle style (id',classes,keyvals) =
   where keyvals' = ("style", style') : [(k,v) | (k,v) <- keyvals, k /= "style"]
         style' = style <> ";" <> T.concat [v | ("style",v) <- keyvals]
 
-langAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+langAttr :: PandocMonad m => TextileParser m (Attr -> Attr)
 langAttr = do
   lang <- try $ enclosed (char '[') (char ']') alphaNum
   return $ \(id',classes,keyvals) -> (id',classes,("lang",T.pack lang):keyvals)
 
 -- | Parses material surrounded by a parser.
 surrounded :: (PandocMonad m, Show t)
-           => ParserT Text st m t   -- ^ surrounding parser
-           -> ParserT Text st m a   -- ^ content parser (to be used repeatedly)
-           -> ParserT Text st m [a]
+           => ParserT Sources st m t   -- ^ surrounding parser
+           -> ParserT Sources st m a   -- ^ content parser (to be used repeatedly)
+           -> ParserT Sources st m [a]
 surrounded border =
   enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border)
 
 simpleInline :: PandocMonad m
-             => ParserT Text ParserState m t        -- ^ surrounding parser
+             => TextileParser m t        -- ^ surrounding parser
              -> (Inlines -> Inlines)                  -- ^ Inline constructor
-             -> ParserT Text ParserState m Inlines  -- ^ content parser (to be used repeatedly)
+             -> TextileParser m Inlines  -- ^ content parser (to be used repeatedly)
 simpleInline border construct = try $ do
   notAfterString
   border *> notFollowedBy (oneOf " \t\n\r")
@@ -700,7 +704,7 @@ simpleInline border construct = try $ do
            then body
            else B.spanWith attr body
 
-groupedInlineMarkup :: PandocMonad m => ParserT Text ParserState m Inlines
+groupedInlineMarkup :: PandocMonad m => TextileParser m Inlines
 groupedInlineMarkup = try $ do
     char '['
     sp1 <- option mempty $ B.space <$ whitespace
@@ -709,5 +713,5 @@ groupedInlineMarkup = try $ do
     char ']'
     return $ sp1 <> result <> sp2
 
-eof' :: Monad m => ParserT Text s m Char
+eof' :: Monad m => ParserT Sources s m Char
 eof' = '\n' <$ eof
diff --git a/src/Text/Pandoc/Readers/TikiWiki.hs b/src/Text/Pandoc/Readers/TikiWiki.hs
index fb4b662c5..5c414fdec 100644
--- a/src/Text/Pandoc/Readers/TikiWiki.hs
+++ b/src/Text/Pandoc/Readers/TikiWiki.hs
@@ -30,23 +30,23 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Logging (Verbosity (..))
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (enclosed, nested)
-import Text.Pandoc.Shared (crFilter, safeRead)
+import Text.Pandoc.Shared (safeRead)
 import Text.Pandoc.XML (fromEntities)
 import Text.Printf (printf)
 
 -- | Read TikiWiki from an input string and return a Pandoc document.
-readTikiWiki :: PandocMonad m
+readTikiWiki :: (PandocMonad m, ToSources a)
           => ReaderOptions
-          -> Text
+          -> a
           -> m Pandoc
 readTikiWiki opts s = do
-  res <- readWithM parseTikiWiki def{ stateOptions = opts }
-             (crFilter s <> "\n\n")
+  let sources = ensureFinalNewlines 2 (toSources s)
+  res <- readWithM parseTikiWiki def{ stateOptions = opts } sources
   case res of
        Left e  -> throwError e
        Right d -> return d
 
-type TikiWikiParser = ParserT Text ParserState
+type TikiWikiParser = ParserT Sources ParserState
 
 --
 -- utility functions
diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs
index d355a4b55..6f92f0063 100644
--- a/src/Text/Pandoc/Readers/Txt2Tags.hs
+++ b/src/Text/Pandoc/Readers/Txt2Tags.hs
@@ -33,9 +33,9 @@ import Data.Time (defaultTimeLocale)
 import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (space, spaces, uri)
-import Text.Pandoc.Shared (compactify, compactifyDL, crFilter, escapeURI)
+import Text.Pandoc.Shared (compactify, compactifyDL, escapeURI)
 
-type T2T = ParserT Text ParserState (Reader T2TMeta)
+type T2T = ParserT Sources ParserState (Reader T2TMeta)
 
 -- | An object for the T2T macros meta information
 -- the contents of each field is simply substituted verbatim into the file
@@ -68,15 +68,15 @@ getT2TMeta = do
                      (intercalate ", " inps) outp
 
 -- | Read Txt2Tags from an input string returning a Pandoc document
-readTxt2Tags :: PandocMonad m
+readTxt2Tags :: (PandocMonad m, ToSources a)
              => ReaderOptions
-             -> Text
+             -> a
              -> m Pandoc
 readTxt2Tags opts s = do
+  let sources = ensureFinalNewlines 2 (toSources s)
   meta <- getT2TMeta
   let parsed = flip runReader meta $
-        readWithM parseT2T (def {stateOptions = opts}) $
-        crFilter s <> "\n\n"
+        readWithM parseT2T (def {stateOptions = opts}) sources
   case parsed of
     Right result -> return result
     Left e       -> throwError e
diff --git a/src/Text/Pandoc/Readers/Vimwiki.hs b/src/Text/Pandoc/Readers/Vimwiki.hs
index 74dac5ea7..460f304c4 100644
--- a/src/Text/Pandoc/Readers/Vimwiki.hs
+++ b/src/Text/Pandoc/Readers/Vimwiki.hs
@@ -74,23 +74,28 @@ import Text.Pandoc.Parsing (ParserState, ParserT, blanklines, emailAddress,
                             many1Till, orderedListMarker, readWithM,
                             registerHeader, spaceChar, stateMeta,
                             stateOptions, uri, manyTillChar, manyChar, textStr,
-                            many1Char, countChar, many1TillChar)
-import Text.Pandoc.Shared (crFilter, splitTextBy, stringify, stripFirstAndLast,
+                            many1Char, countChar, many1TillChar,
+                            alphaNum, anyChar, char, newline, noneOf, oneOf,
+                            space, spaces, string)
+import Text.Pandoc.Sources (ToSources(..), Sources)
+import Text.Pandoc.Shared (splitTextBy, stringify, stripFirstAndLast,
                            isURI, tshow)
-import Text.Parsec.Char (alphaNum, anyChar, char, newline, noneOf, oneOf, space,
-                         spaces, string)
 import Text.Parsec.Combinator (between, choice, eof, lookAhead, many1,
                                manyTill, notFollowedBy, option, skipMany1)
 import Text.Parsec.Prim (getState, many, try, updateState, (<|>))
 
-readVimwiki :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readVimwiki :: (PandocMonad m, ToSources a)
+            => ReaderOptions
+            -> a
+            -> m Pandoc
 readVimwiki opts s = do
-  res <- readWithM parseVimwiki def{ stateOptions = opts } $ crFilter s
+  let sources = toSources s
+  res <- readWithM parseVimwiki def{ stateOptions = opts } sources
   case res of
        Left e       -> throwError e
        Right result -> return result
 
-type VwParser = ParserT Text ParserState
+type VwParser = ParserT Sources ParserState
 
 
 -- constants
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index e389c1727..920edca7b 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -298,6 +298,7 @@ tabFilter tabStop = T.unlines . map go . T.lines
                        (tabStop - (T.length s1 `mod` tabStop)) (T.pack " ")
                        <> go (T.drop 1 s2)
 
+{-# DEPRECATED crFilter "readers filter crs automatically" #-}
 -- | Strip out DOS line endings.
 crFilter :: T.Text -> T.Text
 crFilter = T.filter (/= '\r')
diff --git a/src/Text/Pandoc/Sources.hs b/src/Text/Pandoc/Sources.hs
new file mode 100644
index 000000000..5511ccfb8
--- /dev/null
+++ b/src/Text/Pandoc/Sources.hs
@@ -0,0 +1,195 @@
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+   Module      : Text.Pandoc.Sources
+   Copyright   : Copyright (C) 2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+Defines Sources object to be used as input to pandoc parsers and redefines Char
+parsers so they get source position information from it.
+-}
+
+module Text.Pandoc.Sources
+  ( Sources(..)
+  , ToSources(..)
+  , UpdateSourcePos(..)
+  , sourcesToText
+  , initialSourceName
+  , addToSources
+  , ensureFinalNewlines
+  , addToInput
+  , satisfy
+  , oneOf
+  , noneOf
+  , anyChar
+  , char
+  , string
+  , newline
+  , space
+  , spaces
+  , letter
+  , digit
+  , hexDigit
+  , alphaNum
+  )
+where
+import qualified Text.Parsec as P
+import Text.Parsec (Stream(..), ParsecT)
+import Text.Parsec.Pos as P
+import Data.Text (Text)
+import qualified Data.Text as T
+import Data.Char (isSpace, isLetter, isAlphaNum, isDigit, isHexDigit)
+import Data.String (IsString(..))
+import qualified Data.List.NonEmpty as NonEmpty
+
+-- | A list of inputs labeled with source positions.  It is assumed
+-- that the 'Text's have @\n@ line endings.
+newtype Sources = Sources { unSources :: [(SourcePos, Text)] }
+  deriving (Show, Semigroup, Monoid)
+
+instance Monad m => Stream Sources m Char where
+  uncons (Sources []) = return Nothing
+  uncons (Sources ((pos,t):rest)) =
+    case T.uncons t of
+      Nothing -> uncons (Sources rest)
+      Just (c,t') -> return $ Just (c, Sources ((pos,t'):rest))
+
+instance IsString Sources where
+  fromString s = Sources [(P.initialPos "", T.pack (filter (/='\r') s))]
+
+class ToSources a where
+  toSources :: a -> Sources
+
+instance ToSources Text where
+  toSources t = Sources [(P.initialPos "", T.filter (/='\r') t)]
+
+instance ToSources [(FilePath, Text)] where
+  toSources = Sources
+            . map (\(fp,t) ->
+                    (P.initialPos fp, T.snoc (T.filter (/='\r') t) '\n'))
+
+instance ToSources Sources where
+  toSources = id
+
+sourcesToText :: Sources -> Text
+sourcesToText (Sources xs) = mconcat $ map snd xs
+
+addToSources :: Monad m => SourcePos -> Text -> ParsecT Sources u m ()
+addToSources pos t = do
+  curpos <- P.getPosition
+  Sources xs <- P.getInput
+  let xs' = case xs of
+               [] -> []
+               ((_,t'):rest) -> (curpos,t'):rest
+  P.setInput $ Sources ((pos, T.filter (/='\r') t):xs')
+
+ensureFinalNewlines :: Int -- ^ number of trailing newlines
+                    -> Sources
+                    -> Sources
+ensureFinalNewlines n (Sources xs) =
+  case NonEmpty.nonEmpty xs of
+    Nothing -> Sources [(initialPos "", T.replicate n "\n")]
+    Just lst ->
+      case NonEmpty.last lst of
+        (spos, t) ->
+          case T.length (T.takeWhileEnd (=='\n') t) of
+            len | len >= n -> Sources xs
+                | otherwise -> Sources (NonEmpty.init lst ++
+                                        [(spos,
+                                          t <> T.replicate (n - len) "\n")])
+
+class UpdateSourcePos s c where
+  updateSourcePos :: SourcePos -> c -> s -> SourcePos
+
+instance UpdateSourcePos Text Char where
+   updateSourcePos pos c _ = updatePosChar pos c
+
+instance UpdateSourcePos Sources Char where
+   updateSourcePos pos c sources =
+     case sources of
+       Sources [] -> updatePosChar pos c
+       Sources ((_,t):(pos',_):_)
+         | T.null t  -> pos'
+       Sources _ ->
+           case c of
+             '\n' -> incSourceLine (setSourceColumn pos 1) 1
+             '\t' -> incSourceColumn pos (4 - ((sourceColumn pos - 1) `mod` 4))
+             _    -> incSourceColumn pos 1
+
+-- | Get name of first source in 'Sources'.
+initialSourceName :: Sources -> FilePath
+initialSourceName (Sources []) = ""
+initialSourceName (Sources ((pos,_):_)) = sourceName pos
+
+-- | Add some text to the beginning of the input sources.
+-- This simplifies code that expands macros.
+addToInput :: Monad m => Text -> ParsecT Sources u m ()
+addToInput t = do
+  Sources xs <- P.getInput
+  case xs of
+    [] -> P.setInput $ Sources [(initialPos "",t)]
+    (pos,t'):rest -> P.setInput $ Sources ((pos, t <> t'):rest)
+
+-- We need to redefine the parsers in Text.Parsec.Char so that they
+-- update source positions properly from the Sources stream.
+
+satisfy :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+         => (Char -> Bool) -> ParsecT s u m Char
+satisfy f = P.tokenPrim show updateSourcePos matcher
+ where
+  matcher c = if f c then Just c else Nothing
+
+oneOf :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+      => [Char] -> ParsecT s u m Char
+oneOf cs = satisfy (`elem` cs)
+
+noneOf :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+       => [Char] -> ParsecT s u m Char
+noneOf cs = satisfy (`notElem` cs)
+
+anyChar :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+        => ParsecT s u m Char
+anyChar = satisfy (const True)
+
+char :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+     => Char -> ParsecT s u m Char
+char c = satisfy (== c)
+
+string :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+       => [Char] -> ParsecT s u m [Char]
+string = mapM char
+
+newline :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+        => ParsecT s u m Char
+newline = satisfy (== '\n')
+
+space :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+      => ParsecT s u m Char
+space = satisfy isSpace
+
+spaces :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+       => ParsecT s u m ()
+spaces = P.skipMany space P.<?> "white space"
+
+letter :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+       => ParsecT s u m Char
+letter = satisfy isLetter
+
+alphaNum :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+         => ParsecT s u m Char
+alphaNum = satisfy isAlphaNum
+
+digit :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+      => ParsecT s u m Char
+digit = satisfy isDigit
+
+hexDigit :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+         => ParsecT s u m Char
+hexDigit = satisfy isHexDigit
diff --git a/test/Tests/Readers/Markdown.hs b/test/Tests/Readers/Markdown.hs
index 6e38da21a..f055ab197 100644
--- a/test/Tests/Readers/Markdown.hs
+++ b/test/Tests/Readers/Markdown.hs
@@ -374,8 +374,8 @@ tests = [ testGroup "inline code"
         , testGroup "lhs"
           [ test (purely $ readMarkdown def{ readerExtensions = enableExtension
                        Ext_literate_haskell pandocExtensions })
-              "inverse bird tracks and html" $
-              "> a\n\n< b\n\n<div>\n"
+              "inverse bird tracks and html"
+              $ ("> a\n\n< b\n\n<div>\n" :: Text)
               =?> codeBlockWith ("",["haskell","literate"],[]) "a"
                   <>
                   codeBlockWith ("",["haskell"],[]) "b"
-- 
cgit v1.2.3