aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r--src/Text/Pandoc/App.hs8
-rw-r--r--src/Text/Pandoc/App/Opt.hs2
-rw-r--r--src/Text/Pandoc/Citeproc/BibTeX.hs14
-rw-r--r--src/Text/Pandoc/Error.hs39
-rw-r--r--src/Text/Pandoc/Logging.hs8
-rw-r--r--src/Text/Pandoc/Parsing.hs474
-rw-r--r--src/Text/Pandoc/Readers.hs97
-rw-r--r--src/Text/Pandoc/Readers/BibTeX.hs13
-rw-r--r--src/Text/Pandoc/Readers/CSV.hs14
-rw-r--r--src/Text/Pandoc/Readers/CommonMark.hs70
-rw-r--r--src/Text/Pandoc/Readers/Creole.hs11
-rw-r--r--src/Text/Pandoc/Readers/CslJson.hs9
-rw-r--r--src/Text/Pandoc/Readers/DocBook.hs11
-rw-r--r--src/Text/Pandoc/Readers/DokuWiki.hs17
-rw-r--r--src/Text/Pandoc/Readers/FB2.hs9
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs33
-rw-r--r--src/Text/Pandoc/Readers/Haddock.hs12
-rw-r--r--src/Text/Pandoc/Readers/Ipynb.hs8
-rw-r--r--src/Text/Pandoc/Readers/JATS.hs11
-rw-r--r--src/Text/Pandoc/Readers/Jira.hs16
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs23
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Citation.hs2
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Parsing.hs12
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Types.hs15
-rw-r--r--src/Text/Pandoc/Readers/Man.hs29
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs49
-rw-r--r--src/Text/Pandoc/Readers/MediaWiki.hs13
-rw-r--r--src/Text/Pandoc/Readers/Metadata.hs26
-rw-r--r--src/Text/Pandoc/Readers/Muse.hs17
-rw-r--r--src/Text/Pandoc/Readers/Native.hs12
-rw-r--r--src/Text/Pandoc/Readers/OPML.hs13
-rw-r--r--src/Text/Pandoc/Readers/Org.hs11
-rw-r--r--src/Text/Pandoc/Readers/Org/Inlines.hs3
-rw-r--r--src/Text/Pandoc/Readers/Org/Parsing.hs2
-rw-r--r--src/Text/Pandoc/Readers/RST.hs76
-rw-r--r--src/Text/Pandoc/Readers/Roff.hs34
-rw-r--r--src/Text/Pandoc/Readers/TWiki.hs12
-rw-r--r--src/Text/Pandoc/Readers/Textile.hs172
-rw-r--r--src/Text/Pandoc/Readers/TikiWiki.hs12
-rw-r--r--src/Text/Pandoc/Readers/Txt2Tags.hs12
-rw-r--r--src/Text/Pandoc/Readers/Vimwiki.hs19
-rw-r--r--src/Text/Pandoc/Shared.hs1
-rw-r--r--src/Text/Pandoc/Sources.hs195
43 files changed, 1021 insertions, 615 deletions
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index 96e4b5f47..98b072ffb 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -160,9 +160,11 @@ convertWithOpts opts = do
else optTabStop opts)
- let readSources :: [FilePath] -> PandocIO Text
- readSources srcs = convertTabs . T.intercalate (T.pack "\n") <$>
- mapM readSource srcs
+ let readSources :: [FilePath] -> PandocIO [(FilePath, Text)]
+ readSources srcs =
+ mapM (\fp -> do
+ t <- readSource fp
+ return (if fp == "-" then "" else fp, convertTabs t)) srcs
outputSettings <- optToOutputSettings opts
diff --git a/src/Text/Pandoc/App/Opt.hs b/src/Text/Pandoc/App/Opt.hs
index c72f63464..d54d932b7 100644
--- a/src/Text/Pandoc/App/Opt.hs
+++ b/src/Text/Pandoc/App/Opt.hs
@@ -687,7 +687,7 @@ yamlToMeta (Mapping _ _ m) =
where
pMetaString = pure . MetaString <$> P.manyChar P.anyChar
runEverything p =
- runPure (P.readWithM p (def :: P.ParserState) "")
+ runPure (P.readWithM p (def :: P.ParserState) ("" :: Text))
>>= fmap (Meta . flip P.runF def)
yamlToMeta _ = return mempty
diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs
index 510e56f9c..f6833000c 100644
--- a/src/Text/Pandoc/Citeproc/BibTeX.hs
+++ b/src/Text/Pandoc/Citeproc/BibTeX.hs
@@ -59,10 +59,11 @@ data Variant = Bibtex | Biblatex
deriving (Show, Eq, Ord)
-- | Parse BibTeX or BibLaTeX into a list of 'Reference's.
-readBibtexString :: Variant -- ^ bibtex or biblatex
+readBibtexString :: ToSources a
+ => Variant -- ^ bibtex or biblatex
-> Locale -- ^ Locale
-> (Text -> Bool) -- ^ Filter on citation ids
- -> Text -- ^ bibtex/biblatex text
+ -> a -- ^ bibtex/biblatex text
-> Either ParseError [Reference Inlines]
readBibtexString variant locale idpred contents = do
case runParser (((resolveCrossRefs variant <$> bibEntries) <* eof) >>=
@@ -70,7 +71,7 @@ readBibtexString variant locale idpred contents = do
filter (\item -> idpred (identifier item) &&
entryType item /= "xdata"))
(fromMaybe defaultLang $ localeLanguage locale, Map.empty)
- "" contents of
+ "" (toSources contents) of
Left err -> Left err
Right xs -> return xs
@@ -339,7 +340,7 @@ defaultLang = Lang "en" Nothing (Just "US") [] [] []
-- a map of bibtex "string" macros
type StringMap = Map.Map Text Text
-type BibParser = Parser Text (Lang, StringMap)
+type BibParser = Parser Sources (Lang, StringMap)
data Item = Item{ identifier :: Text
, sourcePos :: SourcePos
@@ -804,7 +805,7 @@ bibEntries = do
(bibComment <|> bibPreamble <|> bibString))
bibSkip :: BibParser ()
-bibSkip = () <$ take1WhileP (/='@')
+bibSkip = skipMany1 (satisfy (/='@'))
bibComment :: BibParser ()
bibComment = do
@@ -829,6 +830,9 @@ bibString = do
updateState (\(l,m) -> (l, Map.insert k v m))
return ()
+take1WhileP :: Monad m => (Char -> Bool) -> ParserT Sources u m Text
+take1WhileP f = T.pack <$> many1 (satisfy f)
+
inBraces :: BibParser Text
inBraces = do
char '{'
diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs
index 8102f04cc..81eb41f85 100644
--- a/src/Text/Pandoc/Error.hs
+++ b/src/Text/Pandoc/Error.hs
@@ -23,26 +23,27 @@ import Control.Exception (Exception, displayException)
import Data.Typeable (Typeable)
import Data.Word (Word8)
import Data.Text (Text)
+import Data.List (sortOn)
import qualified Data.Text as T
+import Data.Ord (Down(..))
import GHC.Generics (Generic)
import Network.HTTP.Client (HttpException)
import System.Exit (ExitCode (..), exitWith)
import System.IO (stderr)
import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Pandoc.Sources (Sources(..))
import Text.Printf (printf)
import Text.Parsec.Error
import Text.Parsec.Pos hiding (Line)
import Text.Pandoc.Shared (tshow)
import Citeproc (CiteprocError, prettyCiteprocError)
-type Input = Text
-
data PandocError = PandocIOError Text IOError
| PandocHttpError Text HttpException
| PandocShouldNeverHappenError Text
| PandocSomeError Text
| PandocParseError Text
- | PandocParsecError Input ParseError
+ | PandocParsecError Sources ParseError
| PandocMakePDFError Text
| PandocOptionError Text
| PandocSyntaxMapError Text
@@ -81,22 +82,28 @@ renderError e =
"Please report this to pandoc's developers: " <> s
PandocSomeError s -> s
PandocParseError s -> s
- PandocParsecError input err' ->
+ PandocParsecError (Sources inputs) err' ->
let errPos = errorPos err'
errLine = sourceLine errPos
errColumn = sourceColumn errPos
- ls = T.lines input <> [""]
- errorInFile = if length ls > errLine - 1
- then T.concat ["\n", ls !! (errLine - 1)
- ,"\n", T.replicate (errColumn - 1) " "
- ,"^"]
- else ""
- in "\nError at " <> tshow err' <>
- -- if error comes from a chunk or included file,
- -- then we won't get the right text this way:
- if sourceName errPos == "source"
- then errorInFile
- else ""
+ errFile = sourceName errPos
+ errorInFile =
+ case sortOn (Down . sourceLine . fst)
+ [ (pos,t)
+ | (pos,t) <- inputs
+ , sourceName pos == errFile
+ , sourceLine pos <= errLine
+ ] of
+ [] -> ""
+ ((pos,txt):_) ->
+ let ls = T.lines txt <> [""]
+ ln = errLine - sourceLine pos
+ in if length ls > ln - 1
+ then T.concat ["\n", ls !! (ln - 1)
+ ,"\n", T.replicate (errColumn - 1) " "
+ ,"^"]
+ else ""
+ in "\nError at " <> tshow err' <> errorInFile
PandocMakePDFError s -> s
PandocOptionError s -> s
PandocSyntaxMapError s -> s
diff --git a/src/Text/Pandoc/Logging.hs b/src/Text/Pandoc/Logging.hs
index efd2188f1..8c7292b69 100644
--- a/src/Text/Pandoc/Logging.hs
+++ b/src/Text/Pandoc/Logging.hs
@@ -241,9 +241,11 @@ instance ToJSON LogMessage where
showPos :: SourcePos -> Text
showPos pos = Text.pack $ sn ++ "line " ++
show (sourceLine pos) ++ " column " ++ show (sourceColumn pos)
- where sn = if sourceName pos == "source" || sourceName pos == ""
- then ""
- else sourceName pos ++ " "
+ where
+ sn' = sourceName pos
+ sn = if sn' == "source" || sn' == "" || sn' == "-"
+ then ""
+ else sn' ++ " "
encodeLogMessages :: [LogMessage] -> BL.ByteString
encodeLogMessages ms =
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 37ab0adaa..11c4c7a62 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -5,7 +5,6 @@
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE IncoherentInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
-{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Parsing
@@ -19,8 +18,7 @@
A utility library with parsers used in pandoc readers.
-}
-module Text.Pandoc.Parsing ( take1WhileP,
- takeP,
+module Text.Pandoc.Parsing ( module Text.Pandoc.Sources,
countChar,
textStr,
anyLine,
@@ -134,22 +132,10 @@ module Text.Pandoc.Parsing ( take1WhileP,
getInput,
setInput,
unexpected,
- char,
- letter,
- digit,
- alphaNum,
skipMany,
skipMany1,
- spaces,
- space,
- anyChar,
- satisfy,
- newline,
- string,
count,
eof,
- noneOf,
- oneOf,
lookAhead,
notFollowedBy,
many,
@@ -174,6 +160,8 @@ module Text.Pandoc.Parsing ( take1WhileP,
SourcePos,
getPosition,
setPosition,
+ sourceName,
+ setSourceName,
sourceColumn,
sourceLine,
setSourceColumn,
@@ -189,16 +177,25 @@ module Text.Pandoc.Parsing ( take1WhileP,
where
import Control.Monad.Identity
+ ( guard,
+ join,
+ unless,
+ when,
+ void,
+ liftM2,
+ liftM,
+ Identity(..),
+ MonadPlus(mzero) )
import Control.Monad.Reader
+ ( asks, runReader, MonadReader(ask), Reader, ReaderT(ReaderT) )
import Data.Char (chr, isAlphaNum, isAscii, isAsciiUpper, isAsciiLower,
isPunctuation, isSpace, ord, toLower, toUpper)
-import Data.Default
+import Data.Default ( Default(..) )
import Data.Functor (($>))
import Data.List (intercalate, transpose)
import qualified Data.Map as M
import Data.Maybe (fromMaybe)
import qualified Data.Set as Set
-import Data.String
import Data.Text (Text)
import qualified Data.Text as T
import Text.HTML.TagSoup.Entity (lookupEntity)
@@ -207,22 +204,108 @@ import Text.Pandoc.Builder (Blocks, HasMeta (..), Inlines, trimInlines)
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Class.PandocMonad (PandocMonad, readFileFromDirs, report)
import Text.Pandoc.Definition
+ ( Target,
+ nullMeta,
+ nullAttr,
+ Meta,
+ ColWidth(ColWidthDefault, ColWidth),
+ TableFoot(TableFoot),
+ TableBody(TableBody),
+ Attr,
+ TableHead(TableHead),
+ Row(..),
+ Alignment(..),
+ Inline(Str),
+ ListNumberDelim(..),
+ ListAttributes,
+ ListNumberStyle(..) )
import Text.Pandoc.Logging
+ ( LogMessage(CouldNotLoadIncludeFile, DuplicateIdentifier) )
import Text.Pandoc.Options
+ ( extensionEnabled,
+ Extension(Ext_old_dashes, Ext_tex_math_dollars,
+ Ext_tex_math_single_backslash, Ext_tex_math_double_backslash,
+ Ext_auto_identifiers, Ext_ascii_identifiers, Ext_smart),
+ ReaderOptions(readerTabStop, readerColumns, readerExtensions) )
import Text.Pandoc.Readers.LaTeX.Types (Macro)
import Text.Pandoc.Shared
+ ( uniqueIdent,
+ tshow,
+ mapLeft,
+ compactify,
+ trim,
+ trimr,
+ splitTextByIndices,
+ safeRead,
+ trimMath,
+ schemes,
+ escapeURI )
+import Text.Pandoc.Sources
import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
import Text.Pandoc.XML (fromEntities)
-import Text.Parsec hiding (token)
-import Text.Parsec.Pos (initialPos, newPos, updatePosString)
-
-import Control.Monad.Except
+import Text.Parsec
+ ( between,
+ setSourceName,
+ Parsec,
+ Column,
+ Line,
+ incSourceLine,
+ incSourceColumn,
+ setSourceLine,
+ setSourceColumn,
+ sourceLine,
+ sourceColumn,
+ sourceName,
+ setSourceName,
+ setPosition,
+ getPosition,
+ updateState,
+ setState,
+ getState,
+ optionMaybe,
+ optional,
+ option,
+ endBy1,
+ endBy,
+ sepEndBy1,
+ sepEndBy,
+ sepBy1,
+ sepBy,
+ try,
+ choice,
+ (<?>),
+ (<|>),
+ manyTill,
+ many1,
+ many,
+ notFollowedBy,
+ lookAhead,
+ eof,
+ count,
+ skipMany1,
+ skipMany,
+ unexpected,
+ setInput,
+ getInput,
+ anyToken,
+ tokenPrim,
+ parse,
+ runParserT,
+ runParser,
+ ParseError,
+ ParsecT,
+ SourcePos,
+ Stream(..) )
+import Text.Parsec.Pos (initialPos, newPos)
+import Control.Monad.Except ( MonadError(throwError) )
import Text.Pandoc.Error
+ ( PandocError(PandocParseError, PandocParsecError) )
type Parser t s = Parsec t s
type ParserT = ParsecT
+
-- | Reader monad wrapping the parser state. This is used to possibly delay
-- evaluation until all relevant information has been parsed and made available
-- in the parser state.
@@ -251,70 +334,48 @@ instance (Semigroup a, Monoid a) => Monoid (Future s a) where
mappend = (<>)
-- | Like @count@, but packs its result
-countChar :: (Stream s m Char, Monad m)
+countChar :: (Stream s m Char, UpdateSourcePos s Char, Monad m)
=> Int
-> ParsecT s st m Char
-> ParsecT s st m Text
countChar n = fmap T.pack . count n
-- | Like @string@, but uses @Text@.
-textStr :: Stream s m Char => Text -> ParsecT s u m Text
+textStr :: (Stream s m Char, UpdateSourcePos s Char)
+ => Text -> ParsecT s u m Text
textStr t = string (T.unpack t) $> t
--- | Parse characters while a predicate is true.
-take1WhileP :: Monad m
- => (Char -> Bool)
- -> ParserT Text st m Text
-take1WhileP f = do
- -- needed to persuade parsec that this won't match an empty string:
- c <- satisfy f
- inp <- getInput
- pos <- getPosition
- let (t, rest) = T.span f inp
- setInput rest
- setPosition $
- if f '\t' || f '\n'
- then updatePosString pos $ T.unpack t
- else incSourceColumn pos (T.length t)
- return $ T.singleton c <> t
-
--- Parse n characters of input (or the rest of the input if
--- there aren't n characters).
-takeP :: Monad m => Int -> ParserT Text st m Text
-takeP n = do
- guard (n > 0)
- -- faster than 'count n anyChar'
- inp <- getInput
- pos <- getPosition
- let (xs, rest) = T.splitAt n inp
- -- needed to persuade parsec that this won't match an empty string:
- anyChar
- setInput rest
- setPosition $ updatePosString pos $ T.unpack xs
- return xs
-
--- | Parse any line of text
-anyLine :: Monad m => ParserT Text st m Text
+
+-- | Parse any line of text, returning the contents without the
+-- final newline.
+anyLine :: Monad m => ParserT Sources st m Text
anyLine = do
-- This is much faster than:
-- manyTill anyChar newline
inp <- getInput
- pos <- getPosition
- case T.break (=='\n') inp of
- (this, T.uncons -> Just ('\n', rest)) -> do
- -- needed to persuade parsec that this won't match an empty string:
- anyChar
- setInput rest
- setPosition $ incSourceLine (setSourceColumn pos 1) 1
- return this
- _ -> mzero
+ case inp of
+ Sources [] -> mzero
+ Sources ((fp,t):inps) ->
+ -- we assume that lines don't span different input files
+ case T.break (=='\n') t of
+ (this, rest)
+ | T.null rest
+ , not (null inps) ->
+ -- line may span different input files, so do it
+ -- character by character
+ T.pack <$> manyTill anyChar newline
+ | otherwise -> do -- either end of inputs or newline in rest
+ setInput $ Sources ((fp, rest):inps)
+ char '\n' -- needed so parsec knows we won't match empty string
+ -- and so source pos is updated
+ return this
-- | Parse any line, include the final newline in the output
-anyLineNewline :: Monad m => ParserT Text st m Text
+anyLineNewline :: Monad m => ParserT Sources st m Text
anyLineNewline = (<> "\n") <$> anyLine
-- | Parse indent by specified number of spaces (or equiv. tabs)
-indentWith :: Stream s m Char
+indentWith :: (Stream s m Char, UpdateSourcePos s Char)
=> HasReaderOptions st
=> Int -> ParserT s st m Text
indentWith num = do
@@ -399,11 +460,13 @@ notFollowedBy' p = try $ join $ do a <- try p
return (return ())
-- (This version due to Andrew Pimlott on the Haskell mailing list.)
-oneOfStrings' :: Stream s m Char => (Char -> Char -> Bool) -> [Text] -> ParserT s st m Text
+oneOfStrings' :: (Stream s m Char, UpdateSourcePos s Char)
+ => (Char -> Char -> Bool) -> [Text] -> ParserT s st m Text
oneOfStrings' f = fmap T.pack . oneOfStrings'' f . fmap T.unpack
-- TODO: This should be re-implemented in a Text-aware way
-oneOfStrings'' :: Stream s m Char => (Char -> Char -> Bool) -> [String] -> ParserT s st m String
+oneOfStrings'' :: (Stream s m Char, UpdateSourcePos s Char)
+ => (Char -> Char -> Bool) -> [String] -> ParserT s st m String
oneOfStrings'' _ [] = Prelude.fail "no strings"
oneOfStrings'' matches strs = try $ do
c <- anyChar
@@ -418,14 +481,16 @@ oneOfStrings'' matches strs = try $ do
-- | Parses one of a list of strings. If the list contains
-- two strings one of which is a prefix of the other, the longer
-- string will be matched if possible.
-oneOfStrings :: Stream s m Char => [Text] -> ParserT s st m Text
+oneOfStrings :: (Stream s m Char, UpdateSourcePos s Char)
+ => [Text] -> ParserT s st m Text
oneOfStrings = oneOfStrings' (==)
-- | Parses one of a list of strings (tried in order), case insensitive.
-- TODO: This will not be accurate with general Unicode (neither
-- Text.toLower nor Text.toCaseFold can be implemented with a map)
-oneOfStringsCI :: Stream s m Char => [Text] -> ParserT s st m Text
+oneOfStringsCI :: (Stream s m Char, UpdateSourcePos s Char)
+ => [Text] -> ParserT s st m Text
oneOfStringsCI = oneOfStrings' ciMatch
where ciMatch x y = toLower' x == toLower' y
-- this optimizes toLower by checking common ASCII case
@@ -436,11 +501,13 @@ oneOfStringsCI = oneOfStrings' ciMatch
| otherwise = toLower c
-- | Parses a space or tab.
-spaceChar :: Stream s m Char => ParserT s st m Char
+spaceChar :: (Stream s m Char, UpdateSourcePos s Char)
+ => ParserT s st m Char
spaceChar = satisfy $ \c -> c == ' ' || c == '\t'
-- | Parses a nonspace, nonnewline character.
-nonspaceChar :: Stream s m Char => ParserT s st m Char
+nonspaceChar :: (Stream s m Char, UpdateSourcePos s Char)
+ => ParserT s st m Char
nonspaceChar = satisfy (not . isSpaceChar)
isSpaceChar :: Char -> Bool
@@ -451,21 +518,24 @@ isSpaceChar '\r' = True
isSpaceChar _ = False
-- | Skips zero or more spaces or tabs.
-skipSpaces :: Stream s m Char => ParserT s st m ()
+skipSpaces :: (Stream s m Char, UpdateSourcePos s Char)
+ => ParserT s st m ()
skipSpaces = skipMany spaceChar
-- | Skips zero or more spaces or tabs, then reads a newline.
-blankline :: Stream s m Char => ParserT s st m Char
+blankline :: (Stream s m Char, UpdateSourcePos s Char)
+ => ParserT s st m Char
blankline = try $ skipSpaces >> newline
-- | Parses one or more blank lines and returns a string of newlines.
-blanklines :: Stream s m Char => ParserT s st m Text
+blanklines :: (Stream s m Char, UpdateSourcePos s Char)
+ => ParserT s st m Text
blanklines = T.pack <$> many1 blankline
-- | Gobble n spaces; if tabs are encountered, expand them
-- and gobble some or all of their spaces, leaving the rest.
gobbleSpaces :: (HasReaderOptions st, Monad m)
- => Int -> ParserT Text st m ()
+ => Int -> ParserT Sources st m ()
gobbleSpaces 0 = return ()
gobbleSpaces n
| n < 0 = error "gobbleSpaces called with negative number"
@@ -473,18 +543,26 @@ gobbleSpaces n
char ' ' <|> eatOneSpaceOfTab
gobbleSpaces (n - 1)
-eatOneSpaceOfTab :: (HasReaderOptions st, Monad m) => ParserT Text st m Char
+eatOneSpaceOfTab :: (HasReaderOptions st, Monad m) => ParserT Sources st m Char
eatOneSpaceOfTab = do
- char '\t'
+ lookAhead (char '\t')
+ pos <- getPosition
tabstop <- getOption readerTabStop
+ -- replace the tab on the input stream with spaces
+ let numSpaces = tabstop - ((sourceColumn pos - 1) `mod` tabstop)
inp <- getInput
- setInput $ T.replicate (tabstop - 1) " " <> inp
- return ' '
+ setInput $
+ case inp of
+ Sources [] -> error "eatOneSpaceOfTab - empty Sources list"
+ Sources ((fp,t):rest) ->
+ -- drop the tab and add spaces
+ Sources ((fp, T.replicate numSpaces " " <> T.drop 1 t):rest)
+ char ' '
-- | Gobble up to n spaces; if tabs are encountered, expand them
-- and gobble some or all of their spaces, leaving the rest.
gobbleAtMostSpaces :: (HasReaderOptions st, Monad m)
- => Int -> ParserT Text st m Int
+ => Int -> ParserT Sources st m Int
gobbleAtMostSpaces 0 = return 0
gobbleAtMostSpaces n
| n < 0 = error "gobbleAtMostSpaces called with negative number"
@@ -493,7 +571,8 @@ gobbleAtMostSpaces n
(+ 1) <$> gobbleAtMostSpaces (n - 1)
-- | Parses material enclosed between start and end parsers.
-enclosed :: (Show end, Stream s m Char) => ParserT s st m t -- ^ start parser
+enclosed :: (Show end, Stream s m Char, UpdateSourcePos s Char)
+ => ParserT s st m t -- ^ start parser
-> ParserT s st m end -- ^ end parser
-> ParserT s st m a -- ^ content parser (to be used repeatedly)
-> ParserT s st m [a]
@@ -501,39 +580,41 @@ enclosed start end parser = try $
start >> notFollowedBy space >> many1Till parser end
-- | Parse string, case insensitive.
-stringAnyCase :: Stream s m Char => Text -> ParserT s st m Text
+stringAnyCase :: (Stream s m Char, UpdateSourcePos s Char)
+ => Text -> ParserT s st m Text
stringAnyCase = fmap T.pack . stringAnyCase' . T.unpack
-stringAnyCase' :: Stream s m Char => String -> ParserT s st m String
+stringAnyCase' :: (Stream s m Char, UpdateSourcePos s Char)
+ => String -> ParserT s st m String
stringAnyCase' [] = string ""
stringAnyCase' (x:xs) = do
firstChar <- char (toUpper x) <|> char (toLower x)
rest <- stringAnyCase' xs
return (firstChar:rest)
+-- TODO rewrite by just adding to Sources stream?
-- | Parse contents of 'str' using 'parser' and return result.
-parseFromString :: (Stream s m Char, IsString s)
- => ParserT s st m r
+parseFromString :: Monad m
+ => ParserT Sources st m r
-> Text
- -> ParserT s st m r
+ -> ParserT Sources st m r
parseFromString parser str = do
oldPos <- getPosition
- setPosition $ initialPos " chunk"
+ setPosition $ initialPos "chunk"
oldInput <- getInput
- setInput $ fromString $ T.unpack str
+ setInput $ toSources str
result <- parser
spaces
- eof
setInput oldInput
setPosition oldPos
return result
-- | Like 'parseFromString' but specialized for 'ParserState'.
-- This resets 'stateLastStrPos', which is almost always what we want.
-parseFromString' :: (Stream s m Char, IsString s, HasLastStrPosition u)
- => ParserT s u m a
+parseFromString' :: (Monad m, HasLastStrPosition u)
+ => ParserT Sources u m a
-> Text
- -> ParserT s u m a
+ -> ParserT Sources u m a
parseFromString' parser str = do
oldLastStrPos <- getLastStrPos <$> getState
updateState $ setLastStrPos Nothing
@@ -542,7 +623,7 @@ parseFromString' parser str = do
return res
-- | Parse raw line block up to and including blank lines.
-lineClump :: Monad m => ParserT Text st m Text
+lineClump :: Monad m => ParserT Sources st m Text
lineClump = blanklines
<|> (T.unlines <$> many1 (notFollowedBy blankline >> anyLine))
@@ -551,7 +632,7 @@ lineClump = blanklines
-- pairs of open and close, which must be different. For example,
-- @charsInBalanced '(' ')' anyChar@ will parse "(hello (there))"
-- and return "hello (there)".
-charsInBalanced :: Stream s m Char => Char -> Char -> ParserT s st m Char
+charsInBalanced :: (Stream s m Char, UpdateSourcePos s Char) => Char -> Char -> ParserT s st m Char
-> ParserT s st m Text
charsInBalanced open close parser = try $ do
char open
@@ -570,7 +651,7 @@ charsInBalanced open close parser = try $ do
-- Auxiliary functions for romanNumeral:
-- | Parses a roman numeral (uppercase or lowercase), returns number.
-romanNumeral :: Stream s m Char => Bool -- ^ Uppercase if true
+romanNumeral :: (Stream s m Char, UpdateSourcePos s Char) => Bool -- ^ Uppercase if true
-> ParserT s st m Int
romanNumeral upperCase = do
let rchar uc = char $ if upperCase then uc else toLower uc
@@ -606,7 +687,7 @@ romanNumeral upperCase = do
-- | Parses an email address; returns original and corresponding
-- escaped mailto: URI.
-emailAddress :: Stream s m Char => ParserT s st m (Text, Text)
+emailAddress :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (Text, Text)
emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain)
where toResult mbox dom = let full = fromEntities $ T.pack $ mbox ++ '@':dom
in (full, escapeURI $ "mailto:" <> full)
@@ -630,11 +711,11 @@ emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain)
isEmailPunct c = T.any (== c) "!\"#$%&'*+-/=?^_{|}~;"
-uriScheme :: Stream s m Char => ParserT s st m Text
+uriScheme :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Text
uriScheme = oneOfStringsCI (Set.toList schemes)
-- | Parses a URI. Returns pair of original and URI-escaped version.
-uri :: Stream s m Char => ParserT s st m (Text, Text)
+uri :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (Text, Text)
uri = try $ do
scheme <- uriScheme
char ':'
@@ -677,7 +758,7 @@ uri = try $ do
uriChunkBetween l r = try $ do chunk <- between (char l) (char r) uriChunk
return (T.pack $ [l] ++ chunk ++ [r])
-mathInlineWith :: Stream s m Char => Text -> Text -> ParserT s st m Text
+mathInlineWith :: (Stream s m Char, UpdateSourcePos s Char) => Text -> Text -> ParserT s st m Text
mathInlineWith op cl = try $ do
textStr op
when (op == "$") $ notFollowedBy space
@@ -698,10 +779,10 @@ mathInlineWith op cl = try $ do
notFollowedBy digit -- to prevent capture of $5
return $ trimMath $ T.concat words'
where
- inBalancedBraces :: Stream s m Char => Int -> Text -> ParserT s st m Text
+ inBalancedBraces :: (Stream s m Char, UpdateSourcePos s Char) => Int -> Text -> ParserT s st m Text
inBalancedBraces n = fmap T.pack . inBalancedBraces' n . T.unpack
- inBalancedBraces' :: Stream s m Char => Int -> String -> ParserT s st m String
+ inBalancedBraces' :: (Stream s m Char, UpdateSourcePos s Char) => Int -> String -> ParserT s st m String
inBalancedBraces' 0 "" = do
c <- anyChar
if c == '{'
@@ -718,13 +799,13 @@ mathInlineWith op cl = try $ do
'{' -> inBalancedBraces' (numOpen + 1) (c:xs)
_ -> inBalancedBraces' numOpen (c:xs)
-mathDisplayWith :: Stream s m Char => Text -> Text -> ParserT s st m Text
+mathDisplayWith :: (Stream s m Char, UpdateSourcePos s Char) => Text -> Text -> ParserT s st m Text
mathDisplayWith op cl = try $ fmap T.pack $ do
textStr op
many1Till (satisfy (/= '\n') <|> (newline <* notFollowedBy' blankline))
(try $ textStr cl)
-mathDisplay :: (HasReaderOptions st, Stream s m Char)
+mathDisplay :: (HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m Text
mathDisplay =
(guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$")
@@ -733,7 +814,7 @@ mathDisplay =
<|> (guardEnabled Ext_tex_math_double_backslash >>
mathDisplayWith "\\\\[" "\\\\]")
-mathInline :: (HasReaderOptions st , Stream s m Char)
+mathInline :: (HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m Text
mathInline =
(guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$")
@@ -746,7 +827,7 @@ mathInline =
-- displacement (the difference between the source column at the end
-- and the source column at the beginning). Vertical displacement
-- (source row) is ignored.
-withHorizDisplacement :: Stream s m Char
+withHorizDisplacement :: (Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m a -- ^ Parser to apply
-> ParserT s st m (a, Int) -- ^ (result, displacement)
withHorizDisplacement parser = do
@@ -758,30 +839,37 @@ withHorizDisplacement parser = do
-- | Applies a parser and returns the raw string that was parsed,
-- along with the value produced by the parser.
withRaw :: Monad m
- => ParsecT Text st m a
- -> ParsecT Text st m (a, Text)
+ => ParsecT Sources st m a
+ -> ParsecT Sources st m (a, Text)
withRaw parser = do
- pos1 <- getPosition
- inp <- getInput
+ inps1 <- getInput
result <- parser
- pos2 <- getPosition
- let (l1,c1) = (sourceLine pos1, sourceColumn pos1)
- let (l2,c2) = (sourceLine pos2, sourceColumn pos2)
- let inplines = take ((l2 - l1) + 1) $ T.lines inp
- let raw = case inplines of
- [] -> ""
- [l] -> T.take (c2 - c1) l
- ls -> T.unlines (init ls) <> T.take (c2 - 1) (last ls)
- return (result, raw)
+ inps2 <- getInput
+ -- 'raw' is the difference between inps1 and inps2
+ return (result, sourcesDifference inps1 inps2)
+
+sourcesDifference :: Sources -> Sources -> Text
+sourcesDifference (Sources is1) (Sources is2) = go is1 is2
+ where
+ go inps1 inps2 =
+ case (inps1, inps2) of
+ ([], _) -> mempty
+ (_, []) -> mconcat $ map snd inps1
+ ((p1,t1):rest1, (p2, t2):rest2)
+ | p1 == p2
+ , t1 == t2 -> go rest1 rest2
+ | p1 == p2
+ , t1 /= t2 -> fromMaybe mempty $ T.stripSuffix t2 t1
+ | otherwise -> t1 <> go rest1 inps2
-- | Parses backslash, then applies character parser.
-escaped :: Stream s m Char
+escaped :: (Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m Char -- ^ Parser for character to escape
-> ParserT s st m Char
escaped parser = try $ char '\\' >> parser
-- | Parse character entity.
-characterReference :: Stream s m Char => ParserT s st m Char
+characterReference :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Char
characterReference = try $ do
char '&'
ent <- many1Till nonspaceChar (char ';')
@@ -794,19 +882,19 @@ characterReference = try $ do
_ -> Prelude.fail "entity not found"
-- | Parses an uppercase roman numeral and returns (UpperRoman, number).
-upperRoman :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+upperRoman :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
upperRoman = do
num <- romanNumeral True
return (UpperRoman, num)
-- | Parses a lowercase roman numeral and returns (LowerRoman, number).
-lowerRoman :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+lowerRoman :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
lowerRoman = do
num <- romanNumeral False
return (LowerRoman, num)
-- | Parses a decimal numeral and returns (Decimal, number).
-decimal :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+decimal :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
decimal = do
num <- many1 digit
return (Decimal, fromMaybe 1 $ safeRead $ T.pack num)
@@ -815,7 +903,7 @@ decimal = do
-- returns (DefaultStyle, [next example number]). The next
-- example number is incremented in parser state, and the label
-- (if present) is added to the label table.
-exampleNum :: Stream s m Char
+exampleNum :: (Stream s m Char, UpdateSourcePos s Char)
=> ParserT s ParserState m (ListNumberStyle, Int)
exampleNum = do
char '@'
@@ -834,37 +922,37 @@ exampleNum = do
return (Example, num)
-- | Parses a '#' returns (DefaultStyle, 1).
-defaultNum :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+defaultNum :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
defaultNum = do
char '#'
return (DefaultStyle, 1)
-- | Parses a lowercase letter and returns (LowerAlpha, number).
-lowerAlpha :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+lowerAlpha :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
lowerAlpha = do
ch <- satisfy isAsciiLower
return (LowerAlpha, ord ch - ord 'a' + 1)
-- | Parses an uppercase letter and returns (UpperAlpha, number).
-upperAlpha :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+upperAlpha :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
upperAlpha = do
ch <- satisfy isAsciiUpper
return (UpperAlpha, ord ch - ord 'A' + 1)
-- | Parses a roman numeral i or I
-romanOne :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+romanOne :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
romanOne = (char 'i' >> return (LowerRoman, 1)) <|>
(char 'I' >> return (UpperRoman, 1))
-- | Parses an ordered list marker and returns list attributes.
-anyOrderedListMarker :: Stream s m Char => ParserT s ParserState m ListAttributes
+anyOrderedListMarker :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s ParserState m ListAttributes
anyOrderedListMarker = choice
[delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens],
numParser <- [decimal, exampleNum, defaultNum, romanOne,
lowerAlpha, lowerRoman, upperAlpha, upperRoman]]
-- | Parses a list number (num) followed by a period, returns list attributes.
-inPeriod :: Stream s m Char
+inPeriod :: (Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m (ListNumberStyle, Int)
-> ParserT s st m ListAttributes
inPeriod num = try $ do
@@ -876,7 +964,7 @@ inPeriod num = try $ do
return (start, style, delim)
-- | Parses a list number (num) followed by a paren, returns list attributes.
-inOneParen :: Stream s m Char
+inOneParen :: (Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m (ListNumberStyle, Int)
-> ParserT s st m ListAttributes
inOneParen num = try $ do
@@ -885,7 +973,7 @@ inOneParen num = try $ do
return (start, style, OneParen)
-- | Parses a list number (num) enclosed in parens, returns list attributes.
-inTwoParens :: Stream s m Char
+inTwoParens :: (Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m (ListNumberStyle, Int)
-> ParserT s st m ListAttributes
inTwoParens num = try $ do
@@ -896,7 +984,7 @@ inTwoParens num = try $ do
-- | Parses an ordered list marker with a given style and delimiter,
-- returns number.
-orderedListMarker :: Stream s m Char
+orderedListMarker :: (Stream s m Char, UpdateSourcePos s Char)
=> ListNumberStyle
-> ListNumberDelim
-> ParserT s ParserState m Int
@@ -919,10 +1007,10 @@ orderedListMarker style delim = do
return start
-- | Parses a character reference and returns a Str element.
-charRef :: Stream s m Char => ParserT s st m Inline
+charRef :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inline
charRef = Str . T.singleton <$> characterReference
-lineBlockLine :: Monad m => ParserT Text st m Text
+lineBlockLine :: Monad m => ParserT Sources st m Text
lineBlockLine = try $ do
char '|'
char ' '
@@ -932,11 +1020,11 @@ lineBlockLine = try $ do
continuations <- many (try $ char ' ' >> anyLine)
return $ white <> T.unwords (line : continuations)
-blankLineBlockLine :: Stream s m Char => ParserT s st m Char
+blankLineBlockLine :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Char
blankLineBlockLine = try (char '|' >> blankline)
-- | Parses an RST-style line block and returns a list of strings.
-lineBlockLines :: Monad m => ParserT Text st m [Text]
+lineBlockLines :: Monad m => ParserT Sources st m [Text]
lineBlockLines = try $ do
lines' <- many1 (lineBlockLine <|> (T.singleton <$> blankLineBlockLine))
skipMany blankline
@@ -944,7 +1032,8 @@ lineBlockLines = try $ do
-- | Parse a table using 'headerParser', 'rowParser',
-- 'lineParser', and 'footerParser'.
-tableWith :: (Stream s m Char, HasReaderOptions st, Monad mf)
+tableWith :: (Stream s m Char, UpdateSourcePos s Char,
+ HasReaderOptions st, Monad mf)
=> ParserT s st m (mf [Blocks], [Alignment], [Int])
-> ([Int] -> ParserT s st m (mf [Blocks]))
-> ParserT s st m sep
@@ -964,7 +1053,8 @@ tableWith headerParser rowParser lineParser footerParser = try $ do
type TableComponents mf = ([Alignment], [Double], mf [Row], mf [Row])
-tableWith' :: (Stream s m Char, HasReaderOptions st, Monad mf)
+tableWith' :: (Stream s m Char, UpdateSourcePos s Char,
+ HasReaderOptions st, Monad mf)
=> ParserT s st m (mf [Blocks], [Alignment], [Int])
-> ([Int] -> ParserT s st m (mf [Blocks]))
-> ParserT s st m sep
@@ -1013,20 +1103,19 @@ widthsFromIndices numColumns' indices =
-- (which may be grid), then the rows,
-- which may be grid, separated by blank lines, and
-- ending with a footer (dashed line followed by blank line).
-gridTableWith :: (Stream s m Char, HasReaderOptions st, HasLastStrPosition st,
- Monad mf, IsString s)
- => ParserT s st m (mf Blocks) -- ^ Block list parser
+gridTableWith :: (Monad m, HasReaderOptions st, HasLastStrPosition st, Monad mf)
+ => ParserT Sources st m (mf Blocks) -- ^ Block list parser
-> Bool -- ^ Headerless table
- -> ParserT s st m (mf Blocks)
+ -> ParserT Sources st m (mf Blocks)
gridTableWith blocks headless =
tableWith (gridTableHeader headless blocks) (gridTableRow blocks)
(gridTableSep '-') gridTableFooter
-gridTableWith' :: (Stream s m Char, HasReaderOptions st, HasLastStrPosition st,
- Monad mf, IsString s)
- => ParserT s st m (mf Blocks) -- ^ Block list parser
+gridTableWith' :: (Monad m, HasReaderOptions st, HasLastStrPosition st,
+ Monad mf)
+ => ParserT Sources st m (mf Blocks) -- ^ Block list parser
-> Bool -- ^ Headerless table
- -> ParserT s st m (TableComponents mf)
+ -> ParserT Sources st m (TableComponents mf)
gridTableWith' blocks headless =
tableWith' (gridTableHeader headless blocks) (gridTableRow blocks)
(gridTableSep '-') gridTableFooter
@@ -1035,7 +1124,7 @@ gridTableSplitLine :: [Int] -> Text -> [Text]
gridTableSplitLine indices line = map removeFinalBar $ tail $
splitTextByIndices (init indices) $ trimr line
-gridPart :: Stream s m Char => Char -> ParserT s st m ((Int, Int), Alignment)
+gridPart :: Monad m => Char -> ParserT Sources st m ((Int, Int), Alignment)
gridPart ch = do
leftColon <- option False (True <$ char ':')
dashes <- many1 (char ch)
@@ -1050,7 +1139,7 @@ gridPart ch = do
(False, False) -> AlignDefault
return ((lengthDashes, lengthDashes + 1), alignment)
-gridDashedLines :: Stream s m Char => Char -> ParserT s st m [((Int, Int), Alignment)]
+gridDashedLines :: Monad m => Char -> ParserT Sources st m [((Int, Int), Alignment)]
gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline
removeFinalBar :: Text -> Text
@@ -1059,14 +1148,14 @@ removeFinalBar = T.dropWhileEnd go . T.dropWhileEnd (=='|')
go c = T.any (== c) " \t"
-- | Separator between rows of grid table.
-gridTableSep :: Stream s m Char => Char -> ParserT s st m Char
+gridTableSep :: Monad m => Char -> ParserT Sources st m Char
gridTableSep ch = try $ gridDashedLines ch >> return '\n'
-- | Parse header for a grid table.
-gridTableHeader :: (Stream s m Char, Monad mf, IsString s, HasLastStrPosition st)
+gridTableHeader :: (Monad m, Monad mf, HasLastStrPosition st)
=> Bool -- ^ Headerless table
- -> ParserT s st m (mf Blocks)
- -> ParserT s st m (mf [Blocks], [Alignment], [Int])
+ -> ParserT Sources st m (mf Blocks)
+ -> ParserT Sources st m (mf [Blocks], [Alignment], [Int])
gridTableHeader True _ = do
optional blanklines
dashes <- gridDashedLines '-'
@@ -1089,17 +1178,17 @@ gridTableHeader False blocks = try $ do
heads <- sequence <$> mapM (parseFromString' blocks . trim) rawHeads
return (heads, aligns, indices)
-gridTableRawLine :: Stream s m Char => [Int] -> ParserT s st m [Text]
+gridTableRawLine :: (Stream s m Char, UpdateSourcePos s Char) => [Int] -> ParserT s st m [Text]
gridTableRawLine indices = do
char '|'
line <- many1Till anyChar newline
return (gridTableSplitLine indices $ T.pack line)
-- | Parse row of grid table.
-gridTableRow :: (Stream s m Char, Monad mf, IsString s, HasLastStrPosition st)
- => ParserT s st m (mf Blocks)
+gridTableRow :: (Monad m, Monad mf, HasLastStrPosition st)
+ => ParserT Sources st m (mf Blocks)
-> [Int]
- -> ParserT s st m (mf [Blocks])
+ -> ParserT Sources st m (mf [Blocks])
gridTableRow blocks indices = do
colLines <- many1 (gridTableRawLine indices)
let cols = map ((<> "\n") . T.unlines . removeOneLeadingSpace) $
@@ -1120,34 +1209,38 @@ removeOneLeadingSpace xs =
Just (c, _) -> c == ' '
-- | Parse footer for a grid table.
-gridTableFooter :: Stream s m Char => ParserT s st m ()
+gridTableFooter :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m ()
gridTableFooter = optional blanklines
---
-- | Removes the ParsecT layer from the monad transformer stack
-readWithM :: Monad m
- => ParserT Text st m a -- ^ parser
- -> st -- ^ initial state
- -> Text -- ^ input
+readWithM :: (Monad m, ToSources t)
+ => ParserT Sources st m a -- ^ parser
+ -> st -- ^ initial state
+ -> t -- ^ input
-> m (Either PandocError a)
readWithM parser state input =
- mapLeft (PandocParsecError input) <$> runParserT parser state "source" input
+ mapLeft (PandocParsecError sources)
+ <$> runParserT parser state (initialSourceName sources) sources
+ where
+ sources = toSources input
-- | Parse a string with a given parser and state
-readWith :: Parser Text st a
+readWith :: ToSources t
+ => Parser Sources st a
-> st
- -> Text
+ -> t
-> Either PandocError a
readWith p t inp = runIdentity $ readWithM p t inp
-- | Parse a string with @parser@ (for testing).
testStringWith :: Show a
- => ParserT Text ParserState Identity a
+ => ParserT Sources ParserState Identity a
-> Text
-> IO ()
testStringWith parser str = UTF8.putStrLn $ tshow $
- readWith parser defaultParserState str
+ readWith parser defaultParserState (toSources str)
-- | Parsing options.
data ParserState = ParserState
@@ -1394,19 +1487,23 @@ registerHeader (ident,classes,kvs) header' = do
updateState $ updateIdentifierList $ Set.insert ident
return (ident,classes,kvs)
-smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st,
+ HasQuoteContext st m,
+ Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m Inlines
-> ParserT s st m Inlines
smartPunctuation inlineParser = do
guardEnabled Ext_smart
choice [ quoted inlineParser, apostrophe, doubleCloseQuote, dash, ellipses ]
-quoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+quoted :: (HasLastStrPosition st, HasQuoteContext st m,
+ Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m Inlines
-> ParserT s st m Inlines
quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser
-singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m,
+ Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m Inlines
-> ParserT s st m Inlines
singleQuoted inlineParser = do
@@ -1416,7 +1513,8 @@ singleQuoted inlineParser = do
(withQuoteContext InSingleQuote (many1Till inlineParser singleQuoteEnd)))
<|> pure "\8217"
-doubleQuoted :: (HasQuoteContext st m, HasLastStrPosition st, Stream s m Char)
+doubleQuoted :: (HasQuoteContext st m, HasLastStrPosition st,
+ Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m Inlines
-> ParserT s st m Inlines
doubleQuoted inlineParser = do
@@ -1433,13 +1531,14 @@ failIfInQuoteContext context = do
context' <- getQuoteContext
when (context' == context) $ Prelude.fail "already inside quotes"
-charOrRef :: Stream s m Char => [Char] -> ParserT s st m Char
+charOrRef :: (Stream s m Char, UpdateSourcePos s Char) => [Char] -> ParserT s st m Char
charOrRef cs =
oneOf cs <|> try (do c <- characterReference
guard (c `elem` cs)
return c)
-singleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+singleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m,
+ Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m ()
singleQuoteStart = do
failIfInQuoteContext InSingleQuote
@@ -1449,7 +1548,7 @@ singleQuoteStart = do
charOrRef "'\8216\145"
void $ lookAhead (satisfy (not . isSpaceChar))
-singleQuoteEnd :: Stream s m Char
+singleQuoteEnd :: (Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m ()
singleQuoteEnd = try $ do
charOrRef "'\8217\146"
@@ -1457,7 +1556,7 @@ singleQuoteEnd = try $ do
doubleQuoteStart :: (HasLastStrPosition st,
HasQuoteContext st m,
- Stream s m Char)
+ Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m ()
doubleQuoteStart = do
failIfInQuoteContext InDoubleQuote
@@ -1465,21 +1564,21 @@ doubleQuoteStart = do
try $ do charOrRef "\"\8220\147"
void $ lookAhead (satisfy (not . isSpaceChar))
-doubleQuoteEnd :: Stream s m Char
+doubleQuoteEnd :: (Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m ()
doubleQuoteEnd = void (charOrRef "\"\8221\148")
-apostrophe :: Stream s m Char => ParserT s st m Inlines
+apostrophe :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines
apostrophe = (char '\'' <|> char '\8217') >> return (B.str "\8217")
-doubleCloseQuote :: Stream s m Char => ParserT s st m Inlines
+doubleCloseQuote :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines
doubleCloseQuote = B.str "\8221" <$ char '"'
-ellipses :: Stream s m Char
+ellipses :: (Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m Inlines
ellipses = try (string "..." >> return (B.str "\8230"))
-dash :: (HasReaderOptions st, Stream s m Char)
+dash :: (HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m Inlines
dash = try $ do
oldDashes <- extensionEnabled Ext_old_dashes <$> getOption readerExtensions
@@ -1506,7 +1605,7 @@ nested p = do
updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
return res
-citeKey :: (Stream s m Char, HasLastStrPosition st)
+citeKey :: (Stream s m Char, UpdateSourcePos s Char, HasLastStrPosition st)
=> ParserT s st m (Bool, Text)
citeKey = try $ do
guard =<< notAfterString
@@ -1575,10 +1674,11 @@ insertIncludedFile :: (PandocMonad m, HasIncludeFiles st)
insertIncludedFile blocks totoks dirs f =
runIdentity <$> insertIncludedFile' (Identity <$> blocks) totoks dirs f
+-- TODO: replace this with something using addToSources.
-- | Parse content of include file as future blocks. Circular includes result in
-- an @PandocParseError@.
insertIncludedFileF :: (PandocMonad m, HasIncludeFiles st)
- => ParserT Text st m (Future st Blocks)
+ => ParserT Sources st m (Future st Blocks)
-> [FilePath] -> FilePath
- -> ParserT Text st m (Future st Blocks)
-insertIncludedFileF p = insertIncludedFile' p id
+ -> ParserT Sources st m (Future st Blocks)
+insertIncludedFileF p = insertIncludedFile' p (\t -> Sources [(initialPos "",t)])
diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs
index 7ae9db34f..5106f8058 100644
--- a/src/Text/Pandoc/Readers.hs
+++ b/src/Text/Pandoc/Readers.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE MonoLocalBinds #-}
+{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE OverloadedStrings #-}
{- |
@@ -72,6 +73,7 @@ import Text.Pandoc.Error
import Text.Pandoc.Extensions
import Text.Pandoc.Options
import Text.Pandoc.Readers.CommonMark
+import Text.Pandoc.Readers.Markdown
import Text.Pandoc.Readers.Creole
import Text.Pandoc.Readers.DocBook
import Text.Pandoc.Readers.Docx
@@ -84,7 +86,6 @@ import Text.Pandoc.Readers.HTML (readHtml)
import Text.Pandoc.Readers.JATS (readJATS)
import Text.Pandoc.Readers.Jira (readJira)
import Text.Pandoc.Readers.LaTeX
-import Text.Pandoc.Readers.Markdown
import Text.Pandoc.Readers.MediaWiki
import Text.Pandoc.Readers.Muse
import Text.Pandoc.Readers.Native
@@ -102,50 +103,52 @@ import Text.Pandoc.Readers.CSV
import Text.Pandoc.Readers.CslJson
import Text.Pandoc.Readers.BibTeX
import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
-data Reader m = TextReader (ReaderOptions -> Text -> m Pandoc)
+data Reader m = TextReader (forall a . ToSources a =>
+ ReaderOptions -> a -> m Pandoc)
| ByteStringReader (ReaderOptions -> BL.ByteString -> m Pandoc)
-- | Association list of formats and readers.
readers :: PandocMonad m => [(Text, Reader m)]
-readers = [ ("native" , TextReader readNative)
- ,("json" , TextReader readJSON)
- ,("markdown" , TextReader readMarkdown)
- ,("markdown_strict" , TextReader readMarkdown)
- ,("markdown_phpextra" , TextReader readMarkdown)
- ,("markdown_github" , TextReader readMarkdown)
- ,("markdown_mmd", TextReader readMarkdown)
- ,("commonmark" , TextReader readCommonMark)
- ,("commonmark_x" , TextReader readCommonMark)
- ,("creole" , TextReader readCreole)
- ,("dokuwiki" , TextReader readDokuWiki)
- ,("gfm" , TextReader readCommonMark)
- ,("rst" , TextReader readRST)
- ,("mediawiki" , TextReader readMediaWiki)
- ,("vimwiki" , TextReader readVimwiki)
- ,("docbook" , TextReader readDocBook)
- ,("opml" , TextReader readOPML)
- ,("org" , TextReader readOrg)
- ,("textile" , TextReader readTextile) -- TODO : textile+lhs
- ,("html" , TextReader readHtml)
- ,("jats" , TextReader readJATS)
- ,("jira" , TextReader readJira)
- ,("latex" , TextReader readLaTeX)
- ,("haddock" , TextReader readHaddock)
- ,("twiki" , TextReader readTWiki)
- ,("tikiwiki" , TextReader readTikiWiki)
- ,("docx" , ByteStringReader readDocx)
- ,("odt" , ByteStringReader readOdt)
- ,("t2t" , TextReader readTxt2Tags)
- ,("epub" , ByteStringReader readEPUB)
- ,("muse" , TextReader readMuse)
- ,("man" , TextReader readMan)
- ,("fb2" , TextReader readFB2)
- ,("ipynb" , TextReader readIpynb)
- ,("csv" , TextReader readCSV)
- ,("csljson" , TextReader readCslJson)
- ,("bibtex" , TextReader readBibTeX)
- ,("biblatex" , TextReader readBibLaTeX)
+readers = [("native" , TextReader readNative)
+ ,("json" , TextReader readJSON)
+ ,("markdown" , TextReader readMarkdown)
+ ,("markdown_strict" , TextReader readMarkdown)
+ ,("markdown_phpextra" , TextReader readMarkdown)
+ ,("markdown_github" , TextReader readMarkdown)
+ ,("markdown_mmd", TextReader readMarkdown)
+ ,("commonmark" , TextReader readCommonMark)
+ ,("commonmark_x" , TextReader readCommonMark)
+ ,("creole" , TextReader readCreole)
+ ,("dokuwiki" , TextReader readDokuWiki)
+ ,("gfm" , TextReader readCommonMark)
+ ,("rst" , TextReader readRST)
+ ,("mediawiki" , TextReader readMediaWiki)
+ ,("vimwiki" , TextReader readVimwiki)
+ ,("docbook" , TextReader readDocBook)
+ ,("opml" , TextReader readOPML)
+ ,("org" , TextReader readOrg)
+ ,("textile" , TextReader readTextile) -- TODO : textile+lhs
+ ,("html" , TextReader readHtml)
+ ,("jats" , TextReader readJATS)
+ ,("jira" , TextReader readJira)
+ ,("latex" , TextReader readLaTeX)
+ ,("haddock" , TextReader readHaddock)
+ ,("twiki" , TextReader readTWiki)
+ ,("tikiwiki" , TextReader readTikiWiki)
+ ,("docx" , ByteStringReader readDocx)
+ ,("odt" , ByteStringReader readOdt)
+ ,("t2t" , TextReader readTxt2Tags)
+ ,("epub" , ByteStringReader readEPUB)
+ ,("muse" , TextReader readMuse)
+ ,("man" , TextReader readMan)
+ ,("fb2" , TextReader readFB2)
+ ,("ipynb" , TextReader readIpynb)
+ ,("csv" , TextReader readCSV)
+ ,("csljson" , TextReader readCslJson)
+ ,("bibtex" , TextReader readBibTeX)
+ ,("biblatex" , TextReader readBibLaTeX)
]
-- | Retrieve reader, extensions based on formatSpec (format+extensions).
@@ -173,9 +176,13 @@ getReader s =
return (r, exts)
-- | Read pandoc document from JSON format.
-readJSON :: PandocMonad m
- => ReaderOptions -> Text -> m Pandoc
-readJSON _ t =
- case eitherDecode' . BL.fromStrict . UTF8.fromText $ t of
+readJSON :: (PandocMonad m, ToSources a)
+ => ReaderOptions
+ -> a
+ -> m Pandoc
+readJSON _ s =
+ case eitherDecode' . BL.fromStrict . UTF8.fromText
+ . sourcesToText . toSources $ s of
Right doc -> return doc
- Left e -> throwError $ PandocParseError ("JSON parse error: " <> T.pack e)
+ Left e -> throwError $ PandocParseError ("JSON parse error: "
+ <> T.pack e)
diff --git a/src/Text/Pandoc/Readers/BibTeX.hs b/src/Text/Pandoc/Readers/BibTeX.hs
index b82a81350..318afda85 100644
--- a/src/Text/Pandoc/Readers/BibTeX.hs
+++ b/src/Text/Pandoc/Readers/BibTeX.hs
@@ -23,30 +23,33 @@ where
import Text.Pandoc.Options
import Text.Pandoc.Definition
import Text.Pandoc.Builder (setMeta, cite, str)
-import Data.Text (Text)
import Citeproc (Lang(..), parseLang)
import Citeproc.Locale (getLocale)
import Text.Pandoc.Error (PandocError(..))
import Text.Pandoc.Class (PandocMonad, lookupEnv)
import Text.Pandoc.Citeproc.BibTeX as BibTeX
import Text.Pandoc.Citeproc.MetaValue (referenceToMetaValue)
+import Text.Pandoc.Sources (ToSources(..))
import Control.Monad.Except (throwError)
-- | Read BibTeX from an input string and return a Pandoc document.
-- The document will have only metadata, with an empty body.
-- The metadata will contain a `references` field with the
-- bibliography entries, and a `nocite` field with the wildcard `[@*]`.
-readBibTeX :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readBibTeX :: (PandocMonad m, ToSources a)
+ => ReaderOptions -> a -> m Pandoc
readBibTeX = readBibTeX' BibTeX.Bibtex
-- | Read BibLaTeX from an input string and return a Pandoc document.
-- The document will have only metadata, with an empty body.
-- The metadata will contain a `references` field with the
-- bibliography entries, and a `nocite` field with the wildcard `[@*]`.
-readBibLaTeX :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readBibLaTeX :: (PandocMonad m, ToSources a)
+ => ReaderOptions -> a -> m Pandoc
readBibLaTeX = readBibTeX' BibTeX.Biblatex
-readBibTeX' :: PandocMonad m => Variant -> ReaderOptions -> Text -> m Pandoc
+readBibTeX' :: (PandocMonad m, ToSources a)
+ => Variant -> ReaderOptions -> a -> m Pandoc
readBibTeX' variant _opts t = do
mblangEnv <- lookupEnv "LANG"
let defaultLang = Lang "en" Nothing (Just "US") [] [] []
@@ -60,7 +63,7 @@ readBibTeX' variant _opts t = do
Left _ -> throwError $ PandocCiteprocError e
Right l -> return l
case BibTeX.readBibtexString variant locale (const True) t of
- Left e -> throwError $ PandocParsecError t e
+ Left e -> throwError $ PandocParsecError (toSources t) e
Right refs -> return $ setMeta "references"
(map referenceToMetaValue refs)
. setMeta "nocite"
diff --git a/src/Text/Pandoc/Readers/CSV.hs b/src/Text/Pandoc/Readers/CSV.hs
index 2958d6180..eca8f9425 100644
--- a/src/Text/Pandoc/Readers/CSV.hs
+++ b/src/Text/Pandoc/Readers/CSV.hs
@@ -13,23 +13,23 @@
Conversion from CSV to a 'Pandoc' table.
-}
module Text.Pandoc.Readers.CSV ( readCSV ) where
-import Data.Text (Text)
import qualified Data.Text as T
import Text.Pandoc.CSV (parseCSV, defaultCSVOptions)
import Text.Pandoc.Definition
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Class (PandocMonad)
-import Text.Pandoc.Shared (crFilter)
import Text.Pandoc.Error
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
import Text.Pandoc.Options (ReaderOptions)
import Control.Monad.Except (throwError)
-readCSV :: PandocMonad m
+readCSV :: (PandocMonad m, ToSources a)
=> ReaderOptions -- ^ Reader options
- -> Text -- ^ Text to parse (assuming @'\n'@ line endings)
+ -> a
-> m Pandoc
-readCSV _opts s =
- case parseCSV defaultCSVOptions (crFilter s) of
+readCSV _opts s = do
+ let txt = sourcesToText $ toSources s
+ case parseCSV defaultCSVOptions txt of
Right (r:rs) -> return $ B.doc $ B.table capt
(zip aligns widths)
(TableHead nullAttr hdrs)
@@ -45,4 +45,4 @@ readCSV _opts s =
aligns = replicate numcols AlignDefault
widths = replicate numcols ColWidthDefault
Right [] -> return $ B.doc mempty
- Left e -> throwError $ PandocParsecError s e
+ Left e -> throwError $ PandocParsecError (toSources [("",txt)]) e
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
index 244f77940..b099a9b50 100644
--- a/src/Text/Pandoc/Readers/CommonMark.hs
+++ b/src/Text/Pandoc/Readers/CommonMark.hs
@@ -30,45 +30,55 @@ import Text.Pandoc.Readers.Metadata (yamlMetaBlock)
import Control.Monad.Except
import Data.Functor.Identity (runIdentity)
import Data.Typeable
-import Text.Pandoc.Parsing (runParserT, getPosition, sourceLine,
- runF, defaultParserState, take1WhileP, option)
+import Text.Pandoc.Parsing (runParserT, getPosition,
+ runF, defaultParserState, option, many1, anyChar,
+ Sources(..), ToSources(..), ParserT, Future,
+ sourceName)
import qualified Data.Text as T
-- | Parse a CommonMark formatted string into a 'Pandoc' structure.
-readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readCommonMark :: (PandocMonad m, ToSources a)
+ => ReaderOptions -> a -> m Pandoc
readCommonMark opts s
- | isEnabled Ext_yaml_metadata_block opts
- , "---" `T.isPrefixOf` s = do
- let metaValueParser = do
- inp <- option "" $ take1WhileP (const True)
- case runIdentity
- (commonmarkWith (specFor opts) "metadata value" inp) of
- Left _ -> mzero
- Right (Cm bls :: Cm () Blocks)
- -> return $ return $ B.toMetaValue bls
- res <- runParserT (do meta <- yamlMetaBlock metaValueParser
- pos <- getPosition
- return (meta, pos))
- defaultParserState "YAML metadata" s
- case res of
- Left _ -> readCommonMarkBody opts s
- Right (meta, pos) -> do
- let dropLines 0 = id
- dropLines n = dropLines (n - 1) . T.drop 1 . T.dropWhile (/='\n')
- let metaLines = sourceLine pos - 1
- let body = T.replicate metaLines "\n" <> dropLines metaLines s
- Pandoc _ bs <- readCommonMarkBody opts body
- return $ Pandoc (runF meta defaultParserState) bs
- | otherwise = readCommonMarkBody opts s
+ | isEnabled Ext_yaml_metadata_block opts = do
+ let sources = toSources s
+ let toks = concatMap sourceToToks (unSources sources)
+ res <- runParserT (do meta <- yamlMetaBlock (metaValueParser opts)
+ pos <- getPosition
+ return (meta, pos))
+ defaultParserState "YAML metadata" (toSources s)
+ case res of
+ Left _ -> readCommonMarkBody opts sources toks
+ Right (meta, pos) -> do
+ -- strip off metadata section and parse body
+ let body = dropWhile (\t -> tokPos t < pos) toks
+ Pandoc _ bs <- readCommonMarkBody opts sources body
+ return $ Pandoc (runF meta defaultParserState) bs
+ | otherwise = do
+ let sources = toSources s
+ let toks = concatMap sourceToToks (unSources sources)
+ readCommonMarkBody opts sources toks
-readCommonMarkBody :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
-readCommonMarkBody opts s
+sourceToToks :: (SourcePos, Text) -> [Tok]
+sourceToToks (pos, s) = tokenize (sourceName pos) s
+
+metaValueParser :: Monad m
+ => ReaderOptions -> ParserT Sources st m (Future st MetaValue)
+metaValueParser opts = do
+ inp <- option "" $ T.pack <$> many1 anyChar
+ let toks = concatMap sourceToToks (unSources (toSources inp))
+ case runIdentity (parseCommonmarkWith (specFor opts) toks) of
+ Left _ -> mzero
+ Right (Cm bls :: Cm () Blocks) -> return $ return $ B.toMetaValue bls
+
+readCommonMarkBody :: PandocMonad m => ReaderOptions -> Sources -> [Tok] -> m Pandoc
+readCommonMarkBody opts s toks
| isEnabled Ext_sourcepos opts =
- case runIdentity (commonmarkWith (specFor opts) "" s) of
+ case runIdentity (parseCommonmarkWith (specFor opts) toks) of
Left err -> throwError $ PandocParsecError s err
Right (Cm bls :: Cm SourceRange Blocks) -> return $ B.doc bls
| otherwise =
- case runIdentity (commonmarkWith (specFor opts) "" s) of
+ case runIdentity (parseCommonmarkWith (specFor opts) toks) of
Left err -> throwError $ PandocParsecError s err
Right (Cm bls :: Cm () Blocks) -> return $ B.doc bls
diff --git a/src/Text/Pandoc/Readers/Creole.hs b/src/Text/Pandoc/Readers/Creole.hs
index 2658dfea2..ad848ada7 100644
--- a/src/Text/Pandoc/Readers/Creole.hs
+++ b/src/Text/Pandoc/Readers/Creole.hs
@@ -23,21 +23,20 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad (..))
import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (enclosed)
-import Text.Pandoc.Shared (crFilter)
-
-- | Read creole from an input string and return a Pandoc document.
-readCreole :: PandocMonad m
+readCreole :: (PandocMonad m, ToSources a)
=> ReaderOptions
- -> Text
+ -> a
-> m Pandoc
readCreole opts s = do
- res <- readWithM parseCreole def{ stateOptions = opts } $ crFilter s <> "\n\n"
+ let sources = ensureFinalNewlines 2 (toSources s)
+ res <- readWithM parseCreole def{ stateOptions = opts } sources
case res of
Left e -> throwError e
Right d -> return d
-type CRLParser = ParserT Text ParserState
+type CRLParser = ParserT Sources ParserState
--
-- Utility functions
diff --git a/src/Text/Pandoc/Readers/CslJson.hs b/src/Text/Pandoc/Readers/CslJson.hs
index 30bb19483..a0af5c325 100644
--- a/src/Text/Pandoc/Readers/CslJson.hs
+++ b/src/Text/Pandoc/Readers/CslJson.hs
@@ -24,21 +24,22 @@ import Text.Pandoc.Options
import Text.Pandoc.Definition
import Text.Pandoc.Builder (setMeta, cite, str)
import qualified Text.Pandoc.UTF8 as UTF8
-import Data.Text (Text)
import qualified Data.Text as T
import Text.Pandoc.Error (PandocError(..))
import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Citeproc.CslJson (cslJsonToReferences)
import Text.Pandoc.Citeproc.MetaValue (referenceToMetaValue)
import Control.Monad.Except (throwError)
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
-- | Read CSL JSON from an input string and return a Pandoc document.
-- The document will have only metadata, with an empty body.
-- The metadata will contain a `references` field with the
-- bibliography entries, and a `nocite` field with the wildcard `[@*]`.
-readCslJson :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
-readCslJson _opts t =
- case cslJsonToReferences (UTF8.fromText t) of
+readCslJson :: (PandocMonad m, ToSources a)
+ => ReaderOptions -> a -> m Pandoc
+readCslJson _opts x =
+ case cslJsonToReferences (UTF8.fromText $ sourcesToText $ toSources x) of
Left e -> throwError $ PandocParseError $ T.pack e
Right refs -> return $ setMeta "references"
(map referenceToMetaValue refs)
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index ac3caa2c0..3db459cfd 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -30,7 +30,8 @@ import Text.Pandoc.Builder
import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
import Text.Pandoc.Options
import Text.Pandoc.Logging (LogMessage(..))
-import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces)
+import Text.Pandoc.Shared (safeRead, extractSpaces)
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
import Text.TeXMath (readMathML, writeTeX)
import Text.Pandoc.XML.Light
@@ -539,11 +540,15 @@ instance Default DBState where
, dbContent = [] }
-readDocBook :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readDocBook :: (PandocMonad m, ToSources a)
+ => ReaderOptions
+ -> a
+ -> m Pandoc
readDocBook _ inp = do
+ let sources = toSources inp
tree <- either (throwError . PandocXMLError "") return $
parseXMLContents
- (TL.fromStrict . handleInstructions $ crFilter inp)
+ (TL.fromStrict . handleInstructions . sourcesToText $ sources)
(bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock tree
return $ Pandoc (dbMeta st') (toList . mconcat $ bs)
diff --git a/src/Text/Pandoc/Readers/DokuWiki.hs b/src/Text/Pandoc/Readers/DokuWiki.hs
index dedc1f03f..db98ac8de 100644
--- a/src/Text/Pandoc/Readers/DokuWiki.hs
+++ b/src/Text/Pandoc/Readers/DokuWiki.hs
@@ -29,26 +29,27 @@ import Text.Pandoc.Definition
import Text.Pandoc.Error (PandocError (PandocParsecError))
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (enclosed, nested)
-import Text.Pandoc.Shared (crFilter, trim, stringify, tshow)
+import Text.Pandoc.Shared (trim, stringify, tshow)
-- | Read DokuWiki from an input string and return a Pandoc document.
-readDokuWiki :: PandocMonad m
+readDokuWiki :: (PandocMonad m, ToSources a)
=> ReaderOptions
- -> Text
+ -> a
-> m Pandoc
readDokuWiki opts s = do
- let input = crFilter s
- res <- runParserT parseDokuWiki def {stateOptions = opts } "source" input
+ let sources = toSources s
+ res <- runParserT parseDokuWiki def {stateOptions = opts }
+ (initialSourceName sources) sources
case res of
- Left e -> throwError $ PandocParsecError input e
+ Left e -> throwError $ PandocParsecError sources e
Right d -> return d
-type DWParser = ParserT Text ParserState
+type DWParser = ParserT Sources ParserState
-- * Utility functions
-- | Parse end-of-line, which can be either a newline or end-of-file.
-eol :: Stream s m Char => ParserT s st m ()
+eol :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m ()
eol = void newline <|> eof
nested :: PandocMonad m => DWParser m a -> DWParser m a
diff --git a/src/Text/Pandoc/Readers/FB2.hs b/src/Text/Pandoc/Readers/FB2.hs
index 66e390bd7..84e5278db 100644
--- a/src/Text/Pandoc/Readers/FB2.hs
+++ b/src/Text/Pandoc/Readers/FB2.hs
@@ -40,9 +40,9 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad, insertMedia, report)
import Text.Pandoc.Error
import Text.Pandoc.Logging
import Text.Pandoc.Options
-import Text.Pandoc.Shared (crFilter)
import Text.Pandoc.XML.Light
import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
type FB2 m = StateT FB2State m
@@ -63,9 +63,12 @@ instance HasMeta FB2State where
setMeta field v s = s {fb2Meta = setMeta field v (fb2Meta s)}
deleteMeta field s = s {fb2Meta = deleteMeta field (fb2Meta s)}
-readFB2 :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readFB2 :: (PandocMonad m, ToSources a)
+ => ReaderOptions
+ -> a
+ -> m Pandoc
readFB2 _ inp =
- case parseXMLElement $ TL.fromStrict $ crFilter inp of
+ case parseXMLElement $ TL.fromStrict $ sourcesToText $ toSources inp of
Left msg -> throwError $ PandocXMLError "" msg
Right el -> do
(bs, st) <- runStateT (parseRootElement el) def
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index c3e68afd8..f5c8a2277 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -62,21 +62,21 @@ import Text.Pandoc.Options (
extensionEnabled)
import Text.Pandoc.Parsing hiding ((<|>))
import Text.Pandoc.Shared (
- addMetaField, blocksToInlines', crFilter, escapeURI, extractSpaces,
+ addMetaField, blocksToInlines', escapeURI, extractSpaces,
htmlSpanLikeElements, renderTags', safeRead, tshow)
import Text.Pandoc.Walk
import Text.Parsec.Error
import Text.TeXMath (readMathML, writeTeX)
-- | Convert HTML-formatted string to 'Pandoc' document.
-readHtml :: PandocMonad m
+readHtml :: (PandocMonad m, ToSources a)
=> ReaderOptions -- ^ Reader options
- -> Text -- ^ String to parse (assumes @'\n'@ line endings)
+ -> a -- ^ Input to parse
-> m Pandoc
readHtml opts inp = do
let tags = stripPrefixes $ canonicalizeTags $
parseTagsOptions parseOptions{ optTagPosition = True }
- (crFilter inp)
+ (sourcesToText $ toSources inp)
parseDoc = do
blocks <- fixPlains False . mconcat <$> manyTill block eof
meta <- stateMeta . parserState <$> getState
@@ -830,17 +830,19 @@ pInlinesInTags tagtype f = extractSpaces f <$> pInTags tagtype inline
pTagText :: PandocMonad m => TagParser m Inlines
pTagText = try $ do
+ pos <- getPosition
(TagText str) <- pSatisfy isTagText
st <- getState
qu <- ask
parsed <- lift $ lift $
- flip runReaderT qu $ runParserT (many pTagContents) st "text" str
+ flip runReaderT qu $ runParserT (many pTagContents) st "text"
+ (Sources [(pos, str)])
case parsed of
Left _ -> throwError $ PandocParseError $
"Could not parse `" <> str <> "'"
Right result -> return $ mconcat result
-type InlinesParser m = HTMLParser m Text
+type InlinesParser m = HTMLParser m Sources
pTagContents :: PandocMonad m => InlinesParser m Inlines
pTagContents =
@@ -970,13 +972,14 @@ isCommentTag = tagComment (const True)
-- | Matches a stretch of HTML in balanced tags.
htmlInBalanced :: Monad m
=> (Tag Text -> Bool)
- -> ParserT Text st m Text
+ -> ParserT Sources st m Text
htmlInBalanced f = try $ do
lookAhead (char '<')
- inp <- getInput
- let ts = canonicalizeTags $
- parseTagsOptions parseOptions{ optTagWarning = True,
- optTagPosition = True } inp
+ sources <- getInput
+ let ts = canonicalizeTags
+ $ parseTagsOptions parseOptions{ optTagWarning = True,
+ optTagPosition = True }
+ $ sourcesToText sources
case ts of
(TagPosition sr sc : t@(TagOpen tn _) : rest) -> do
guard $ f t
@@ -1018,15 +1021,17 @@ hasTagWarning _ = False
-- | Matches a tag meeting a certain condition.
htmlTag :: (HasReaderOptions st, Monad m)
=> (Tag Text -> Bool)
- -> ParserT Text st m (Tag Text, Text)
+ -> ParserT Sources st m (Tag Text, Text)
htmlTag f = try $ do
lookAhead (char '<')
startpos <- getPosition
- inp <- getInput
+ sources <- getInput
+ let inp = sourcesToText sources
let ts = canonicalizeTags $ parseTagsOptions
parseOptions{ optTagWarning = False
, optTagPosition = True }
- (inp <> " ") -- add space to ensure that
+ (inp <> " ")
+ -- add space to ensure that
-- we get a TagPosition after the tag
(next, ln, col) <- case ts of
(TagPosition{} : next : TagPosition ln col : _)
diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs
index 48454e353..35eaac0a9 100644
--- a/src/Text/Pandoc/Readers/Haddock.hs
+++ b/src/Text/Pandoc/Readers/Haddock.hs
@@ -19,7 +19,7 @@ import Control.Monad.Except (throwError)
import Data.List (intersperse)
import Data.List.NonEmpty (nonEmpty)
import Data.Maybe (fromMaybe)
-import Data.Text (Text, unpack)
+import Data.Text (unpack)
import qualified Data.Text as T
import Documentation.Haddock.Parser
import Documentation.Haddock.Types as H
@@ -29,15 +29,17 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad)
import Text.Pandoc.Definition
import Text.Pandoc.Error
import Text.Pandoc.Options
-import Text.Pandoc.Shared (crFilter, splitTextBy, trim)
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
+import Text.Pandoc.Shared (splitTextBy, trim)
-- | Parse Haddock markup and return a 'Pandoc' document.
-readHaddock :: PandocMonad m
+readHaddock :: (PandocMonad m, ToSources a)
=> ReaderOptions
- -> Text
+ -> a
-> m Pandoc
-readHaddock opts s = case readHaddockEither opts (unpack (crFilter s)) of
+readHaddock opts s = case readHaddockEither opts
+ (unpack . sourcesToText . toSources $ s) of
Right result -> return result
Left e -> throwError e
diff --git a/src/Text/Pandoc/Readers/Ipynb.hs b/src/Text/Pandoc/Readers/Ipynb.hs
index 70296bb6b..cd1093109 100644
--- a/src/Text/Pandoc/Readers/Ipynb.hs
+++ b/src/Text/Pandoc/Readers/Ipynb.hs
@@ -39,10 +39,12 @@ import Data.Aeson as Aeson
import Control.Monad.Except (throwError)
import Text.Pandoc.Readers.Markdown (readMarkdown)
import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
-readIpynb :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
-readIpynb opts t = do
- let src = BL.fromStrict (TE.encodeUtf8 t)
+readIpynb :: (PandocMonad m, ToSources a)
+ => ReaderOptions -> a -> m Pandoc
+readIpynb opts x = do
+ let src = BL.fromStrict . TE.encodeUtf8 . sourcesToText $ toSources x
case eitherDecode src of
Right (notebook4 :: Notebook NbV4) -> notebookToPandoc opts notebook4
Left _ ->
diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs
index c068f3774..9cdbf1611 100644
--- a/src/Text/Pandoc/Readers/JATS.hs
+++ b/src/Text/Pandoc/Readers/JATS.hs
@@ -29,11 +29,12 @@ import Text.HTML.TagSoup.Entity (lookupEntity)
import Text.Pandoc.Builder
import Text.Pandoc.Class.PandocMonad (PandocMonad)
import Text.Pandoc.Options
-import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces)
+import Text.Pandoc.Shared (safeRead, extractSpaces)
import Text.TeXMath (readMathML, writeTeX)
import Text.Pandoc.XML.Light
import qualified Data.Set as S (fromList, member)
import Data.Set ((\\))
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
type JATS m = StateT JATSState m
@@ -52,10 +53,14 @@ instance Default JATSState where
, jatsContent = [] }
-readJATS :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readJATS :: (PandocMonad m, ToSources a)
+ => ReaderOptions
+ -> a
+ -> m Pandoc
readJATS _ inp = do
+ let sources = toSources inp
tree <- either (throwError . PandocXMLError "") return $
- parseXMLContents (TL.fromStrict $ crFilter inp)
+ parseXMLContents (TL.fromStrict . sourcesToText $ sources)
(bs, st') <- flip runStateT (def{ jatsContent = tree }) $ mapM parseBlock tree
return $ Pandoc (jatsMeta st') (toList . mconcat $ bs)
diff --git a/src/Text/Pandoc/Readers/Jira.hs b/src/Text/Pandoc/Readers/Jira.hs
index 89aecbf56..a3b415f09 100644
--- a/src/Text/Pandoc/Readers/Jira.hs
+++ b/src/Text/Pandoc/Readers/Jira.hs
@@ -20,18 +20,20 @@ import Text.Pandoc.Builder hiding (cell)
import Text.Pandoc.Error (PandocError (PandocParseError))
import Text.Pandoc.Options (ReaderOptions)
import Text.Pandoc.Shared (stringify)
-
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
import qualified Text.Jira.Markup as Jira
-- | Read Jira wiki markup.
-readJira :: PandocMonad m
+readJira :: (PandocMonad m, ToSources a)
=> ReaderOptions
- -> Text
+ -> a
-> m Pandoc
-readJira _opts s = case parse s of
- Right d -> return $ jiraToPandoc d
- Left e -> throwError . PandocParseError $
- "Jira parse error" `append` pack (show e)
+readJira _opts inp = do
+ let sources = toSources inp
+ case parse (sourcesToText sources) of
+ Right d -> return $ jiraToPandoc d
+ Left e -> throwError . PandocParseError $
+ "Jira parse error" `append` pack (show e)
jiraToPandoc :: Jira.Doc -> Pandoc
jiraToPandoc (Jira.Doc blks) = doc $ foldMap jiraToPandocBlocks blks
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 9ad168293..f90d562ae 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -77,16 +77,17 @@ import Data.List.NonEmpty (nonEmpty)
-- import Debug.Trace (traceShowId)
-- | Parse LaTeX from string and return 'Pandoc' document.
-readLaTeX :: PandocMonad m
+readLaTeX :: (PandocMonad m, ToSources a)
=> ReaderOptions -- ^ Reader options
- -> Text -- ^ String to parse (assumes @'\n'@ line endings)
+ -> a -- ^ Input to parse
-> m Pandoc
readLaTeX opts ltx = do
+ let sources = toSources ltx
parsed <- runParserT parseLaTeX def{ sOptions = opts } "source"
- (tokenize "source" (crFilter ltx))
+ (tokenizeSources sources)
case parsed of
Right result -> return result
- Left e -> throwError $ PandocParsecError ltx e
+ Left e -> throwError $ PandocParsecError sources e
parseLaTeX :: PandocMonad m => LP m Pandoc
parseLaTeX = do
@@ -132,11 +133,11 @@ resolveRefs _ x = x
rawLaTeXBlock :: (PandocMonad m, HasMacros s, HasReaderOptions s)
- => ParserT Text s m Text
+ => ParserT Sources s m Text
rawLaTeXBlock = do
lookAhead (try (char '\\' >> letter))
inp <- getInput
- let toks = tokenize "source" inp
+ let toks = tokenizeSources inp
snd <$> (rawLaTeXParser toks False (macroDef (const mempty)) blocks
<|> rawLaTeXParser toks True
(do choice (map controlSeq
@@ -163,11 +164,11 @@ beginOrEndCommand = try $ do
(txt <> untokenize rawargs)
rawLaTeXInline :: (PandocMonad m, HasMacros s, HasReaderOptions s)
- => ParserT Text s m Text
+ => ParserT Sources s m Text
rawLaTeXInline = do
lookAhead (try (char '\\' >> letter))
inp <- getInput
- let toks = tokenize "source" inp
+ let toks = tokenizeSources inp
raw <- snd <$>
( rawLaTeXParser toks True
(mempty <$ (controlSeq "input" >> skipMany rawopt >> braced))
@@ -178,11 +179,11 @@ rawLaTeXInline = do
finalbraces <- mconcat <$> many (try (string "{}")) -- see #5439
return $ raw <> T.pack finalbraces
-inlineCommand :: PandocMonad m => ParserT Text ParserState m Inlines
+inlineCommand :: PandocMonad m => ParserT Sources ParserState m Inlines
inlineCommand = do
lookAhead (try (char '\\' >> letter))
inp <- getInput
- let toks = tokenize "source" inp
+ let toks = tokenizeSources inp
fst <$> rawLaTeXParser toks True (inlineEnvironment <|> inlineCommand')
inlines
@@ -641,7 +642,7 @@ opt = do
parsed <- runParserT (mconcat <$> many inline) st "bracketed option" toks
case parsed of
Right result -> return result
- Left e -> throwError $ PandocParsecError (untokenize toks) e
+ Left e -> throwError $ PandocParsecError (toSources toks) e
-- block elements:
diff --git a/src/Text/Pandoc/Readers/LaTeX/Citation.hs b/src/Text/Pandoc/Readers/LaTeX/Citation.hs
index 655823dab..af97125c6 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Citation.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Citation.hs
@@ -120,7 +120,7 @@ simpleCiteArgs inline = try $ do
runParserT (mconcat <$> many inline) st "bracketed option" toks
case parsed of
Right result -> return result
- Left e -> throwError $ PandocParsecError (untokenize toks) e
+ Left e -> throwError $ PandocParsecError (toSources toks) e
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index db58b333d..35ce3509d 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -27,6 +27,7 @@ module Text.Pandoc.Readers.LaTeX.Parsing
, rawLaTeXParser
, applyMacros
, tokenize
+ , tokenizeSources
, untokenize
, untoken
, totoks
@@ -248,7 +249,7 @@ withVerbatimMode parser = do
rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s)
=> [Tok] -> Bool -> LP m a -> LP m a
- -> ParserT Text s m (a, Text)
+ -> ParserT Sources s m (a, Text)
rawLaTeXParser toks retokenize parser valParser = do
pstate <- getState
let lstate = def{ sOptions = extractReaderOptions pstate }
@@ -268,7 +269,7 @@ rawLaTeXParser toks retokenize parser valParser = do
Left _ -> mzero
Right ((val, raw), st) -> do
updateState (updateMacros (sMacros st <>))
- _ <- takeP (T.length (untokenize toks'))
+ void $ count (T.length (untokenize toks')) anyChar
let result = untokenize raw
-- ensure we end with space if input did, see #4442
let result' =
@@ -281,7 +282,7 @@ rawLaTeXParser toks retokenize parser valParser = do
return (val, result')
applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s)
- => Text -> ParserT Text s m Text
+ => Text -> ParserT Sources s m Text
applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|>
do let retokenize = untokenize <$> many (satisfyTok (const True))
pstate <- getState
@@ -301,6 +302,11 @@ QuickCheck property:
> let t = T.pack s in untokenize (tokenize "random" t) == t
-}
+tokenizeSources :: Sources -> [Tok]
+tokenizeSources = concatMap tokenizeSource . unSources
+ where
+ tokenizeSource (pos, t) = totoks pos t
+
tokenize :: SourceName -> Text -> [Tok]
tokenize sourcename = totoks (initialPos sourcename)
diff --git a/src/Text/Pandoc/Readers/LaTeX/Types.hs b/src/Text/Pandoc/Readers/LaTeX/Types.hs
index f8c214318..c20b72bc5 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Types.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Types.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE FlexibleInstances #-}
{- |
Module : Text.Pandoc.Readers.LaTeX.Types
Copyright : Copyright (C) 2017-2021 John MacFarlane
@@ -18,7 +19,9 @@ module Text.Pandoc.Readers.LaTeX.Types ( Tok(..)
)
where
import Data.Text (Text)
-import Text.Parsec.Pos (SourcePos)
+import Text.Parsec.Pos (SourcePos, sourceName)
+import Text.Pandoc.Sources
+import Data.List (groupBy)
data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment |
Esc1 | Esc2 | Arg Int
@@ -27,6 +30,16 @@ data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment |
data Tok = Tok SourcePos TokType Text
deriving (Eq, Ord, Show)
+instance ToSources [Tok] where
+ toSources = Sources
+ . map (\ts -> case ts of
+ Tok p _ _ : _ -> (p, mconcat $ map tokToText ts)
+ _ -> error "toSources [Tok] encountered empty group")
+ . groupBy (\(Tok p1 _ _) (Tok p2 _ _) -> sourceName p1 == sourceName p2)
+
+tokToText :: Tok -> Text
+tokToText (Tok _ _ t) = t
+
data ExpansionPoint = ExpandWhenDefined | ExpandWhenUsed
deriving (Eq, Ord, Show)
diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs
index 21b8feaab..1141af66f 100644
--- a/src/Text/Pandoc/Readers/Man.hs
+++ b/src/Text/Pandoc/Readers/Man.hs
@@ -20,7 +20,7 @@ import Control.Monad (liftM, mzero, guard, void)
import Control.Monad.Trans (lift)
import Control.Monad.Except (throwError)
import Data.Maybe (catMaybes, isJust)
-import Data.List (intersperse, intercalate)
+import Data.List (intersperse)
import qualified Data.Text as T
import Text.Pandoc.Builder as B
import Text.Pandoc.Class.PandocMonad (PandocMonad(..), report)
@@ -29,9 +29,8 @@ import Text.Pandoc.Logging (LogMessage(..))
import Text.Pandoc.Options
import Text.Pandoc.Parsing
import Text.Pandoc.Walk (query)
-import Text.Pandoc.Shared (crFilter, mapLeft)
+import Text.Pandoc.Shared (mapLeft)
import Text.Pandoc.Readers.Roff -- TODO explicit imports
-import Text.Parsec hiding (tokenPrim)
import qualified Text.Parsec as Parsec
import Text.Parsec.Pos (updatePosString)
import qualified Data.Foldable as Foldable
@@ -50,13 +49,20 @@ type ManParser m = ParserT [RoffToken] ManState m
-- | Read man (troff) from an input string and return a Pandoc document.
-readMan :: PandocMonad m => ReaderOptions -> T.Text -> m Pandoc
-readMan opts txt = do
- tokenz <- lexRoff (initialPos "input") (crFilter txt)
+readMan :: (PandocMonad m, ToSources a)
+ => ReaderOptions
+ -> a
+ -> m Pandoc
+readMan opts s = do
+ let Sources inps = toSources s
+ tokenz <- mconcat <$> mapM (uncurry lexRoff) inps
let state = def {readerOptions = opts} :: ManState
+ let fixError (PandocParsecError _ e) = PandocParsecError (Sources inps) e
+ fixError e = e
eitherdoc <- readWithMTokens parseMan state
(Foldable.toList . unRoffTokens $ tokenz)
- either throwError return eitherdoc
+ either (throwError . fixError) return eitherdoc
+
readWithMTokens :: PandocMonad m
=> ParserT [RoffToken] ManState m a -- ^ parser
@@ -64,9 +70,10 @@ readWithMTokens :: PandocMonad m
-> [RoffToken] -- ^ input
-> m (Either PandocError a)
readWithMTokens parser state input =
- let leftF = PandocParsecError . T.pack . intercalate "\n" $ show <$> input
+ let leftF = PandocParsecError mempty
in mapLeft leftF `liftM` runParserT parser state "source" input
+
parseMan :: PandocMonad m => ManParser m Pandoc
parseMan = do
bs <- many parseBlock <* eof
@@ -89,7 +96,7 @@ parseBlock = choice [ parseList
parseTable :: PandocMonad m => ManParser m Blocks
parseTable = do
- modifyState $ \st -> st { tableCellsPlain = True }
+ updateState $ \st -> st { tableCellsPlain = True }
let isTbl Tbl{} = True
isTbl _ = False
Tbl _opts rows pos <- msatisfy isTbl
@@ -135,7 +142,7 @@ parseTable = do
case res' of
Left _ -> Prelude.fail "Could not parse table cell"
Right x -> do
- modifyState $ \s -> s{ tableCellsPlain = False }
+ updateState $ \s -> s{ tableCellsPlain = False }
return x
Right x -> return x
@@ -222,7 +229,7 @@ parseTitle = do
setMeta "section" (linePartsToInlines y)
[x] -> setMeta "title" (linePartsToInlines x)
[] -> id
- modifyState $ \st -> st{ metadata = adjustMeta $ metadata st }
+ updateState $ \st -> st{ metadata = adjustMeta $ metadata st }
return mempty
linePartsToInlines :: [LinePart] -> Inlines
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index ba8ed147e..69dd51bc4 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -47,19 +47,20 @@ import Text.Pandoc.Readers.LaTeX (applyMacros, rawLaTeXBlock, rawLaTeXInline)
import Text.Pandoc.Shared
import Text.Pandoc.XML (fromEntities)
import Text.Pandoc.Readers.Metadata (yamlBsToMeta, yamlBsToRefs, yamlMetaBlock)
+-- import Debug.Trace (traceShowId)
-type MarkdownParser m = ParserT Text ParserState m
+type MarkdownParser m = ParserT Sources ParserState m
type F = Future ParserState
-- | Read markdown from an input string and return a Pandoc document.
-readMarkdown :: PandocMonad m
+readMarkdown :: (PandocMonad m, ToSources a)
=> ReaderOptions -- ^ Reader options
- -> Text -- ^ String to parse (assuming @'\n'@ line endings)
+ -> a -- ^ Input
-> m Pandoc
readMarkdown opts s = do
parsed <- readWithM parseMarkdown def{ stateOptions = opts }
- (crFilter s <> "\n\n")
+ (ensureFinalNewlines 3 (toSources s))
case parsed of
Right result -> return result
Left e -> throwError e
@@ -80,7 +81,7 @@ yamlToMeta opts mbfp bstr = do
meta <- yamlBsToMeta (fmap B.toMetaValue <$> parseBlocks) bstr
setPosition oldPos
return $ runF meta defaultParserState
- parsed <- readWithM parser def{ stateOptions = opts } ""
+ parsed <- readWithM parser def{ stateOptions = opts } ("" :: Text)
case parsed of
Right result -> return result
Left e -> throwError e
@@ -103,7 +104,7 @@ yamlToRefs idpred opts mbfp bstr = do
refs <- yamlBsToRefs (fmap B.toMetaValue <$> parseBlocks) idpred bstr
setPosition oldPos
return $ runF refs defaultParserState
- parsed <- readWithM parser def{ stateOptions = opts } ""
+ parsed <- readWithM parser def{ stateOptions = opts } ("" :: Text)
case parsed of
Right result -> return result
Left e -> throwError e
@@ -146,14 +147,14 @@ inList = do
ctx <- stateParserContext <$> getState
guard (ctx == ListItemState)
-spnl :: PandocMonad m => ParserT Text st m ()
+spnl :: PandocMonad m => ParserT Sources st m ()
spnl = try $ do
skipSpaces
optional newline
skipSpaces
notFollowedBy (char '\n')
-spnl' :: PandocMonad m => ParserT Text st m Text
+spnl' :: PandocMonad m => ParserT Sources st m Text
spnl' = try $ do
xs <- many spaceChar
ys <- option "" $ try $ (:) <$> newline
@@ -568,7 +569,7 @@ registerImplicitHeader raw attr@(ident, _, _)
-- hrule block
--
-hrule :: PandocMonad m => ParserT Text st m (F Blocks)
+hrule :: PandocMonad m => ParserT Sources st m (F Blocks)
hrule = try $ do
skipSpaces
start <- satisfy isHruleChar
@@ -588,7 +589,7 @@ indentedLine = indentSpaces >> anyLineNewline
blockDelimiter :: PandocMonad m
=> (Char -> Bool)
-> Maybe Int
- -> ParserT Text ParserState m Int
+ -> ParserT Sources ParserState m Int
blockDelimiter f len = try $ do
skipNonindentSpaces
c <- lookAhead (satisfy f)
@@ -732,7 +733,7 @@ lhsCodeBlockBirdWith c = try $ do
blanklines
return $ T.intercalate "\n" lns'
-birdTrackLine :: PandocMonad m => Char -> ParserT Text st m Text
+birdTrackLine :: PandocMonad m => Char -> ParserT Sources st m Text
birdTrackLine c = try $ do
char c
-- allow html tags on left margin:
@@ -1025,7 +1026,7 @@ para = try $ do
option (B.plain <$> result)
$ try $ do
newline
- (blanklines >> return mempty)
+ (mempty <$ blanklines)
<|> (guardDisabled Ext_blank_before_blockquote >> () <$ lookAhead blockQuote)
<|> (guardEnabled Ext_backtick_code_blocks >> () <$ lookAhead codeBlockFenced)
<|> (guardDisabled Ext_blank_before_header >> () <$ lookAhead header)
@@ -1170,7 +1171,7 @@ lineBlock = do
-- and the length including trailing space.
dashedLine :: PandocMonad m
=> Char
- -> ParserT Text st m (Int, Int)
+ -> ParserT Sources st m (Int, Int)
dashedLine ch = do
dashes <- many1 (char ch)
sp <- many spaceChar
@@ -1239,7 +1240,7 @@ rawTableLine :: PandocMonad m
-> MarkdownParser m [Text]
rawTableLine indices = do
notFollowedBy' (blanklines' <|> tableFooter)
- line <- take1WhileP (/='\n') <* newline
+ line <- anyLine
return $ map trim $ tail $
splitTextByIndices (init indices) line
@@ -1390,7 +1391,7 @@ pipeTableCell =
return $ B.plain <$> result)
<|> return mempty
-pipeTableHeaderPart :: PandocMonad m => ParserT Text st m (Alignment, Int)
+pipeTableHeaderPart :: PandocMonad m => ParserT Sources st m (Alignment, Int)
pipeTableHeaderPart = try $ do
skipMany spaceChar
left <- optionMaybe (char ':')
@@ -1406,10 +1407,14 @@ pipeTableHeaderPart = try $ do
(Just _,Just _) -> AlignCenter, len)
-- Succeed only if current line contains a pipe.
-scanForPipe :: PandocMonad m => ParserT Text st m ()
+scanForPipe :: PandocMonad m => ParserT Sources st m ()
scanForPipe = do
- inp <- getInput
- case T.break (\c -> c == '\n' || c == '|') inp of
+ Sources inps <- getInput
+ let ln = case inps of
+ [] -> ""
+ ((_,t):(_,t'):_) | T.null t -> t'
+ ((_,t):_) -> t
+ case T.break (\c -> c == '\n' || c == '|') ln of
(_, T.uncons -> Just ('|', _)) -> return ()
_ -> mzero
@@ -1703,13 +1708,13 @@ whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace"
where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak)
regsp = skipMany spaceChar >> return B.space
-nonEndline :: PandocMonad m => ParserT Text st m Char
+nonEndline :: PandocMonad m => ParserT Sources st m Char
nonEndline = satisfy (/='\n')
str :: PandocMonad m => MarkdownParser m (F Inlines)
str = do
result <- mconcat <$> many1
- ( take1WhileP isAlphaNum
+ ( T.pack <$> (many1 alphaNum)
<|> "." <$ try (char '.' <* notFollowedBy (char '.')) )
updateLastStrPos
(do guardEnabled Ext_smart
@@ -1962,7 +1967,7 @@ rawLaTeXInline' = do
s <- rawLaTeXInline
return $ return $ B.rawInline "tex" s -- "tex" because it might be context
-rawConTeXtEnvironment :: PandocMonad m => ParserT Text st m Text
+rawConTeXtEnvironment :: PandocMonad m => ParserT Sources st m Text
rawConTeXtEnvironment = try $ do
string "\\start"
completion <- inBrackets (letter <|> digit <|> spaceChar)
@@ -1971,7 +1976,7 @@ rawConTeXtEnvironment = try $ do
(try $ string "\\stop" >> textStr completion)
return $ "\\start" <> completion <> T.concat contents <> "\\stop" <> completion
-inBrackets :: PandocMonad m => ParserT Text st m Char -> ParserT Text st m Text
+inBrackets :: PandocMonad m => ParserT Sources st m Char -> ParserT Sources st m Text
inBrackets parser = do
char '['
contents <- manyChar parser
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index 9f4d5e170..825e4a2eb 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -36,17 +36,18 @@ import Text.Pandoc.Logging
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (nested)
import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isCommentTag)
-import Text.Pandoc.Shared (crFilter, safeRead, stringify, stripTrailingNewlines,
+import Text.Pandoc.Shared (safeRead, stringify, stripTrailingNewlines,
trim, splitTextBy, tshow)
import Text.Pandoc.Walk (walk)
import Text.Pandoc.XML (fromEntities)
-- | Read mediawiki from an input string and return a Pandoc document.
-readMediaWiki :: PandocMonad m
- => ReaderOptions -- ^ Reader options
- -> Text -- ^ String to parse (assuming @'\n'@ line endings)
+readMediaWiki :: (PandocMonad m, ToSources a)
+ => ReaderOptions
+ -> a
-> m Pandoc
readMediaWiki opts s = do
+ let sources = toSources s
parsed <- readWithM parseMediaWiki MWState{ mwOptions = opts
, mwMaxNestingLevel = 4
, mwNextLinkNumber = 1
@@ -55,7 +56,7 @@ readMediaWiki opts s = do
, mwLogMessages = []
, mwInTT = False
}
- (crFilter s <> "\n")
+ sources
case parsed of
Right result -> return result
Left e -> throwError e
@@ -69,7 +70,7 @@ data MWState = MWState { mwOptions :: ReaderOptions
, mwInTT :: Bool
}
-type MWParser m = ParserT Text MWState m
+type MWParser m = ParserT Sources MWState m
instance HasReaderOptions MWState where
extractReaderOptions = mwOptions
diff --git a/src/Text/Pandoc/Readers/Metadata.hs b/src/Text/Pandoc/Readers/Metadata.hs
index cb141cba5..bbcfe62ea 100644
--- a/src/Text/Pandoc/Readers/Metadata.hs
+++ b/src/Text/Pandoc/Readers/Metadata.hs
@@ -35,9 +35,9 @@ import qualified Data.Text.Lazy as TL
import qualified Text.Pandoc.UTF8 as UTF8
yamlBsToMeta :: (PandocMonad m, HasLastStrPosition st)
- => ParserT Text st m (Future st MetaValue)
+ => ParserT Sources st m (Future st MetaValue)
-> BL.ByteString
- -> ParserT Text st m (Future st Meta)
+ -> ParserT Sources st m (Future st Meta)
yamlBsToMeta pMetaValue bstr = do
case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of
Right (YAML.Doc (YAML.Mapping _ _ o):_)
@@ -67,10 +67,10 @@ lookupYAML _ _ = Nothing
-- Returns filtered list of references.
yamlBsToRefs :: (PandocMonad m, HasLastStrPosition st)
- => ParserT Text st m (Future st MetaValue)
+ => ParserT Sources st m (Future st MetaValue)
-> (Text -> Bool) -- ^ Filter for id
-> BL.ByteString
- -> ParserT Text st m (Future st [MetaValue])
+ -> ParserT Sources st m (Future st [MetaValue])
yamlBsToRefs pMetaValue idpred bstr =
case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of
Right (YAML.Doc o@YAML.Mapping{}:_)
@@ -108,9 +108,9 @@ nodeToKey (YAML.Scalar _ (YAML.SUnknown _ t)) = Just t
nodeToKey _ = Nothing
normalizeMetaValue :: (PandocMonad m, HasLastStrPosition st)
- => ParserT Text st m (Future st MetaValue)
+ => ParserT Sources st m (Future st MetaValue)
-> Text
- -> ParserT Text st m (Future st MetaValue)
+ -> ParserT Sources st m (Future st MetaValue)
normalizeMetaValue pMetaValue x =
-- Note: a standard quoted or unquoted YAML value will
-- not end in a newline, but a "block" set off with
@@ -133,9 +133,9 @@ checkBoolean t
| otherwise = Nothing
yamlToMetaValue :: (PandocMonad m, HasLastStrPosition st)
- => ParserT Text st m (Future st MetaValue)
+ => ParserT Sources st m (Future st MetaValue)
-> YAML.Node YE.Pos
- -> ParserT Text st m (Future st MetaValue)
+ -> ParserT Sources st m (Future st MetaValue)
yamlToMetaValue pMetaValue (YAML.Scalar _ x) =
case x of
YAML.SStr t -> normalizeMetaValue pMetaValue t
@@ -156,9 +156,9 @@ yamlToMetaValue pMetaValue (YAML.Mapping _ _ o) =
yamlToMetaValue _ _ = return $ return $ MetaString ""
yamlMap :: (PandocMonad m, HasLastStrPosition st)
- => ParserT Text st m (Future st MetaValue)
+ => ParserT Sources st m (Future st MetaValue)
-> M.Map (YAML.Node YE.Pos) (YAML.Node YE.Pos)
- -> ParserT Text st m (Future st (M.Map Text MetaValue))
+ -> ParserT Sources st m (Future st (M.Map Text MetaValue))
yamlMap pMetaValue o = do
kvs <- forM (M.toList o) $ \(key, v) -> do
k <- maybe (throwError $ PandocParseError
@@ -177,8 +177,8 @@ yamlMap pMetaValue o = do
-- | Parse a YAML metadata block using the supplied 'MetaValue' parser.
yamlMetaBlock :: (HasLastStrPosition st, PandocMonad m)
- => ParserT Text st m (Future st MetaValue)
- -> ParserT Text st m (Future st Meta)
+ => ParserT Sources st m (Future st MetaValue)
+ -> ParserT Sources st m (Future st Meta)
yamlMetaBlock parser = try $ do
string "---"
blankline
@@ -189,5 +189,5 @@ yamlMetaBlock parser = try $ do
optional blanklines
yamlBsToMeta parser $ UTF8.fromTextLazy $ TL.fromStrict rawYaml
-stopLine :: Monad m => ParserT Text st m ()
+stopLine :: Monad m => ParserT Sources st m ()
stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs
index 698bfd3d7..a0d4534f1 100644
--- a/src/Text/Pandoc/Readers/Muse.hs
+++ b/src/Text/Pandoc/Readers/Muse.hs
@@ -37,18 +37,19 @@ import Text.Pandoc.Error (PandocError (PandocParsecError))
import Text.Pandoc.Logging
import Text.Pandoc.Options
import Text.Pandoc.Parsing
-import Text.Pandoc.Shared (crFilter, trimr, tshow)
+import Text.Pandoc.Shared (trimr, tshow)
-- | Read Muse from an input string and return a Pandoc document.
-readMuse :: PandocMonad m
+readMuse :: (PandocMonad m, ToSources a)
=> ReaderOptions
- -> Text
+ -> a
-> m Pandoc
readMuse opts s = do
- let input = crFilter s
- res <- flip runReaderT def $ runParserT parseMuse def{ museOptions = opts } "source" input
+ let sources = toSources s
+ res <- flip runReaderT def $ runParserT parseMuse def{ museOptions = opts }
+ (initialSourceName sources) sources
case res of
- Left e -> throwError $ PandocParsecError input e
+ Left e -> throwError $ PandocParsecError sources e
Right d -> return d
type F = Future MuseState
@@ -82,7 +83,7 @@ instance Default MuseEnv where
, museInPara = False
}
-type MuseParser m = ParserT Text MuseState (ReaderT MuseEnv m)
+type MuseParser m = ParserT Sources MuseState (ReaderT MuseEnv m)
instance HasReaderOptions MuseState where
extractReaderOptions = museOptions
@@ -155,7 +156,7 @@ firstColumn = getPosition >>= \pos -> guard (sourceColumn pos == 1)
-- * Parsers
-- | Parse end-of-line, which can be either a newline or end-of-file.
-eol :: Stream s m Char => ParserT s st m ()
+eol :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m ()
eol = void newline <|> eof
getIndent :: PandocMonad m
diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs
index 9c8bc0374..58f235e81 100644
--- a/src/Text/Pandoc/Readers/Native.hs
+++ b/src/Text/Pandoc/Readers/Native.hs
@@ -21,6 +21,7 @@ import Control.Monad.Except (throwError)
import Data.Text (Text)
import Text.Pandoc.Class.PandocMonad (PandocMonad)
import Text.Pandoc.Error
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
-- | Read native formatted text and return a Pandoc document.
-- The input may be a full pandoc document, a block list, a block,
@@ -32,14 +33,15 @@ import Text.Pandoc.Error
--
-- > Pandoc nullMeta [Plain [Str "hi"]]
--
-readNative :: PandocMonad m
+readNative :: (PandocMonad m, ToSources a)
=> ReaderOptions
- -> Text -- ^ String to parse (assuming @'\n'@ line endings)
+ -> a
-> m Pandoc
readNative _ s =
- case maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead s) of
- Right doc -> return doc
- Left _ -> throwError $ PandocParseError "couldn't read native"
+ let t = sourcesToText . toSources $ s
+ in case maybe (Pandoc nullMeta <$> readBlocks t) Right (safeRead t) of
+ Right doc -> return doc
+ Left _ -> throwError $ PandocParseError "couldn't read native"
readBlocks :: Text -> Either PandocError [Block]
readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead s)
diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs
index 5f2ddb876..668c9ca11 100644
--- a/src/Text/Pandoc/Readers/OPML.hs
+++ b/src/Text/Pandoc/Readers/OPML.hs
@@ -24,7 +24,8 @@ import Text.Pandoc.Options
import Text.Pandoc.Error (PandocError(..))
import Text.Pandoc.Readers.HTML (readHtml)
import Text.Pandoc.Readers.Markdown (readMarkdown)
-import Text.Pandoc.Shared (crFilter, blocksToInlines')
+import Text.Pandoc.Shared (blocksToInlines')
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
import Text.Pandoc.XML.Light
import Control.Monad.Except (throwError)
@@ -46,10 +47,14 @@ instance Default OPMLState where
, opmlOptions = def
}
-readOPML :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readOPML :: (PandocMonad m, ToSources a)
+ => ReaderOptions
+ -> a
+ -> m Pandoc
readOPML opts inp = do
- (bs, st') <- runStateT
- (case parseXMLContents (TL.fromStrict (crFilter inp)) of
+ let sources = toSources inp
+ (bs, st') <-
+ runStateT (case parseXMLContents (TL.fromStrict . sourcesToText $ sources) of
Left msg -> throwError $ PandocXMLError "" msg
Right ns -> mapM parseBlock ns)
def{ opmlOptions = opts }
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index afeb27a87..8823befdd 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -18,22 +18,19 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad)
import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.Pandoc.Parsing (reportLogMessages)
-import Text.Pandoc.Shared (crFilter)
-
+import Text.Pandoc.Sources (ToSources(..), ensureFinalNewlines)
import Control.Monad.Except (throwError)
import Control.Monad.Reader (runReaderT)
-import Data.Text (Text)
-
-- | Parse org-mode string and return a Pandoc document.
-readOrg :: PandocMonad m
+readOrg :: (PandocMonad m, ToSources a)
=> ReaderOptions -- ^ Reader options
- -> Text -- ^ String to parse (assuming @'\n'@ line endings)
+ -> a
-> m Pandoc
readOrg opts s = do
parsed <- flip runReaderT def $
readWithM parseOrg (optionsToParserState opts)
- (crFilter s <> "\n\n")
+ (ensureFinalNewlines 2 (toSources s))
case parsed of
Right result -> return result
Left e -> throwError e
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
index 519a6ce04..054f2611a 100644
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -29,6 +29,7 @@ import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline)
import Text.TeXMath (DisplayType (..), readTeX, writePandoc)
+import Text.Pandoc.Sources (ToSources(..))
import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
import Control.Monad (guard, mplus, mzero, unless, void, when)
@@ -802,7 +803,7 @@ inlineLaTeX = try $ do
parseAsInlineLaTeX :: PandocMonad m
=> Text -> TeXExport -> OrgParser m (Maybe Inlines)
parseAsInlineLaTeX cs = \case
- TeXExport -> maybeRight <$> runParserT inlineCommand state "" cs
+ TeXExport -> maybeRight <$> runParserT inlineCommand state "" (toSources cs)
TeXIgnore -> return (Just mempty)
TeXVerbatim -> return (Just $ B.str cs)
diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs
index 6ed24a602..c7ea02815 100644
--- a/src/Text/Pandoc/Readers/Org/Parsing.hs
+++ b/src/Text/Pandoc/Readers/Org/Parsing.hs
@@ -114,7 +114,7 @@ import Control.Monad (guard)
import Control.Monad.Reader (ReaderT)
-- | The parser used to read org files.
-type OrgParser m = ParserT Text OrgParserState (ReaderT OrgParserLocal m)
+type OrgParser m = ParserT Sources OrgParserState (ReaderT OrgParserLocal m)
--
-- Adaptions and specializations of parsing utilities
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index ac4c0b6cb..a3fcf028c 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -38,25 +38,24 @@ import Text.Pandoc.Options
import Text.Pandoc.Parsing
import Text.Pandoc.Shared
import qualified Text.Pandoc.UTF8 as UTF8
-import Text.Printf (printf)
import Data.Time.Format
-- TODO:
-- [ ] .. parsed-literal
-- | Parse reStructuredText string and return Pandoc document.
-readRST :: PandocMonad m
+readRST :: (PandocMonad m, ToSources a)
=> ReaderOptions -- ^ Reader options
- -> Text -- ^ Text to parse (assuming @'\n'@ line endings)
+ -> a
-> m Pandoc
readRST opts s = do
parsed <- readWithM parseRST def{ stateOptions = opts }
- (crFilter s <> "\n\n")
+ (ensureFinalNewlines 2 (toSources s))
case parsed of
Right result -> return result
Left e -> throwError e
-type RSTParser m = ParserT Text ParserState m
+type RSTParser m = ParserT Sources ParserState m
--
-- Constants and data structure definitions
@@ -151,11 +150,19 @@ parseRST = do
startPos <- getPosition
-- go through once just to get list of reference keys and notes
-- docMinusKeys is the raw document with blanks where the keys were...
- docMinusKeys <- T.concat <$>
- manyTill (referenceKey <|> anchorDef <|>
- noteBlock <|> citationBlock <|>
- (snd <$> withRaw comment) <|>
- headerBlock <|> lineClump) eof
+ let chunk = referenceKey
+ <|> anchorDef
+ <|> noteBlock
+ <|> citationBlock
+ <|> (snd <$> withRaw comment)
+ <|> headerBlock
+ <|> lineClump
+ docMinusKeys <- Sources <$>
+ manyTill (do pos <- getPosition
+ t <- chunk
+ return (pos, t)) eof
+ -- UGLY: we collapse source position information.
+ -- TODO: fix the parser to use the F monad instead of two passes
setInput docMinusKeys
setPosition startPos
st' <- getState
@@ -348,7 +355,7 @@ singleHeader' = try $ do
-- hrule block
--
-hrule :: Monad m => ParserT Text st m Blocks
+hrule :: Monad m => ParserT Sources st m Blocks
hrule = try $ do
chr <- oneOf underlineChars
count 3 (char chr)
@@ -363,7 +370,7 @@ hrule = try $ do
-- read a line indented by a given string
indentedLine :: (HasReaderOptions st, Monad m)
- => Int -> ParserT Text st m Text
+ => Int -> ParserT Sources st m Text
indentedLine indents = try $ do
lookAhead spaceChar
gobbleAtMostSpaces indents
@@ -372,7 +379,7 @@ indentedLine indents = try $ do
-- one or more indented lines, possibly separated by blank lines.
-- any amount of indentation will work.
indentedBlock :: (HasReaderOptions st, Monad m)
- => ParserT Text st m Text
+ => ParserT Sources st m Text
indentedBlock = try $ do
indents <- length <$> lookAhead (many1 spaceChar)
lns <- many1 $ try $ do b <- option "" blanklines
@@ -381,20 +388,20 @@ indentedBlock = try $ do
optional blanklines
return $ T.unlines lns
-quotedBlock :: Monad m => ParserT Text st m Text
+quotedBlock :: Monad m => ParserT Sources st m Text
quotedBlock = try $ do
quote <- lookAhead $ oneOf "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
lns <- many1 $ lookAhead (char quote) >> anyLine
optional blanklines
return $ T.unlines lns
-codeBlockStart :: Monad m => ParserT Text st m Char
+codeBlockStart :: Monad m => ParserT Sources st m Char
codeBlockStart = string "::" >> blankline >> blankline
-codeBlock :: Monad m => ParserT Text ParserState m Blocks
+codeBlock :: Monad m => ParserT Sources ParserState m Blocks
codeBlock = try $ codeBlockStart >> codeBlockBody
-codeBlockBody :: Monad m => ParserT Text ParserState m Blocks
+codeBlockBody :: Monad m => ParserT Sources ParserState m Blocks
codeBlockBody = do
lang <- stateRstHighlight <$> getState
try $ B.codeBlockWith ("", maybeToList lang, []) . stripTrailingNewlines <$>
@@ -410,14 +417,14 @@ lhsCodeBlock = try $ do
return $ B.codeBlockWith ("", ["haskell","literate"], [])
$ T.intercalate "\n" lns
-latexCodeBlock :: Monad m => ParserT Text st m [Text]
+latexCodeBlock :: Monad m => ParserT Sources st m [Text]
latexCodeBlock = try $ do
try (latexBlockLine "\\begin{code}")
many1Till anyLine (try $ latexBlockLine "\\end{code}")
where
latexBlockLine s = skipMany spaceChar >> string s >> blankline
-birdCodeBlock :: Monad m => ParserT Text st m [Text]
+birdCodeBlock :: Monad m => ParserT Sources st m [Text]
birdCodeBlock = filterSpace <$> many1 birdTrackLine
where filterSpace lns =
-- if (as is normal) there is always a space after >, drop it
@@ -425,7 +432,7 @@ birdCodeBlock = filterSpace <$> many1 birdTrackLine
then map (T.drop 1) lns
else lns
-birdTrackLine :: Monad m => ParserT Text st m Text
+birdTrackLine :: Monad m => ParserT Sources st m Text
birdTrackLine = char '>' >> anyLine
--
@@ -456,7 +463,6 @@ includeDirective top fields body = do
let (startLine :: Maybe Int) = lookup "start-line" fields >>= safeRead
let (endLine :: Maybe Int) = lookup "end-line" fields >>= safeRead
oldPos <- getPosition
- oldInput <- getInput
containers <- stateContainers <$> getState
when (f `elem` containers) $
throwError $ PandocParseError $ "Include file loop at " <> tshow oldPos
@@ -494,15 +500,11 @@ includeDirective top fields body = do
Nothing -> case lookup "literal" fields of
Just _ -> return $ B.rawBlock "rst" contents'
Nothing -> do
- setPosition $ newPos (T.unpack f) 1 1
- setInput $ contents' <> "\n"
- bs <- optional blanklines >>
- (mconcat <$> many block)
- setInput oldInput
- setPosition oldPos
+ addToSources (initialPos (T.unpack f))
+ (contents' <> "\n")
updateState $ \s -> s{ stateContainers =
tail $ stateContainers s }
- return bs
+ return mempty
--
@@ -526,7 +528,7 @@ definitionList :: PandocMonad m => RSTParser m Blocks
definitionList = B.definitionList <$> many1 definitionListItem
-- parses bullet list start and returns its length (inc. following whitespace)
-bulletListStart :: Monad m => ParserT Text st m Int
+bulletListStart :: Monad m => ParserT Sources st m Int
bulletListStart = try $ do
notFollowedBy' hrule -- because hrules start out just like lists
marker <- oneOf bulletListMarkers
@@ -1103,7 +1105,7 @@ quotedReferenceName = try $ do
-- plus isolated (no two adjacent) internal hyphens, underscores,
-- periods, colons and plus signs; no whitespace or other characters
-- are allowed.
-simpleReferenceName :: Monad m => ParserT Text st m Text
+simpleReferenceName :: Monad m => ParserT Sources st m Text
simpleReferenceName = do
x <- alphaNum
xs <- many $ alphaNum
@@ -1122,7 +1124,7 @@ referenceKey = do
-- return enough blanks to replace key
return $ T.replicate (sourceLine endPos - sourceLine startPos) "\n"
-targetURI :: Monad m => ParserT Text st m Text
+targetURI :: Monad m => ParserT Sources st m Text
targetURI = do
skipSpaces
optional $ try $ newline >> notFollowedBy blankline
@@ -1160,8 +1162,10 @@ anonymousKey :: Monad m => RSTParser m ()
anonymousKey = try $ do
oneOfStrings [".. __:", "__"]
src <- targetURI
- pos <- getPosition
- let key = toKey $ "_" <> T.pack (printf "%09d" (sourceLine pos))
+ -- we need to ensure that the keys are ordered by occurrence in
+ -- the document.
+ numKeys <- M.size . stateKeys <$> getState
+ let key = toKey $ "_" <> T.pack (show numKeys)
updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $
stateKeys s }
@@ -1250,13 +1254,13 @@ headerBlock = do
-- Grid tables TODO:
-- - column spans
-dashedLine :: Monad m => Char -> ParserT Text st m (Int, Int)
+dashedLine :: Monad m => Char -> ParserT Sources st m (Int, Int)
dashedLine ch = do
dashes <- many1 (char ch)
sp <- many (char ' ')
return (length dashes, length $ dashes ++ sp)
-simpleDashedLines :: Monad m => Char -> ParserT Text st m [(Int,Int)]
+simpleDashedLines :: Monad m => Char -> ParserT Sources st m [(Int,Int)]
simpleDashedLines ch = try $ many1 (dashedLine ch)
-- Parse a table row separator
@@ -1382,7 +1386,7 @@ hyphens = do
-- don't want to treat endline after hyphen or dash as a space
return $ B.str result
-escapedChar :: Monad m => ParserT Text st m Inlines
+escapedChar :: Monad m => ParserT Sources st m Inlines
escapedChar = do c <- escaped anyChar
return $ if c == ' ' || c == '\n' || c == '\r'
-- '\ ' is null in RST
diff --git a/src/Text/Pandoc/Readers/Roff.hs b/src/Text/Pandoc/Readers/Roff.hs
index 509ce1377..47f16ef4b 100644
--- a/src/Text/Pandoc/Readers/Roff.hs
+++ b/src/Text/Pandoc/Readers/Roff.hs
@@ -42,7 +42,6 @@ import Text.Pandoc.Logging (LogMessage(..))
import Text.Pandoc.Options
import Text.Pandoc.Parsing
import Text.Pandoc.Shared (safeRead)
-import Text.Parsec hiding (tokenPrim)
import Text.Pandoc.RoffChar (characterCodes, combiningAccents)
import qualified Data.Sequence as Seq
import qualified Data.Foldable as Foldable
@@ -122,16 +121,16 @@ instance Default RoffState where
, afterConditional = False
}
-type RoffLexer m = ParserT T.Text RoffState m
+type RoffLexer m = ParserT Sources RoffState m
--
-- Lexer: T.Text -> RoffToken
--
-eofline :: Stream s m Char => ParsecT s u m ()
+eofline :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s u m ()
eofline = void newline <|> eof <|> () <$ lookAhead (string "\\}")
-spacetab :: Stream s m Char => ParsecT s u m Char
+spacetab :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s u m Char
spacetab = char ' ' <|> char '\t'
characterCodeMap :: M.Map T.Text Char
@@ -303,8 +302,7 @@ expandString = try $ do
char '*'
cs <- escapeArg <|> countChar 1 anyChar
s <- linePartsToText <$> resolveText cs pos
- getInput >>= setInput . (s <>)
- return ()
+ addToInput s
-- Parses: '..'
quoteArg :: PandocMonad m => RoffLexer m T.Text
@@ -316,7 +314,7 @@ escFont = do
font' <- if T.null font || font == "P"
then prevFont <$> getState
else return $ foldr processFontLetter defaultFontSpec $ T.unpack font
- modifyState $ \st -> st{ prevFont = currentFont st
+ updateState $ \st -> st{ prevFont = currentFont st
, currentFont = font' }
return [Font font']
where
@@ -372,8 +370,8 @@ lexTable pos = do
spaces
opts <- try tableOptions <|> [] <$ optional (char ';')
case lookup "tab" opts of
- Just (T.uncons -> Just (c, _)) -> modifyState $ \st -> st{ tableTabChar = c }
- _ -> modifyState $ \st -> st{ tableTabChar = '\t' }
+ Just (T.uncons -> Just (c, _)) -> updateState $ \st -> st{ tableTabChar = c }
+ _ -> updateState $ \st -> st{ tableTabChar = '\t' }
spaces
skipMany lexComment
spaces
@@ -489,18 +487,18 @@ lexConditional mname = do
ifPart <- do
optional $ try $ char '\\' >> newline
lexGroup
- <|> do modifyState $ \s -> s{ afterConditional = True }
+ <|> do updateState $ \s -> s{ afterConditional = True }
t <- manToken
- modifyState $ \s -> s{ afterConditional = False }
+ updateState $ \s -> s{ afterConditional = False }
return t
case mbtest of
Nothing -> do
- putState st -- reset state, so we don't record macros in skipped section
+ setState st -- reset state, so we don't record macros in skipped section
report $ SkippedContent (T.cons '.' mname) pos
return mempty
Just True -> return ifPart
Just False -> do
- putState st
+ setState st
return mempty
expression :: PandocMonad m => RoffLexer m (Maybe Bool)
@@ -515,7 +513,7 @@ expression = do
_ -> Nothing
where
returnValue v = do
- modifyState $ \st -> st{ lastExpression = v }
+ updateState $ \st -> st{ lastExpression = v }
return v
lexGroup :: PandocMonad m => RoffLexer m RoffTokens
@@ -536,7 +534,7 @@ lexIncludeFile args = do
result <- readFileFromDirs dirs $ T.unpack fp
case result of
Nothing -> report $ CouldNotLoadIncludeFile fp pos
- Just s -> getInput >>= setInput . (s <>)
+ Just s -> addToInput s
return mempty
[] -> return mempty
@@ -564,13 +562,13 @@ lexStringDef args = do -- string definition
(x:ys) -> do
let ts = singleTok $ TextLine (intercalate [RoffStr " " ] ys)
let stringName = linePartsToText x
- modifyState $ \st ->
+ updateState $ \st ->
st{ customMacros = M.insert stringName ts (customMacros st) }
return mempty
lexMacroDef :: PandocMonad m => [Arg] -> RoffLexer m RoffTokens
lexMacroDef args = do -- macro definition
- modifyState $ \st -> st{ roffMode = CopyMode }
+ updateState $ \st -> st{ roffMode = CopyMode }
(macroName, stopMacro) <-
case args of
(x : y : _) -> return (linePartsToText x, linePartsToText y)
@@ -584,7 +582,7 @@ lexMacroDef args = do -- macro definition
_ <- lexArgs
return ()
ts <- mconcat <$> manyTill manToken stop
- modifyState $ \st ->
+ updateState $ \st ->
st{ customMacros = M.insert macroName ts (customMacros st)
, roffMode = NormalMode }
return mempty
diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs
index c4d7bcc93..276d28aaa 100644
--- a/src/Text/Pandoc/Readers/TWiki.hs
+++ b/src/Text/Pandoc/Readers/TWiki.hs
@@ -28,22 +28,22 @@ import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (enclosed, nested)
import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag)
-import Text.Pandoc.Shared (crFilter, tshow)
+import Text.Pandoc.Shared (tshow)
import Text.Pandoc.XML (fromEntities)
-- | Read twiki from an input string and return a Pandoc document.
-readTWiki :: PandocMonad m
+readTWiki :: (PandocMonad m, ToSources a)
=> ReaderOptions
- -> Text
+ -> a
-> m Pandoc
readTWiki opts s = do
- res <- readWithM parseTWiki def{ stateOptions = opts }
- (crFilter s <> "\n\n")
+ let sources = ensureFinalNewlines 2 (toSources s)
+ res <- readWithM parseTWiki def{ stateOptions = opts } sources
case res of
Left e -> throwError e
Right d -> return d
-type TWParser = ParserT Text ParserState
+type TWParser = ParserT Sources ParserState
--
-- utility functions
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 8d7900de4..981878206 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -53,30 +53,34 @@ import Text.Pandoc.Options
import Text.Pandoc.Parsing
import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isInlineTag)
import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline)
-import Text.Pandoc.Shared (crFilter, trim, tshow)
+import Text.Pandoc.Shared (trim, tshow)
-- | Parse a Textile text and return a Pandoc document.
-readTextile :: PandocMonad m
+readTextile :: (PandocMonad m, ToSources a)
=> ReaderOptions -- ^ Reader options
- -> Text -- ^ String to parse (assuming @'\n'@ line endings)
+ -> a
-> m Pandoc
readTextile opts s = do
- parsed <- readWithM parseTextile def{ stateOptions = opts }
- (crFilter s <> "\n\n")
+ let sources = ensureFinalNewlines 2 (toSources s)
+ parsed <- readWithM parseTextile def{ stateOptions = opts } sources
case parsed of
Right result -> return result
Left e -> throwError e
+type TextileParser = ParserT Sources ParserState
-- | Generate a Pandoc ADT from a textile document
-parseTextile :: PandocMonad m => ParserT Text ParserState m Pandoc
+parseTextile :: PandocMonad m => TextileParser m Pandoc
parseTextile = do
many blankline
startPos <- getPosition
-- go through once just to get list of reference keys and notes
-- docMinusKeys is the raw document with blanks where the keys/notes were...
- let firstPassParser = noteBlock <|> lineClump
- manyTill firstPassParser eof >>= setInput . T.concat
+ let firstPassParser = do
+ pos <- getPosition
+ t <- noteBlock <|> lineClump
+ return (pos, t)
+ manyTill firstPassParser eof >>= setInput . Sources
setPosition startPos
st' <- getState
let reversedNotes = stateNotes st'
@@ -84,10 +88,10 @@ parseTextile = do
-- now parse it for real...
Pandoc nullMeta . B.toList <$> parseBlocks -- FIXME
-noteMarker :: PandocMonad m => ParserT Text ParserState m Text
+noteMarker :: PandocMonad m => TextileParser m Text
noteMarker = skipMany spaceChar >> string "fn" >> T.pack <$> manyTill digit (char '.')
-noteBlock :: PandocMonad m => ParserT Text ParserState m Text
+noteBlock :: PandocMonad m => TextileParser m Text
noteBlock = try $ do
startPos <- getPosition
ref <- noteMarker
@@ -102,11 +106,11 @@ noteBlock = try $ do
return $ T.replicate (sourceLine endPos - sourceLine startPos) "\n"
-- | Parse document blocks
-parseBlocks :: PandocMonad m => ParserT Text ParserState m Blocks
+parseBlocks :: PandocMonad m => TextileParser m Blocks
parseBlocks = mconcat <$> manyTill block eof
-- | Block parsers list tried in definition order
-blockParsers :: PandocMonad m => [ParserT Text ParserState m Blocks]
+blockParsers :: PandocMonad m => [TextileParser m Blocks]
blockParsers = [ codeBlock
, header
, blockQuote
@@ -121,22 +125,22 @@ blockParsers = [ codeBlock
]
-- | Any block in the order of definition of blockParsers
-block :: PandocMonad m => ParserT Text ParserState m Blocks
+block :: PandocMonad m => TextileParser m Blocks
block = do
res <- choice blockParsers <?> "block"
trace (T.take 60 $ tshow $ B.toList res)
return res
-commentBlock :: PandocMonad m => ParserT Text ParserState m Blocks
+commentBlock :: PandocMonad m => TextileParser m Blocks
commentBlock = try $ do
string "###."
manyTill anyLine blanklines
return mempty
-codeBlock :: PandocMonad m => ParserT Text ParserState m Blocks
+codeBlock :: PandocMonad m => TextileParser m Blocks
codeBlock = codeBlockTextile <|> codeBlockHtml
-codeBlockTextile :: PandocMonad m => ParserT Text ParserState m Blocks
+codeBlockTextile :: PandocMonad m => TextileParser m Blocks
codeBlockTextile = try $ do
string "bc." <|> string "pre."
extended <- option False (True <$ char '.')
@@ -156,7 +160,7 @@ trimTrailingNewlines :: Text -> Text
trimTrailingNewlines = T.dropWhileEnd (=='\n')
-- | Code Blocks in Textile are between <pre> and </pre>
-codeBlockHtml :: PandocMonad m => ParserT Text ParserState m Blocks
+codeBlockHtml :: PandocMonad m => TextileParser m Blocks
codeBlockHtml = try $ do
(t@(TagOpen _ attrs),_) <- htmlTag (tagOpen (=="pre") (const True))
result' <- T.pack <$> manyTill anyChar (htmlTag (tagClose (=="pre")))
@@ -174,7 +178,7 @@ codeBlockHtml = try $ do
return $ B.codeBlockWith (ident,classes,kvs) result'''
-- | Header of the form "hN. content" with N in 1..6
-header :: PandocMonad m => ParserT Text ParserState m Blocks
+header :: PandocMonad m => TextileParser m Blocks
header = try $ do
char 'h'
level <- digitToInt <$> oneOf "123456"
@@ -186,14 +190,14 @@ header = try $ do
return $ B.headerWith attr' level name
-- | Blockquote of the form "bq. content"
-blockQuote :: PandocMonad m => ParserT Text ParserState m Blocks
+blockQuote :: PandocMonad m => TextileParser m Blocks
blockQuote = try $ do
string "bq" >> attributes >> char '.' >> whitespace
B.blockQuote <$> para
-- Horizontal rule
-hrule :: PandocMonad m => ParserT Text st m Blocks
+hrule :: PandocMonad m => TextileParser m Blocks
hrule = try $ do
skipSpaces
start <- oneOf "-*"
@@ -208,39 +212,39 @@ hrule = try $ do
-- | Can be a bullet list or an ordered list. This implementation is
-- strict in the nesting, sublist must start at exactly "parent depth
-- plus one"
-anyList :: PandocMonad m => ParserT Text ParserState m Blocks
+anyList :: PandocMonad m => TextileParser m Blocks
anyList = try $ anyListAtDepth 1 <* blanklines
-- | This allow one type of list to be nested into an other type,
-- provided correct nesting
-anyListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+anyListAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
anyListAtDepth depth = choice [ bulletListAtDepth depth,
orderedListAtDepth depth,
definitionList ]
-- | Bullet List of given depth, depth being the number of leading '*'
-bulletListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+bulletListAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
bulletListAtDepth depth = try $ B.bulletList <$> many1 (bulletListItemAtDepth depth)
-- | Bullet List Item of given depth, depth being the number of
-- leading '*'
-bulletListItemAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+bulletListItemAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
bulletListItemAtDepth = genericListItemAtDepth '*'
-- | Ordered List of given depth, depth being the number of
-- leading '#'
-orderedListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+orderedListAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
orderedListAtDepth depth = try $ do
items <- many1 (orderedListItemAtDepth depth)
return $ B.orderedList items
-- | Ordered List Item of given depth, depth being the number of
-- leading '#'
-orderedListItemAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+orderedListItemAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
orderedListItemAtDepth = genericListItemAtDepth '#'
-- | Common implementation of list items
-genericListItemAtDepth :: PandocMonad m => Char -> Int -> ParserT Text ParserState m Blocks
+genericListItemAtDepth :: PandocMonad m => Char -> Int -> TextileParser m Blocks
genericListItemAtDepth c depth = try $ do
count depth (char c) >> attributes >> whitespace
contents <- mconcat <$> many ((B.plain . mconcat <$> many1 inline) <|>
@@ -250,25 +254,25 @@ genericListItemAtDepth c depth = try $ do
return $ contents <> sublist
-- | A definition list is a set of consecutive definition items
-definitionList :: PandocMonad m => ParserT Text ParserState m Blocks
+definitionList :: PandocMonad m => TextileParser m Blocks
definitionList = try $ B.definitionList <$> many1 definitionListItem
-- | List start character.
-listStart :: PandocMonad m => ParserT Text ParserState m ()
+listStart :: PandocMonad m => TextileParser m ()
listStart = genericListStart '*'
<|> () <$ genericListStart '#'
<|> () <$ definitionListStart
-genericListStart :: PandocMonad m => Char -> ParserT Text st m ()
+genericListStart :: PandocMonad m => Char -> TextileParser m ()
genericListStart c = () <$ try (many1 (char c) >> whitespace)
-basicDLStart :: PandocMonad m => ParserT Text ParserState m ()
+basicDLStart :: PandocMonad m => TextileParser m ()
basicDLStart = do
char '-'
whitespace
notFollowedBy newline
-definitionListStart :: PandocMonad m => ParserT Text ParserState m Inlines
+definitionListStart :: PandocMonad m => TextileParser m Inlines
definitionListStart = try $ do
basicDLStart
trimInlines . mconcat <$>
@@ -281,15 +285,15 @@ definitionListStart = try $ do
-- the term defined, then spaces and ":=". The definition follows, on
-- the same single line, or spaned on multiple line, after a line
-- break.
-definitionListItem :: PandocMonad m => ParserT Text ParserState m (Inlines, [Blocks])
+definitionListItem :: PandocMonad m => TextileParser m (Inlines, [Blocks])
definitionListItem = try $ do
term <- mconcat . intersperse B.linebreak <$> many1 definitionListStart
def' <- string ":=" *> optional whitespace *> (multilineDef <|> inlineDef)
return (term, def')
- where inlineDef :: PandocMonad m => ParserT Text ParserState m [Blocks]
+ where inlineDef :: PandocMonad m => TextileParser m [Blocks]
inlineDef = liftM (\d -> [B.plain d])
$ optional whitespace >> (trimInlines . mconcat <$> many inline) <* newline
- multilineDef :: PandocMonad m => ParserT Text ParserState m [Blocks]
+ multilineDef :: PandocMonad m => TextileParser m [Blocks]
multilineDef = try $ do
optional whitespace >> newline
s <- T.pack <$> many1Till anyChar (try (string "=:" >> newline))
@@ -300,7 +304,7 @@ definitionListItem = try $ do
-- raw content
-- | A raw Html Block, optionally followed by blanklines
-rawHtmlBlock :: PandocMonad m => ParserT Text ParserState m Blocks
+rawHtmlBlock :: PandocMonad m => TextileParser m Blocks
rawHtmlBlock = try $ do
skipMany spaceChar
(_,b) <- htmlTag isBlockTag
@@ -308,14 +312,14 @@ rawHtmlBlock = try $ do
return $ B.rawBlock "html" b
-- | Raw block of LaTeX content
-rawLaTeXBlock' :: PandocMonad m => ParserT Text ParserState m Blocks
+rawLaTeXBlock' :: PandocMonad m => TextileParser m Blocks
rawLaTeXBlock' = do
guardEnabled Ext_raw_tex
B.rawBlock "latex" <$> (rawLaTeXBlock <* spaces)
-- | In textile, paragraphs are separated by blank lines.
-para :: PandocMonad m => ParserT Text ParserState m Blocks
+para :: PandocMonad m => TextileParser m Blocks
para = B.para . trimInlines . mconcat <$> many1 inline
-- Tables
@@ -326,7 +330,7 @@ toAlignment '>' = AlignRight
toAlignment '=' = AlignCenter
toAlignment _ = AlignDefault
-cellAttributes :: PandocMonad m => ParserT Text ParserState m (Bool, Alignment)
+cellAttributes :: PandocMonad m => TextileParser m (Bool, Alignment)
cellAttributes = try $ do
isHeader <- option False (True <$ char '_')
-- we just ignore colspan and rowspan markers:
@@ -339,7 +343,7 @@ cellAttributes = try $ do
return (isHeader, alignment)
-- | A table cell spans until a pipe |
-tableCell :: PandocMonad m => ParserT Text ParserState m ((Bool, Alignment), Blocks)
+tableCell :: PandocMonad m => TextileParser m ((Bool, Alignment), Blocks)
tableCell = try $ do
char '|'
(isHeader, alignment) <- option (False, AlignDefault) cellAttributes
@@ -350,7 +354,7 @@ tableCell = try $ do
return ((isHeader, alignment), B.plain content)
-- | A table row is made of many table cells
-tableRow :: PandocMonad m => ParserT Text ParserState m [((Bool, Alignment), Blocks)]
+tableRow :: PandocMonad m => TextileParser m [((Bool, Alignment), Blocks)]
tableRow = try $ do
-- skip optional row attributes
optional $ try $ do
@@ -360,7 +364,7 @@ tableRow = try $ do
many1 tableCell <* char '|' <* blankline
-- | A table with an optional header.
-table :: PandocMonad m => ParserT Text ParserState m Blocks
+table :: PandocMonad m => TextileParser m Blocks
table = try $ do
-- ignore table attributes
caption <- option mempty $ try $ do
@@ -388,7 +392,7 @@ table = try $ do
(TableFoot nullAttr [])
-- | Ignore markers for cols, thead, tfoot.
-ignorableRow :: PandocMonad m => ParserT Text ParserState m ()
+ignorableRow :: PandocMonad m => TextileParser m ()
ignorableRow = try $ do
char '|'
oneOf ":^-~"
@@ -397,7 +401,7 @@ ignorableRow = try $ do
_ <- anyLine
return ()
-explicitBlockStart :: PandocMonad m => Text -> ParserT Text ParserState m ()
+explicitBlockStart :: PandocMonad m => Text -> TextileParser m ()
explicitBlockStart name = try $ do
string (T.unpack name)
attributes
@@ -409,8 +413,8 @@ explicitBlockStart name = try $ do
-- However, they can be used to set HTML/CSS attributes when needed.
maybeExplicitBlock :: PandocMonad m
=> Text -- ^ block tag name
- -> ParserT Text ParserState m Blocks -- ^ implicit block
- -> ParserT Text ParserState m Blocks
+ -> TextileParser m Blocks -- ^ implicit block
+ -> TextileParser m Blocks
maybeExplicitBlock name blk = try $ do
optional $ explicitBlockStart name
blk
@@ -423,11 +427,11 @@ maybeExplicitBlock name blk = try $ do
-- | Any inline element
-inline :: PandocMonad m => ParserT Text ParserState m Inlines
+inline :: PandocMonad m => TextileParser m Inlines
inline = choice inlineParsers <?> "inline"
-- | Inline parsers tried in order
-inlineParsers :: PandocMonad m => [ParserT Text ParserState m Inlines]
+inlineParsers :: PandocMonad m => [TextileParser m Inlines]
inlineParsers = [ str
, whitespace
, endline
@@ -447,7 +451,7 @@ inlineParsers = [ str
]
-- | Inline markups
-inlineMarkup :: PandocMonad m => ParserT Text ParserState m Inlines
+inlineMarkup :: PandocMonad m => TextileParser m Inlines
inlineMarkup = choice [ simpleInline (string "??") (B.cite [])
, simpleInline (string "**") B.strong
, simpleInline (string "__") B.emph
@@ -461,29 +465,29 @@ inlineMarkup = choice [ simpleInline (string "??") (B.cite [])
]
-- | Trademark, registered, copyright
-mark :: PandocMonad m => ParserT Text st m Inlines
+mark :: PandocMonad m => TextileParser m Inlines
mark = try $ char '(' >> (try tm <|> try reg <|> copy)
-reg :: PandocMonad m => ParserT Text st m Inlines
+reg :: PandocMonad m => TextileParser m Inlines
reg = do
oneOf "Rr"
char ')'
return $ B.str "\174"
-tm :: PandocMonad m => ParserT Text st m Inlines
+tm :: PandocMonad m => TextileParser m Inlines
tm = do
oneOf "Tt"
oneOf "Mm"
char ')'
return $ B.str "\8482"
-copy :: PandocMonad m => ParserT Text st m Inlines
+copy :: PandocMonad m => TextileParser m Inlines
copy = do
oneOf "Cc"
char ')'
return $ B.str "\169"
-note :: PandocMonad m => ParserT Text ParserState m Inlines
+note :: PandocMonad m => TextileParser m Inlines
note = try $ do
ref <- char '[' *> many1 digit <* char ']'
notes <- stateNotes <$> getState
@@ -507,13 +511,13 @@ wordBoundaries :: [Char]
wordBoundaries = markupChars <> stringBreakers
-- | Parse a hyphened sequence of words
-hyphenedWords :: PandocMonad m => ParserT Text ParserState m Text
+hyphenedWords :: PandocMonad m => TextileParser m Text
hyphenedWords = do
x <- wordChunk
xs <- many (try $ char '-' >> wordChunk)
return $ T.intercalate "-" (x:xs)
-wordChunk :: PandocMonad m => ParserT Text ParserState m Text
+wordChunk :: PandocMonad m => TextileParser m Text
wordChunk = try $ do
hd <- noneOf wordBoundaries
tl <- many ( noneOf wordBoundaries <|>
@@ -522,7 +526,7 @@ wordChunk = try $ do
return $ T.pack $ hd:tl
-- | Any string
-str :: PandocMonad m => ParserT Text ParserState m Inlines
+str :: PandocMonad m => TextileParser m Inlines
str = do
baseStr <- hyphenedWords
-- RedCloth compliance : if parsed word is uppercase and immediately
@@ -535,11 +539,11 @@ str = do
return $ B.str fullStr
-- | Some number of space chars
-whitespace :: PandocMonad m => ParserT Text st m Inlines
+whitespace :: PandocMonad m => TextileParser m Inlines
whitespace = many1 spaceChar >> return B.space <?> "whitespace"
-- | In Textile, an isolated endline character is a line break
-endline :: PandocMonad m => ParserT Text ParserState m Inlines
+endline :: PandocMonad m => TextileParser m Inlines
endline = try $ do
newline
notFollowedBy blankline
@@ -547,18 +551,18 @@ endline = try $ do
notFollowedBy rawHtmlBlock
return B.linebreak
-rawHtmlInline :: PandocMonad m => ParserT Text ParserState m Inlines
+rawHtmlInline :: PandocMonad m => TextileParser m Inlines
rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag
-- | Raw LaTeX Inline
-rawLaTeXInline' :: PandocMonad m => ParserT Text ParserState m Inlines
+rawLaTeXInline' :: PandocMonad m => TextileParser m Inlines
rawLaTeXInline' = try $ do
guardEnabled Ext_raw_tex
B.rawInline "latex" <$> rawLaTeXInline
-- | Textile standard link syntax is "label":target. But we
-- can also have ["label":target].
-link :: PandocMonad m => ParserT Text ParserState m Inlines
+link :: PandocMonad m => TextileParser m Inlines
link = try $ do
bracketed <- (True <$ char '[') <|> return False
char '"' *> notFollowedBy (oneOf " \t\n\r")
@@ -578,7 +582,7 @@ link = try $ do
else B.spanWith attr $ B.link url "" name'
-- | image embedding
-image :: PandocMonad m => ParserT Text ParserState m Inlines
+image :: PandocMonad m => TextileParser m Inlines
image = try $ do
char '!' >> notFollowedBy space
(ident, cls, kvs) <- attributes
@@ -590,51 +594,51 @@ image = try $ do
char '!'
return $ B.imageWith attr src alt (B.str alt)
-escapedInline :: PandocMonad m => ParserT Text ParserState m Inlines
+escapedInline :: PandocMonad m => TextileParser m Inlines
escapedInline = escapedEqs <|> escapedTag
-escapedEqs :: PandocMonad m => ParserT Text ParserState m Inlines
+escapedEqs :: PandocMonad m => TextileParser m Inlines
escapedEqs = B.str . T.pack <$>
try (string "==" *> manyTill anyChar' (try $ string "=="))
-- | literal text escaped btw <notextile> tags
-escapedTag :: PandocMonad m => ParserT Text ParserState m Inlines
+escapedTag :: PandocMonad m => TextileParser m Inlines
escapedTag = B.str . T.pack <$>
try (string "<notextile>" *>
manyTill anyChar' (try $ string "</notextile>"))
-- | Any special symbol defined in wordBoundaries
-symbol :: PandocMonad m => ParserT Text ParserState m Inlines
+symbol :: PandocMonad m => TextileParser m Inlines
symbol = B.str . T.singleton <$> (notFollowedBy newline *>
notFollowedBy rawHtmlBlock *>
oneOf wordBoundaries)
-- | Inline code
-code :: PandocMonad m => ParserT Text ParserState m Inlines
+code :: PandocMonad m => TextileParser m Inlines
code = code1 <|> code2
-- any character except a newline before a blank line
-anyChar' :: PandocMonad m => ParserT Text ParserState m Char
+anyChar' :: PandocMonad m => TextileParser m Char
anyChar' =
satisfy (/='\n') <|>
try (char '\n' <* notFollowedBy blankline)
-code1 :: PandocMonad m => ParserT Text ParserState m Inlines
+code1 :: PandocMonad m => TextileParser m Inlines
code1 = B.code . T.pack <$> surrounded (char '@') anyChar'
-code2 :: PandocMonad m => ParserT Text ParserState m Inlines
+code2 :: PandocMonad m => TextileParser m Inlines
code2 = do
htmlTag (tagOpen (=="tt") null)
B.code . T.pack <$> manyTill anyChar' (try $ htmlTag $ tagClose (=="tt"))
-- | Html / CSS attributes
-attributes :: PandocMonad m => ParserT Text ParserState m Attr
+attributes :: PandocMonad m => TextileParser m Attr
attributes = foldl' (flip ($)) ("",[],[]) <$>
try (do special <- option id specialAttribute
attrs <- many attribute
return (special : attrs))
-specialAttribute :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+specialAttribute :: PandocMonad m => TextileParser m (Attr -> Attr)
specialAttribute = do
alignStr <- ("center" <$ char '=') <|>
("justify" <$ try (string "<>")) <|>
@@ -643,11 +647,11 @@ specialAttribute = do
notFollowedBy spaceChar
return $ addStyle $ T.pack $ "text-align:" ++ alignStr
-attribute :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+attribute :: PandocMonad m => TextileParser m (Attr -> Attr)
attribute = try $
(classIdAttr <|> styleAttr <|> langAttr) <* notFollowedBy spaceChar
-classIdAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+classIdAttr :: PandocMonad m => TextileParser m (Attr -> Attr)
classIdAttr = try $ do -- (class class #id)
char '('
ws <- T.words `fmap` T.pack <$> manyTill anyChar' (char ')')
@@ -659,7 +663,7 @@ classIdAttr = try $ do -- (class class #id)
classes'
-> return $ \(_,_,keyvals) -> ("",classes',keyvals)
-styleAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+styleAttr :: PandocMonad m => TextileParser m (Attr -> Attr)
styleAttr = do
style <- try $ enclosed (char '{') (char '}') anyChar'
return $ addStyle $ T.pack style
@@ -670,23 +674,23 @@ addStyle style (id',classes,keyvals) =
where keyvals' = ("style", style') : [(k,v) | (k,v) <- keyvals, k /= "style"]
style' = style <> ";" <> T.concat [v | ("style",v) <- keyvals]
-langAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+langAttr :: PandocMonad m => TextileParser m (Attr -> Attr)
langAttr = do
lang <- try $ enclosed (char '[') (char ']') alphaNum
return $ \(id',classes,keyvals) -> (id',classes,("lang",T.pack lang):keyvals)
-- | Parses material surrounded by a parser.
surrounded :: (PandocMonad m, Show t)
- => ParserT Text st m t -- ^ surrounding parser
- -> ParserT Text st m a -- ^ content parser (to be used repeatedly)
- -> ParserT Text st m [a]
+ => ParserT Sources st m t -- ^ surrounding parser
+ -> ParserT Sources st m a -- ^ content parser (to be used repeatedly)
+ -> ParserT Sources st m [a]
surrounded border =
enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border)
simpleInline :: PandocMonad m
- => ParserT Text ParserState m t -- ^ surrounding parser
+ => TextileParser m t -- ^ surrounding parser
-> (Inlines -> Inlines) -- ^ Inline constructor
- -> ParserT Text ParserState m Inlines -- ^ content parser (to be used repeatedly)
+ -> TextileParser m Inlines -- ^ content parser (to be used repeatedly)
simpleInline border construct = try $ do
notAfterString
border *> notFollowedBy (oneOf " \t\n\r")
@@ -700,7 +704,7 @@ simpleInline border construct = try $ do
then body
else B.spanWith attr body
-groupedInlineMarkup :: PandocMonad m => ParserT Text ParserState m Inlines
+groupedInlineMarkup :: PandocMonad m => TextileParser m Inlines
groupedInlineMarkup = try $ do
char '['
sp1 <- option mempty $ B.space <$ whitespace
@@ -709,5 +713,5 @@ groupedInlineMarkup = try $ do
char ']'
return $ sp1 <> result <> sp2
-eof' :: Monad m => ParserT Text s m Char
+eof' :: Monad m => ParserT Sources s m Char
eof' = '\n' <$ eof
diff --git a/src/Text/Pandoc/Readers/TikiWiki.hs b/src/Text/Pandoc/Readers/TikiWiki.hs
index fb4b662c5..5c414fdec 100644
--- a/src/Text/Pandoc/Readers/TikiWiki.hs
+++ b/src/Text/Pandoc/Readers/TikiWiki.hs
@@ -30,23 +30,23 @@ import Text.Pandoc.Definition
import Text.Pandoc.Logging (Verbosity (..))
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (enclosed, nested)
-import Text.Pandoc.Shared (crFilter, safeRead)
+import Text.Pandoc.Shared (safeRead)
import Text.Pandoc.XML (fromEntities)
import Text.Printf (printf)
-- | Read TikiWiki from an input string and return a Pandoc document.
-readTikiWiki :: PandocMonad m
+readTikiWiki :: (PandocMonad m, ToSources a)
=> ReaderOptions
- -> Text
+ -> a
-> m Pandoc
readTikiWiki opts s = do
- res <- readWithM parseTikiWiki def{ stateOptions = opts }
- (crFilter s <> "\n\n")
+ let sources = ensureFinalNewlines 2 (toSources s)
+ res <- readWithM parseTikiWiki def{ stateOptions = opts } sources
case res of
Left e -> throwError e
Right d -> return d
-type TikiWikiParser = ParserT Text ParserState
+type TikiWikiParser = ParserT Sources ParserState
--
-- utility functions
diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs
index d355a4b55..6f92f0063 100644
--- a/src/Text/Pandoc/Readers/Txt2Tags.hs
+++ b/src/Text/Pandoc/Readers/Txt2Tags.hs
@@ -33,9 +33,9 @@ import Data.Time (defaultTimeLocale)
import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (space, spaces, uri)
-import Text.Pandoc.Shared (compactify, compactifyDL, crFilter, escapeURI)
+import Text.Pandoc.Shared (compactify, compactifyDL, escapeURI)
-type T2T = ParserT Text ParserState (Reader T2TMeta)
+type T2T = ParserT Sources ParserState (Reader T2TMeta)
-- | An object for the T2T macros meta information
-- the contents of each field is simply substituted verbatim into the file
@@ -68,15 +68,15 @@ getT2TMeta = do
(intercalate ", " inps) outp
-- | Read Txt2Tags from an input string returning a Pandoc document
-readTxt2Tags :: PandocMonad m
+readTxt2Tags :: (PandocMonad m, ToSources a)
=> ReaderOptions
- -> Text
+ -> a
-> m Pandoc
readTxt2Tags opts s = do
+ let sources = ensureFinalNewlines 2 (toSources s)
meta <- getT2TMeta
let parsed = flip runReader meta $
- readWithM parseT2T (def {stateOptions = opts}) $
- crFilter s <> "\n\n"
+ readWithM parseT2T (def {stateOptions = opts}) sources
case parsed of
Right result -> return result
Left e -> throwError e
diff --git a/src/Text/Pandoc/Readers/Vimwiki.hs b/src/Text/Pandoc/Readers/Vimwiki.hs
index 74dac5ea7..460f304c4 100644
--- a/src/Text/Pandoc/Readers/Vimwiki.hs
+++ b/src/Text/Pandoc/Readers/Vimwiki.hs
@@ -74,23 +74,28 @@ import Text.Pandoc.Parsing (ParserState, ParserT, blanklines, emailAddress,
many1Till, orderedListMarker, readWithM,
registerHeader, spaceChar, stateMeta,
stateOptions, uri, manyTillChar, manyChar, textStr,
- many1Char, countChar, many1TillChar)
-import Text.Pandoc.Shared (crFilter, splitTextBy, stringify, stripFirstAndLast,
+ many1Char, countChar, many1TillChar,
+ alphaNum, anyChar, char, newline, noneOf, oneOf,
+ space, spaces, string)
+import Text.Pandoc.Sources (ToSources(..), Sources)
+import Text.Pandoc.Shared (splitTextBy, stringify, stripFirstAndLast,
isURI, tshow)
-import Text.Parsec.Char (alphaNum, anyChar, char, newline, noneOf, oneOf, space,
- spaces, string)
import Text.Parsec.Combinator (between, choice, eof, lookAhead, many1,
manyTill, notFollowedBy, option, skipMany1)
import Text.Parsec.Prim (getState, many, try, updateState, (<|>))
-readVimwiki :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readVimwiki :: (PandocMonad m, ToSources a)
+ => ReaderOptions
+ -> a
+ -> m Pandoc
readVimwiki opts s = do
- res <- readWithM parseVimwiki def{ stateOptions = opts } $ crFilter s
+ let sources = toSources s
+ res <- readWithM parseVimwiki def{ stateOptions = opts } sources
case res of
Left e -> throwError e
Right result -> return result
-type VwParser = ParserT Text ParserState
+type VwParser = ParserT Sources ParserState
-- constants
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index e389c1727..920edca7b 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -298,6 +298,7 @@ tabFilter tabStop = T.unlines . map go . T.lines
(tabStop - (T.length s1 `mod` tabStop)) (T.pack " ")
<> go (T.drop 1 s2)
+{-# DEPRECATED crFilter "readers filter crs automatically" #-}
-- | Strip out DOS line endings.
crFilter :: T.Text -> T.Text
crFilter = T.filter (/= '\r')
diff --git a/src/Text/Pandoc/Sources.hs b/src/Text/Pandoc/Sources.hs
new file mode 100644
index 000000000..5511ccfb8
--- /dev/null
+++ b/src/Text/Pandoc/Sources.hs
@@ -0,0 +1,195 @@
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+ Module : Text.Pandoc.Sources
+ Copyright : Copyright (C) 2021 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Defines Sources object to be used as input to pandoc parsers and redefines Char
+parsers so they get source position information from it.
+-}
+
+module Text.Pandoc.Sources
+ ( Sources(..)
+ , ToSources(..)
+ , UpdateSourcePos(..)
+ , sourcesToText
+ , initialSourceName
+ , addToSources
+ , ensureFinalNewlines
+ , addToInput
+ , satisfy
+ , oneOf
+ , noneOf
+ , anyChar
+ , char
+ , string
+ , newline
+ , space
+ , spaces
+ , letter
+ , digit
+ , hexDigit
+ , alphaNum
+ )
+where
+import qualified Text.Parsec as P
+import Text.Parsec (Stream(..), ParsecT)
+import Text.Parsec.Pos as P
+import Data.Text (Text)
+import qualified Data.Text as T
+import Data.Char (isSpace, isLetter, isAlphaNum, isDigit, isHexDigit)
+import Data.String (IsString(..))
+import qualified Data.List.NonEmpty as NonEmpty
+
+-- | A list of inputs labeled with source positions. It is assumed
+-- that the 'Text's have @\n@ line endings.
+newtype Sources = Sources { unSources :: [(SourcePos, Text)] }
+ deriving (Show, Semigroup, Monoid)
+
+instance Monad m => Stream Sources m Char where
+ uncons (Sources []) = return Nothing
+ uncons (Sources ((pos,t):rest)) =
+ case T.uncons t of
+ Nothing -> uncons (Sources rest)
+ Just (c,t') -> return $ Just (c, Sources ((pos,t'):rest))
+
+instance IsString Sources where
+ fromString s = Sources [(P.initialPos "", T.pack (filter (/='\r') s))]
+
+class ToSources a where
+ toSources :: a -> Sources
+
+instance ToSources Text where
+ toSources t = Sources [(P.initialPos "", T.filter (/='\r') t)]
+
+instance ToSources [(FilePath, Text)] where
+ toSources = Sources
+ . map (\(fp,t) ->
+ (P.initialPos fp, T.snoc (T.filter (/='\r') t) '\n'))
+
+instance ToSources Sources where
+ toSources = id
+
+sourcesToText :: Sources -> Text
+sourcesToText (Sources xs) = mconcat $ map snd xs
+
+addToSources :: Monad m => SourcePos -> Text -> ParsecT Sources u m ()
+addToSources pos t = do
+ curpos <- P.getPosition
+ Sources xs <- P.getInput
+ let xs' = case xs of
+ [] -> []
+ ((_,t'):rest) -> (curpos,t'):rest
+ P.setInput $ Sources ((pos, T.filter (/='\r') t):xs')
+
+ensureFinalNewlines :: Int -- ^ number of trailing newlines
+ -> Sources
+ -> Sources
+ensureFinalNewlines n (Sources xs) =
+ case NonEmpty.nonEmpty xs of
+ Nothing -> Sources [(initialPos "", T.replicate n "\n")]
+ Just lst ->
+ case NonEmpty.last lst of
+ (spos, t) ->
+ case T.length (T.takeWhileEnd (=='\n') t) of
+ len | len >= n -> Sources xs
+ | otherwise -> Sources (NonEmpty.init lst ++
+ [(spos,
+ t <> T.replicate (n - len) "\n")])
+
+class UpdateSourcePos s c where
+ updateSourcePos :: SourcePos -> c -> s -> SourcePos
+
+instance UpdateSourcePos Text Char where
+ updateSourcePos pos c _ = updatePosChar pos c
+
+instance UpdateSourcePos Sources Char where
+ updateSourcePos pos c sources =
+ case sources of
+ Sources [] -> updatePosChar pos c
+ Sources ((_,t):(pos',_):_)
+ | T.null t -> pos'
+ Sources _ ->
+ case c of
+ '\n' -> incSourceLine (setSourceColumn pos 1) 1
+ '\t' -> incSourceColumn pos (4 - ((sourceColumn pos - 1) `mod` 4))
+ _ -> incSourceColumn pos 1
+
+-- | Get name of first source in 'Sources'.
+initialSourceName :: Sources -> FilePath
+initialSourceName (Sources []) = ""
+initialSourceName (Sources ((pos,_):_)) = sourceName pos
+
+-- | Add some text to the beginning of the input sources.
+-- This simplifies code that expands macros.
+addToInput :: Monad m => Text -> ParsecT Sources u m ()
+addToInput t = do
+ Sources xs <- P.getInput
+ case xs of
+ [] -> P.setInput $ Sources [(initialPos "",t)]
+ (pos,t'):rest -> P.setInput $ Sources ((pos, t <> t'):rest)
+
+-- We need to redefine the parsers in Text.Parsec.Char so that they
+-- update source positions properly from the Sources stream.
+
+satisfy :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+ => (Char -> Bool) -> ParsecT s u m Char
+satisfy f = P.tokenPrim show updateSourcePos matcher
+ where
+ matcher c = if f c then Just c else Nothing
+
+oneOf :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+ => [Char] -> ParsecT s u m Char
+oneOf cs = satisfy (`elem` cs)
+
+noneOf :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+ => [Char] -> ParsecT s u m Char
+noneOf cs = satisfy (`notElem` cs)
+
+anyChar :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+ => ParsecT s u m Char
+anyChar = satisfy (const True)
+
+char :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+ => Char -> ParsecT s u m Char
+char c = satisfy (== c)
+
+string :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+ => [Char] -> ParsecT s u m [Char]
+string = mapM char
+
+newline :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+ => ParsecT s u m Char
+newline = satisfy (== '\n')
+
+space :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+ => ParsecT s u m Char
+space = satisfy isSpace
+
+spaces :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+ => ParsecT s u m ()
+spaces = P.skipMany space P.<?> "white space"
+
+letter :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+ => ParsecT s u m Char
+letter = satisfy isLetter
+
+alphaNum :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+ => ParsecT s u m Char
+alphaNum = satisfy isAlphaNum
+
+digit :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+ => ParsecT s u m Char
+digit = satisfy isDigit
+
+hexDigit :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+ => ParsecT s u m Char
+hexDigit = satisfy isHexDigit