aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r--src/Text/Pandoc/Readers/BibTeX.hs13
-rw-r--r--src/Text/Pandoc/Readers/CSV.hs14
-rw-r--r--src/Text/Pandoc/Readers/CommonMark.hs70
-rw-r--r--src/Text/Pandoc/Readers/Creole.hs11
-rw-r--r--src/Text/Pandoc/Readers/CslJson.hs9
-rw-r--r--src/Text/Pandoc/Readers/DocBook.hs11
-rw-r--r--src/Text/Pandoc/Readers/DokuWiki.hs17
-rw-r--r--src/Text/Pandoc/Readers/FB2.hs9
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs33
-rw-r--r--src/Text/Pandoc/Readers/Haddock.hs12
-rw-r--r--src/Text/Pandoc/Readers/Ipynb.hs8
-rw-r--r--src/Text/Pandoc/Readers/JATS.hs11
-rw-r--r--src/Text/Pandoc/Readers/Jira.hs16
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs23
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Citation.hs2
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Parsing.hs12
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Types.hs15
-rw-r--r--src/Text/Pandoc/Readers/Man.hs29
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs49
-rw-r--r--src/Text/Pandoc/Readers/MediaWiki.hs13
-rw-r--r--src/Text/Pandoc/Readers/Metadata.hs26
-rw-r--r--src/Text/Pandoc/Readers/Muse.hs17
-rw-r--r--src/Text/Pandoc/Readers/Native.hs12
-rw-r--r--src/Text/Pandoc/Readers/OPML.hs13
-rw-r--r--src/Text/Pandoc/Readers/Org.hs11
-rw-r--r--src/Text/Pandoc/Readers/Org/Inlines.hs3
-rw-r--r--src/Text/Pandoc/Readers/Org/Parsing.hs2
-rw-r--r--src/Text/Pandoc/Readers/RST.hs76
-rw-r--r--src/Text/Pandoc/Readers/Roff.hs34
-rw-r--r--src/Text/Pandoc/Readers/TWiki.hs12
-rw-r--r--src/Text/Pandoc/Readers/Textile.hs172
-rw-r--r--src/Text/Pandoc/Readers/TikiWiki.hs12
-rw-r--r--src/Text/Pandoc/Readers/Txt2Tags.hs12
-rw-r--r--src/Text/Pandoc/Readers/Vimwiki.hs19
34 files changed, 443 insertions, 355 deletions
diff --git a/src/Text/Pandoc/Readers/BibTeX.hs b/src/Text/Pandoc/Readers/BibTeX.hs
index b82a81350..318afda85 100644
--- a/src/Text/Pandoc/Readers/BibTeX.hs
+++ b/src/Text/Pandoc/Readers/BibTeX.hs
@@ -23,30 +23,33 @@ where
import Text.Pandoc.Options
import Text.Pandoc.Definition
import Text.Pandoc.Builder (setMeta, cite, str)
-import Data.Text (Text)
import Citeproc (Lang(..), parseLang)
import Citeproc.Locale (getLocale)
import Text.Pandoc.Error (PandocError(..))
import Text.Pandoc.Class (PandocMonad, lookupEnv)
import Text.Pandoc.Citeproc.BibTeX as BibTeX
import Text.Pandoc.Citeproc.MetaValue (referenceToMetaValue)
+import Text.Pandoc.Sources (ToSources(..))
import Control.Monad.Except (throwError)
-- | Read BibTeX from an input string and return a Pandoc document.
-- The document will have only metadata, with an empty body.
-- The metadata will contain a `references` field with the
-- bibliography entries, and a `nocite` field with the wildcard `[@*]`.
-readBibTeX :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readBibTeX :: (PandocMonad m, ToSources a)
+ => ReaderOptions -> a -> m Pandoc
readBibTeX = readBibTeX' BibTeX.Bibtex
-- | Read BibLaTeX from an input string and return a Pandoc document.
-- The document will have only metadata, with an empty body.
-- The metadata will contain a `references` field with the
-- bibliography entries, and a `nocite` field with the wildcard `[@*]`.
-readBibLaTeX :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readBibLaTeX :: (PandocMonad m, ToSources a)
+ => ReaderOptions -> a -> m Pandoc
readBibLaTeX = readBibTeX' BibTeX.Biblatex
-readBibTeX' :: PandocMonad m => Variant -> ReaderOptions -> Text -> m Pandoc
+readBibTeX' :: (PandocMonad m, ToSources a)
+ => Variant -> ReaderOptions -> a -> m Pandoc
readBibTeX' variant _opts t = do
mblangEnv <- lookupEnv "LANG"
let defaultLang = Lang "en" Nothing (Just "US") [] [] []
@@ -60,7 +63,7 @@ readBibTeX' variant _opts t = do
Left _ -> throwError $ PandocCiteprocError e
Right l -> return l
case BibTeX.readBibtexString variant locale (const True) t of
- Left e -> throwError $ PandocParsecError t e
+ Left e -> throwError $ PandocParsecError (toSources t) e
Right refs -> return $ setMeta "references"
(map referenceToMetaValue refs)
. setMeta "nocite"
diff --git a/src/Text/Pandoc/Readers/CSV.hs b/src/Text/Pandoc/Readers/CSV.hs
index 2958d6180..eca8f9425 100644
--- a/src/Text/Pandoc/Readers/CSV.hs
+++ b/src/Text/Pandoc/Readers/CSV.hs
@@ -13,23 +13,23 @@
Conversion from CSV to a 'Pandoc' table.
-}
module Text.Pandoc.Readers.CSV ( readCSV ) where
-import Data.Text (Text)
import qualified Data.Text as T
import Text.Pandoc.CSV (parseCSV, defaultCSVOptions)
import Text.Pandoc.Definition
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Class (PandocMonad)
-import Text.Pandoc.Shared (crFilter)
import Text.Pandoc.Error
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
import Text.Pandoc.Options (ReaderOptions)
import Control.Monad.Except (throwError)
-readCSV :: PandocMonad m
+readCSV :: (PandocMonad m, ToSources a)
=> ReaderOptions -- ^ Reader options
- -> Text -- ^ Text to parse (assuming @'\n'@ line endings)
+ -> a
-> m Pandoc
-readCSV _opts s =
- case parseCSV defaultCSVOptions (crFilter s) of
+readCSV _opts s = do
+ let txt = sourcesToText $ toSources s
+ case parseCSV defaultCSVOptions txt of
Right (r:rs) -> return $ B.doc $ B.table capt
(zip aligns widths)
(TableHead nullAttr hdrs)
@@ -45,4 +45,4 @@ readCSV _opts s =
aligns = replicate numcols AlignDefault
widths = replicate numcols ColWidthDefault
Right [] -> return $ B.doc mempty
- Left e -> throwError $ PandocParsecError s e
+ Left e -> throwError $ PandocParsecError (toSources [("",txt)]) e
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
index 244f77940..b099a9b50 100644
--- a/src/Text/Pandoc/Readers/CommonMark.hs
+++ b/src/Text/Pandoc/Readers/CommonMark.hs
@@ -30,45 +30,55 @@ import Text.Pandoc.Readers.Metadata (yamlMetaBlock)
import Control.Monad.Except
import Data.Functor.Identity (runIdentity)
import Data.Typeable
-import Text.Pandoc.Parsing (runParserT, getPosition, sourceLine,
- runF, defaultParserState, take1WhileP, option)
+import Text.Pandoc.Parsing (runParserT, getPosition,
+ runF, defaultParserState, option, many1, anyChar,
+ Sources(..), ToSources(..), ParserT, Future,
+ sourceName)
import qualified Data.Text as T
-- | Parse a CommonMark formatted string into a 'Pandoc' structure.
-readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readCommonMark :: (PandocMonad m, ToSources a)
+ => ReaderOptions -> a -> m Pandoc
readCommonMark opts s
- | isEnabled Ext_yaml_metadata_block opts
- , "---" `T.isPrefixOf` s = do
- let metaValueParser = do
- inp <- option "" $ take1WhileP (const True)
- case runIdentity
- (commonmarkWith (specFor opts) "metadata value" inp) of
- Left _ -> mzero
- Right (Cm bls :: Cm () Blocks)
- -> return $ return $ B.toMetaValue bls
- res <- runParserT (do meta <- yamlMetaBlock metaValueParser
- pos <- getPosition
- return (meta, pos))
- defaultParserState "YAML metadata" s
- case res of
- Left _ -> readCommonMarkBody opts s
- Right (meta, pos) -> do
- let dropLines 0 = id
- dropLines n = dropLines (n - 1) . T.drop 1 . T.dropWhile (/='\n')
- let metaLines = sourceLine pos - 1
- let body = T.replicate metaLines "\n" <> dropLines metaLines s
- Pandoc _ bs <- readCommonMarkBody opts body
- return $ Pandoc (runF meta defaultParserState) bs
- | otherwise = readCommonMarkBody opts s
+ | isEnabled Ext_yaml_metadata_block opts = do
+ let sources = toSources s
+ let toks = concatMap sourceToToks (unSources sources)
+ res <- runParserT (do meta <- yamlMetaBlock (metaValueParser opts)
+ pos <- getPosition
+ return (meta, pos))
+ defaultParserState "YAML metadata" (toSources s)
+ case res of
+ Left _ -> readCommonMarkBody opts sources toks
+ Right (meta, pos) -> do
+ -- strip off metadata section and parse body
+ let body = dropWhile (\t -> tokPos t < pos) toks
+ Pandoc _ bs <- readCommonMarkBody opts sources body
+ return $ Pandoc (runF meta defaultParserState) bs
+ | otherwise = do
+ let sources = toSources s
+ let toks = concatMap sourceToToks (unSources sources)
+ readCommonMarkBody opts sources toks
-readCommonMarkBody :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
-readCommonMarkBody opts s
+sourceToToks :: (SourcePos, Text) -> [Tok]
+sourceToToks (pos, s) = tokenize (sourceName pos) s
+
+metaValueParser :: Monad m
+ => ReaderOptions -> ParserT Sources st m (Future st MetaValue)
+metaValueParser opts = do
+ inp <- option "" $ T.pack <$> many1 anyChar
+ let toks = concatMap sourceToToks (unSources (toSources inp))
+ case runIdentity (parseCommonmarkWith (specFor opts) toks) of
+ Left _ -> mzero
+ Right (Cm bls :: Cm () Blocks) -> return $ return $ B.toMetaValue bls
+
+readCommonMarkBody :: PandocMonad m => ReaderOptions -> Sources -> [Tok] -> m Pandoc
+readCommonMarkBody opts s toks
| isEnabled Ext_sourcepos opts =
- case runIdentity (commonmarkWith (specFor opts) "" s) of
+ case runIdentity (parseCommonmarkWith (specFor opts) toks) of
Left err -> throwError $ PandocParsecError s err
Right (Cm bls :: Cm SourceRange Blocks) -> return $ B.doc bls
| otherwise =
- case runIdentity (commonmarkWith (specFor opts) "" s) of
+ case runIdentity (parseCommonmarkWith (specFor opts) toks) of
Left err -> throwError $ PandocParsecError s err
Right (Cm bls :: Cm () Blocks) -> return $ B.doc bls
diff --git a/src/Text/Pandoc/Readers/Creole.hs b/src/Text/Pandoc/Readers/Creole.hs
index 2658dfea2..ad848ada7 100644
--- a/src/Text/Pandoc/Readers/Creole.hs
+++ b/src/Text/Pandoc/Readers/Creole.hs
@@ -23,21 +23,20 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad (..))
import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (enclosed)
-import Text.Pandoc.Shared (crFilter)
-
-- | Read creole from an input string and return a Pandoc document.
-readCreole :: PandocMonad m
+readCreole :: (PandocMonad m, ToSources a)
=> ReaderOptions
- -> Text
+ -> a
-> m Pandoc
readCreole opts s = do
- res <- readWithM parseCreole def{ stateOptions = opts } $ crFilter s <> "\n\n"
+ let sources = ensureFinalNewlines 2 (toSources s)
+ res <- readWithM parseCreole def{ stateOptions = opts } sources
case res of
Left e -> throwError e
Right d -> return d
-type CRLParser = ParserT Text ParserState
+type CRLParser = ParserT Sources ParserState
--
-- Utility functions
diff --git a/src/Text/Pandoc/Readers/CslJson.hs b/src/Text/Pandoc/Readers/CslJson.hs
index 30bb19483..a0af5c325 100644
--- a/src/Text/Pandoc/Readers/CslJson.hs
+++ b/src/Text/Pandoc/Readers/CslJson.hs
@@ -24,21 +24,22 @@ import Text.Pandoc.Options
import Text.Pandoc.Definition
import Text.Pandoc.Builder (setMeta, cite, str)
import qualified Text.Pandoc.UTF8 as UTF8
-import Data.Text (Text)
import qualified Data.Text as T
import Text.Pandoc.Error (PandocError(..))
import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Citeproc.CslJson (cslJsonToReferences)
import Text.Pandoc.Citeproc.MetaValue (referenceToMetaValue)
import Control.Monad.Except (throwError)
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
-- | Read CSL JSON from an input string and return a Pandoc document.
-- The document will have only metadata, with an empty body.
-- The metadata will contain a `references` field with the
-- bibliography entries, and a `nocite` field with the wildcard `[@*]`.
-readCslJson :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
-readCslJson _opts t =
- case cslJsonToReferences (UTF8.fromText t) of
+readCslJson :: (PandocMonad m, ToSources a)
+ => ReaderOptions -> a -> m Pandoc
+readCslJson _opts x =
+ case cslJsonToReferences (UTF8.fromText $ sourcesToText $ toSources x) of
Left e -> throwError $ PandocParseError $ T.pack e
Right refs -> return $ setMeta "references"
(map referenceToMetaValue refs)
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index ac3caa2c0..3db459cfd 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -30,7 +30,8 @@ import Text.Pandoc.Builder
import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
import Text.Pandoc.Options
import Text.Pandoc.Logging (LogMessage(..))
-import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces)
+import Text.Pandoc.Shared (safeRead, extractSpaces)
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
import Text.TeXMath (readMathML, writeTeX)
import Text.Pandoc.XML.Light
@@ -539,11 +540,15 @@ instance Default DBState where
, dbContent = [] }
-readDocBook :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readDocBook :: (PandocMonad m, ToSources a)
+ => ReaderOptions
+ -> a
+ -> m Pandoc
readDocBook _ inp = do
+ let sources = toSources inp
tree <- either (throwError . PandocXMLError "") return $
parseXMLContents
- (TL.fromStrict . handleInstructions $ crFilter inp)
+ (TL.fromStrict . handleInstructions . sourcesToText $ sources)
(bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock tree
return $ Pandoc (dbMeta st') (toList . mconcat $ bs)
diff --git a/src/Text/Pandoc/Readers/DokuWiki.hs b/src/Text/Pandoc/Readers/DokuWiki.hs
index dedc1f03f..db98ac8de 100644
--- a/src/Text/Pandoc/Readers/DokuWiki.hs
+++ b/src/Text/Pandoc/Readers/DokuWiki.hs
@@ -29,26 +29,27 @@ import Text.Pandoc.Definition
import Text.Pandoc.Error (PandocError (PandocParsecError))
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (enclosed, nested)
-import Text.Pandoc.Shared (crFilter, trim, stringify, tshow)
+import Text.Pandoc.Shared (trim, stringify, tshow)
-- | Read DokuWiki from an input string and return a Pandoc document.
-readDokuWiki :: PandocMonad m
+readDokuWiki :: (PandocMonad m, ToSources a)
=> ReaderOptions
- -> Text
+ -> a
-> m Pandoc
readDokuWiki opts s = do
- let input = crFilter s
- res <- runParserT parseDokuWiki def {stateOptions = opts } "source" input
+ let sources = toSources s
+ res <- runParserT parseDokuWiki def {stateOptions = opts }
+ (initialSourceName sources) sources
case res of
- Left e -> throwError $ PandocParsecError input e
+ Left e -> throwError $ PandocParsecError sources e
Right d -> return d
-type DWParser = ParserT Text ParserState
+type DWParser = ParserT Sources ParserState
-- * Utility functions
-- | Parse end-of-line, which can be either a newline or end-of-file.
-eol :: Stream s m Char => ParserT s st m ()
+eol :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m ()
eol = void newline <|> eof
nested :: PandocMonad m => DWParser m a -> DWParser m a
diff --git a/src/Text/Pandoc/Readers/FB2.hs b/src/Text/Pandoc/Readers/FB2.hs
index 66e390bd7..84e5278db 100644
--- a/src/Text/Pandoc/Readers/FB2.hs
+++ b/src/Text/Pandoc/Readers/FB2.hs
@@ -40,9 +40,9 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad, insertMedia, report)
import Text.Pandoc.Error
import Text.Pandoc.Logging
import Text.Pandoc.Options
-import Text.Pandoc.Shared (crFilter)
import Text.Pandoc.XML.Light
import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
type FB2 m = StateT FB2State m
@@ -63,9 +63,12 @@ instance HasMeta FB2State where
setMeta field v s = s {fb2Meta = setMeta field v (fb2Meta s)}
deleteMeta field s = s {fb2Meta = deleteMeta field (fb2Meta s)}
-readFB2 :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readFB2 :: (PandocMonad m, ToSources a)
+ => ReaderOptions
+ -> a
+ -> m Pandoc
readFB2 _ inp =
- case parseXMLElement $ TL.fromStrict $ crFilter inp of
+ case parseXMLElement $ TL.fromStrict $ sourcesToText $ toSources inp of
Left msg -> throwError $ PandocXMLError "" msg
Right el -> do
(bs, st) <- runStateT (parseRootElement el) def
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index c3e68afd8..f5c8a2277 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -62,21 +62,21 @@ import Text.Pandoc.Options (
extensionEnabled)
import Text.Pandoc.Parsing hiding ((<|>))
import Text.Pandoc.Shared (
- addMetaField, blocksToInlines', crFilter, escapeURI, extractSpaces,
+ addMetaField, blocksToInlines', escapeURI, extractSpaces,
htmlSpanLikeElements, renderTags', safeRead, tshow)
import Text.Pandoc.Walk
import Text.Parsec.Error
import Text.TeXMath (readMathML, writeTeX)
-- | Convert HTML-formatted string to 'Pandoc' document.
-readHtml :: PandocMonad m
+readHtml :: (PandocMonad m, ToSources a)
=> ReaderOptions -- ^ Reader options
- -> Text -- ^ String to parse (assumes @'\n'@ line endings)
+ -> a -- ^ Input to parse
-> m Pandoc
readHtml opts inp = do
let tags = stripPrefixes $ canonicalizeTags $
parseTagsOptions parseOptions{ optTagPosition = True }
- (crFilter inp)
+ (sourcesToText $ toSources inp)
parseDoc = do
blocks <- fixPlains False . mconcat <$> manyTill block eof
meta <- stateMeta . parserState <$> getState
@@ -830,17 +830,19 @@ pInlinesInTags tagtype f = extractSpaces f <$> pInTags tagtype inline
pTagText :: PandocMonad m => TagParser m Inlines
pTagText = try $ do
+ pos <- getPosition
(TagText str) <- pSatisfy isTagText
st <- getState
qu <- ask
parsed <- lift $ lift $
- flip runReaderT qu $ runParserT (many pTagContents) st "text" str
+ flip runReaderT qu $ runParserT (many pTagContents) st "text"
+ (Sources [(pos, str)])
case parsed of
Left _ -> throwError $ PandocParseError $
"Could not parse `" <> str <> "'"
Right result -> return $ mconcat result
-type InlinesParser m = HTMLParser m Text
+type InlinesParser m = HTMLParser m Sources
pTagContents :: PandocMonad m => InlinesParser m Inlines
pTagContents =
@@ -970,13 +972,14 @@ isCommentTag = tagComment (const True)
-- | Matches a stretch of HTML in balanced tags.
htmlInBalanced :: Monad m
=> (Tag Text -> Bool)
- -> ParserT Text st m Text
+ -> ParserT Sources st m Text
htmlInBalanced f = try $ do
lookAhead (char '<')
- inp <- getInput
- let ts = canonicalizeTags $
- parseTagsOptions parseOptions{ optTagWarning = True,
- optTagPosition = True } inp
+ sources <- getInput
+ let ts = canonicalizeTags
+ $ parseTagsOptions parseOptions{ optTagWarning = True,
+ optTagPosition = True }
+ $ sourcesToText sources
case ts of
(TagPosition sr sc : t@(TagOpen tn _) : rest) -> do
guard $ f t
@@ -1018,15 +1021,17 @@ hasTagWarning _ = False
-- | Matches a tag meeting a certain condition.
htmlTag :: (HasReaderOptions st, Monad m)
=> (Tag Text -> Bool)
- -> ParserT Text st m (Tag Text, Text)
+ -> ParserT Sources st m (Tag Text, Text)
htmlTag f = try $ do
lookAhead (char '<')
startpos <- getPosition
- inp <- getInput
+ sources <- getInput
+ let inp = sourcesToText sources
let ts = canonicalizeTags $ parseTagsOptions
parseOptions{ optTagWarning = False
, optTagPosition = True }
- (inp <> " ") -- add space to ensure that
+ (inp <> " ")
+ -- add space to ensure that
-- we get a TagPosition after the tag
(next, ln, col) <- case ts of
(TagPosition{} : next : TagPosition ln col : _)
diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs
index 48454e353..35eaac0a9 100644
--- a/src/Text/Pandoc/Readers/Haddock.hs
+++ b/src/Text/Pandoc/Readers/Haddock.hs
@@ -19,7 +19,7 @@ import Control.Monad.Except (throwError)
import Data.List (intersperse)
import Data.List.NonEmpty (nonEmpty)
import Data.Maybe (fromMaybe)
-import Data.Text (Text, unpack)
+import Data.Text (unpack)
import qualified Data.Text as T
import Documentation.Haddock.Parser
import Documentation.Haddock.Types as H
@@ -29,15 +29,17 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad)
import Text.Pandoc.Definition
import Text.Pandoc.Error
import Text.Pandoc.Options
-import Text.Pandoc.Shared (crFilter, splitTextBy, trim)
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
+import Text.Pandoc.Shared (splitTextBy, trim)
-- | Parse Haddock markup and return a 'Pandoc' document.
-readHaddock :: PandocMonad m
+readHaddock :: (PandocMonad m, ToSources a)
=> ReaderOptions
- -> Text
+ -> a
-> m Pandoc
-readHaddock opts s = case readHaddockEither opts (unpack (crFilter s)) of
+readHaddock opts s = case readHaddockEither opts
+ (unpack . sourcesToText . toSources $ s) of
Right result -> return result
Left e -> throwError e
diff --git a/src/Text/Pandoc/Readers/Ipynb.hs b/src/Text/Pandoc/Readers/Ipynb.hs
index 70296bb6b..cd1093109 100644
--- a/src/Text/Pandoc/Readers/Ipynb.hs
+++ b/src/Text/Pandoc/Readers/Ipynb.hs
@@ -39,10 +39,12 @@ import Data.Aeson as Aeson
import Control.Monad.Except (throwError)
import Text.Pandoc.Readers.Markdown (readMarkdown)
import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
-readIpynb :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
-readIpynb opts t = do
- let src = BL.fromStrict (TE.encodeUtf8 t)
+readIpynb :: (PandocMonad m, ToSources a)
+ => ReaderOptions -> a -> m Pandoc
+readIpynb opts x = do
+ let src = BL.fromStrict . TE.encodeUtf8 . sourcesToText $ toSources x
case eitherDecode src of
Right (notebook4 :: Notebook NbV4) -> notebookToPandoc opts notebook4
Left _ ->
diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs
index c068f3774..9cdbf1611 100644
--- a/src/Text/Pandoc/Readers/JATS.hs
+++ b/src/Text/Pandoc/Readers/JATS.hs
@@ -29,11 +29,12 @@ import Text.HTML.TagSoup.Entity (lookupEntity)
import Text.Pandoc.Builder
import Text.Pandoc.Class.PandocMonad (PandocMonad)
import Text.Pandoc.Options
-import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces)
+import Text.Pandoc.Shared (safeRead, extractSpaces)
import Text.TeXMath (readMathML, writeTeX)
import Text.Pandoc.XML.Light
import qualified Data.Set as S (fromList, member)
import Data.Set ((\\))
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
type JATS m = StateT JATSState m
@@ -52,10 +53,14 @@ instance Default JATSState where
, jatsContent = [] }
-readJATS :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readJATS :: (PandocMonad m, ToSources a)
+ => ReaderOptions
+ -> a
+ -> m Pandoc
readJATS _ inp = do
+ let sources = toSources inp
tree <- either (throwError . PandocXMLError "") return $
- parseXMLContents (TL.fromStrict $ crFilter inp)
+ parseXMLContents (TL.fromStrict . sourcesToText $ sources)
(bs, st') <- flip runStateT (def{ jatsContent = tree }) $ mapM parseBlock tree
return $ Pandoc (jatsMeta st') (toList . mconcat $ bs)
diff --git a/src/Text/Pandoc/Readers/Jira.hs b/src/Text/Pandoc/Readers/Jira.hs
index 89aecbf56..a3b415f09 100644
--- a/src/Text/Pandoc/Readers/Jira.hs
+++ b/src/Text/Pandoc/Readers/Jira.hs
@@ -20,18 +20,20 @@ import Text.Pandoc.Builder hiding (cell)
import Text.Pandoc.Error (PandocError (PandocParseError))
import Text.Pandoc.Options (ReaderOptions)
import Text.Pandoc.Shared (stringify)
-
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
import qualified Text.Jira.Markup as Jira
-- | Read Jira wiki markup.
-readJira :: PandocMonad m
+readJira :: (PandocMonad m, ToSources a)
=> ReaderOptions
- -> Text
+ -> a
-> m Pandoc
-readJira _opts s = case parse s of
- Right d -> return $ jiraToPandoc d
- Left e -> throwError . PandocParseError $
- "Jira parse error" `append` pack (show e)
+readJira _opts inp = do
+ let sources = toSources inp
+ case parse (sourcesToText sources) of
+ Right d -> return $ jiraToPandoc d
+ Left e -> throwError . PandocParseError $
+ "Jira parse error" `append` pack (show e)
jiraToPandoc :: Jira.Doc -> Pandoc
jiraToPandoc (Jira.Doc blks) = doc $ foldMap jiraToPandocBlocks blks
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 9ad168293..f90d562ae 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -77,16 +77,17 @@ import Data.List.NonEmpty (nonEmpty)
-- import Debug.Trace (traceShowId)
-- | Parse LaTeX from string and return 'Pandoc' document.
-readLaTeX :: PandocMonad m
+readLaTeX :: (PandocMonad m, ToSources a)
=> ReaderOptions -- ^ Reader options
- -> Text -- ^ String to parse (assumes @'\n'@ line endings)
+ -> a -- ^ Input to parse
-> m Pandoc
readLaTeX opts ltx = do
+ let sources = toSources ltx
parsed <- runParserT parseLaTeX def{ sOptions = opts } "source"
- (tokenize "source" (crFilter ltx))
+ (tokenizeSources sources)
case parsed of
Right result -> return result
- Left e -> throwError $ PandocParsecError ltx e
+ Left e -> throwError $ PandocParsecError sources e
parseLaTeX :: PandocMonad m => LP m Pandoc
parseLaTeX = do
@@ -132,11 +133,11 @@ resolveRefs _ x = x
rawLaTeXBlock :: (PandocMonad m, HasMacros s, HasReaderOptions s)
- => ParserT Text s m Text
+ => ParserT Sources s m Text
rawLaTeXBlock = do
lookAhead (try (char '\\' >> letter))
inp <- getInput
- let toks = tokenize "source" inp
+ let toks = tokenizeSources inp
snd <$> (rawLaTeXParser toks False (macroDef (const mempty)) blocks
<|> rawLaTeXParser toks True
(do choice (map controlSeq
@@ -163,11 +164,11 @@ beginOrEndCommand = try $ do
(txt <> untokenize rawargs)
rawLaTeXInline :: (PandocMonad m, HasMacros s, HasReaderOptions s)
- => ParserT Text s m Text
+ => ParserT Sources s m Text
rawLaTeXInline = do
lookAhead (try (char '\\' >> letter))
inp <- getInput
- let toks = tokenize "source" inp
+ let toks = tokenizeSources inp
raw <- snd <$>
( rawLaTeXParser toks True
(mempty <$ (controlSeq "input" >> skipMany rawopt >> braced))
@@ -178,11 +179,11 @@ rawLaTeXInline = do
finalbraces <- mconcat <$> many (try (string "{}")) -- see #5439
return $ raw <> T.pack finalbraces
-inlineCommand :: PandocMonad m => ParserT Text ParserState m Inlines
+inlineCommand :: PandocMonad m => ParserT Sources ParserState m Inlines
inlineCommand = do
lookAhead (try (char '\\' >> letter))
inp <- getInput
- let toks = tokenize "source" inp
+ let toks = tokenizeSources inp
fst <$> rawLaTeXParser toks True (inlineEnvironment <|> inlineCommand')
inlines
@@ -641,7 +642,7 @@ opt = do
parsed <- runParserT (mconcat <$> many inline) st "bracketed option" toks
case parsed of
Right result -> return result
- Left e -> throwError $ PandocParsecError (untokenize toks) e
+ Left e -> throwError $ PandocParsecError (toSources toks) e
-- block elements:
diff --git a/src/Text/Pandoc/Readers/LaTeX/Citation.hs b/src/Text/Pandoc/Readers/LaTeX/Citation.hs
index 655823dab..af97125c6 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Citation.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Citation.hs
@@ -120,7 +120,7 @@ simpleCiteArgs inline = try $ do
runParserT (mconcat <$> many inline) st "bracketed option" toks
case parsed of
Right result -> return result
- Left e -> throwError $ PandocParsecError (untokenize toks) e
+ Left e -> throwError $ PandocParsecError (toSources toks) e
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index db58b333d..35ce3509d 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -27,6 +27,7 @@ module Text.Pandoc.Readers.LaTeX.Parsing
, rawLaTeXParser
, applyMacros
, tokenize
+ , tokenizeSources
, untokenize
, untoken
, totoks
@@ -248,7 +249,7 @@ withVerbatimMode parser = do
rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s)
=> [Tok] -> Bool -> LP m a -> LP m a
- -> ParserT Text s m (a, Text)
+ -> ParserT Sources s m (a, Text)
rawLaTeXParser toks retokenize parser valParser = do
pstate <- getState
let lstate = def{ sOptions = extractReaderOptions pstate }
@@ -268,7 +269,7 @@ rawLaTeXParser toks retokenize parser valParser = do
Left _ -> mzero
Right ((val, raw), st) -> do
updateState (updateMacros (sMacros st <>))
- _ <- takeP (T.length (untokenize toks'))
+ void $ count (T.length (untokenize toks')) anyChar
let result = untokenize raw
-- ensure we end with space if input did, see #4442
let result' =
@@ -281,7 +282,7 @@ rawLaTeXParser toks retokenize parser valParser = do
return (val, result')
applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s)
- => Text -> ParserT Text s m Text
+ => Text -> ParserT Sources s m Text
applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|>
do let retokenize = untokenize <$> many (satisfyTok (const True))
pstate <- getState
@@ -301,6 +302,11 @@ QuickCheck property:
> let t = T.pack s in untokenize (tokenize "random" t) == t
-}
+tokenizeSources :: Sources -> [Tok]
+tokenizeSources = concatMap tokenizeSource . unSources
+ where
+ tokenizeSource (pos, t) = totoks pos t
+
tokenize :: SourceName -> Text -> [Tok]
tokenize sourcename = totoks (initialPos sourcename)
diff --git a/src/Text/Pandoc/Readers/LaTeX/Types.hs b/src/Text/Pandoc/Readers/LaTeX/Types.hs
index f8c214318..c20b72bc5 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Types.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Types.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE FlexibleInstances #-}
{- |
Module : Text.Pandoc.Readers.LaTeX.Types
Copyright : Copyright (C) 2017-2021 John MacFarlane
@@ -18,7 +19,9 @@ module Text.Pandoc.Readers.LaTeX.Types ( Tok(..)
)
where
import Data.Text (Text)
-import Text.Parsec.Pos (SourcePos)
+import Text.Parsec.Pos (SourcePos, sourceName)
+import Text.Pandoc.Sources
+import Data.List (groupBy)
data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment |
Esc1 | Esc2 | Arg Int
@@ -27,6 +30,16 @@ data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment |
data Tok = Tok SourcePos TokType Text
deriving (Eq, Ord, Show)
+instance ToSources [Tok] where
+ toSources = Sources
+ . map (\ts -> case ts of
+ Tok p _ _ : _ -> (p, mconcat $ map tokToText ts)
+ _ -> error "toSources [Tok] encountered empty group")
+ . groupBy (\(Tok p1 _ _) (Tok p2 _ _) -> sourceName p1 == sourceName p2)
+
+tokToText :: Tok -> Text
+tokToText (Tok _ _ t) = t
+
data ExpansionPoint = ExpandWhenDefined | ExpandWhenUsed
deriving (Eq, Ord, Show)
diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs
index 21b8feaab..1141af66f 100644
--- a/src/Text/Pandoc/Readers/Man.hs
+++ b/src/Text/Pandoc/Readers/Man.hs
@@ -20,7 +20,7 @@ import Control.Monad (liftM, mzero, guard, void)
import Control.Monad.Trans (lift)
import Control.Monad.Except (throwError)
import Data.Maybe (catMaybes, isJust)
-import Data.List (intersperse, intercalate)
+import Data.List (intersperse)
import qualified Data.Text as T
import Text.Pandoc.Builder as B
import Text.Pandoc.Class.PandocMonad (PandocMonad(..), report)
@@ -29,9 +29,8 @@ import Text.Pandoc.Logging (LogMessage(..))
import Text.Pandoc.Options
import Text.Pandoc.Parsing
import Text.Pandoc.Walk (query)
-import Text.Pandoc.Shared (crFilter, mapLeft)
+import Text.Pandoc.Shared (mapLeft)
import Text.Pandoc.Readers.Roff -- TODO explicit imports
-import Text.Parsec hiding (tokenPrim)
import qualified Text.Parsec as Parsec
import Text.Parsec.Pos (updatePosString)
import qualified Data.Foldable as Foldable
@@ -50,13 +49,20 @@ type ManParser m = ParserT [RoffToken] ManState m
-- | Read man (troff) from an input string and return a Pandoc document.
-readMan :: PandocMonad m => ReaderOptions -> T.Text -> m Pandoc
-readMan opts txt = do
- tokenz <- lexRoff (initialPos "input") (crFilter txt)
+readMan :: (PandocMonad m, ToSources a)
+ => ReaderOptions
+ -> a
+ -> m Pandoc
+readMan opts s = do
+ let Sources inps = toSources s
+ tokenz <- mconcat <$> mapM (uncurry lexRoff) inps
let state = def {readerOptions = opts} :: ManState
+ let fixError (PandocParsecError _ e) = PandocParsecError (Sources inps) e
+ fixError e = e
eitherdoc <- readWithMTokens parseMan state
(Foldable.toList . unRoffTokens $ tokenz)
- either throwError return eitherdoc
+ either (throwError . fixError) return eitherdoc
+
readWithMTokens :: PandocMonad m
=> ParserT [RoffToken] ManState m a -- ^ parser
@@ -64,9 +70,10 @@ readWithMTokens :: PandocMonad m
-> [RoffToken] -- ^ input
-> m (Either PandocError a)
readWithMTokens parser state input =
- let leftF = PandocParsecError . T.pack . intercalate "\n" $ show <$> input
+ let leftF = PandocParsecError mempty
in mapLeft leftF `liftM` runParserT parser state "source" input
+
parseMan :: PandocMonad m => ManParser m Pandoc
parseMan = do
bs <- many parseBlock <* eof
@@ -89,7 +96,7 @@ parseBlock = choice [ parseList
parseTable :: PandocMonad m => ManParser m Blocks
parseTable = do
- modifyState $ \st -> st { tableCellsPlain = True }
+ updateState $ \st -> st { tableCellsPlain = True }
let isTbl Tbl{} = True
isTbl _ = False
Tbl _opts rows pos <- msatisfy isTbl
@@ -135,7 +142,7 @@ parseTable = do
case res' of
Left _ -> Prelude.fail "Could not parse table cell"
Right x -> do
- modifyState $ \s -> s{ tableCellsPlain = False }
+ updateState $ \s -> s{ tableCellsPlain = False }
return x
Right x -> return x
@@ -222,7 +229,7 @@ parseTitle = do
setMeta "section" (linePartsToInlines y)
[x] -> setMeta "title" (linePartsToInlines x)
[] -> id
- modifyState $ \st -> st{ metadata = adjustMeta $ metadata st }
+ updateState $ \st -> st{ metadata = adjustMeta $ metadata st }
return mempty
linePartsToInlines :: [LinePart] -> Inlines
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index ba8ed147e..69dd51bc4 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -47,19 +47,20 @@ import Text.Pandoc.Readers.LaTeX (applyMacros, rawLaTeXBlock, rawLaTeXInline)
import Text.Pandoc.Shared
import Text.Pandoc.XML (fromEntities)
import Text.Pandoc.Readers.Metadata (yamlBsToMeta, yamlBsToRefs, yamlMetaBlock)
+-- import Debug.Trace (traceShowId)
-type MarkdownParser m = ParserT Text ParserState m
+type MarkdownParser m = ParserT Sources ParserState m
type F = Future ParserState
-- | Read markdown from an input string and return a Pandoc document.
-readMarkdown :: PandocMonad m
+readMarkdown :: (PandocMonad m, ToSources a)
=> ReaderOptions -- ^ Reader options
- -> Text -- ^ String to parse (assuming @'\n'@ line endings)
+ -> a -- ^ Input
-> m Pandoc
readMarkdown opts s = do
parsed <- readWithM parseMarkdown def{ stateOptions = opts }
- (crFilter s <> "\n\n")
+ (ensureFinalNewlines 3 (toSources s))
case parsed of
Right result -> return result
Left e -> throwError e
@@ -80,7 +81,7 @@ yamlToMeta opts mbfp bstr = do
meta <- yamlBsToMeta (fmap B.toMetaValue <$> parseBlocks) bstr
setPosition oldPos
return $ runF meta defaultParserState
- parsed <- readWithM parser def{ stateOptions = opts } ""
+ parsed <- readWithM parser def{ stateOptions = opts } ("" :: Text)
case parsed of
Right result -> return result
Left e -> throwError e
@@ -103,7 +104,7 @@ yamlToRefs idpred opts mbfp bstr = do
refs <- yamlBsToRefs (fmap B.toMetaValue <$> parseBlocks) idpred bstr
setPosition oldPos
return $ runF refs defaultParserState
- parsed <- readWithM parser def{ stateOptions = opts } ""
+ parsed <- readWithM parser def{ stateOptions = opts } ("" :: Text)
case parsed of
Right result -> return result
Left e -> throwError e
@@ -146,14 +147,14 @@ inList = do
ctx <- stateParserContext <$> getState
guard (ctx == ListItemState)
-spnl :: PandocMonad m => ParserT Text st m ()
+spnl :: PandocMonad m => ParserT Sources st m ()
spnl = try $ do
skipSpaces
optional newline
skipSpaces
notFollowedBy (char '\n')
-spnl' :: PandocMonad m => ParserT Text st m Text
+spnl' :: PandocMonad m => ParserT Sources st m Text
spnl' = try $ do
xs <- many spaceChar
ys <- option "" $ try $ (:) <$> newline
@@ -568,7 +569,7 @@ registerImplicitHeader raw attr@(ident, _, _)
-- hrule block
--
-hrule :: PandocMonad m => ParserT Text st m (F Blocks)
+hrule :: PandocMonad m => ParserT Sources st m (F Blocks)
hrule = try $ do
skipSpaces
start <- satisfy isHruleChar
@@ -588,7 +589,7 @@ indentedLine = indentSpaces >> anyLineNewline
blockDelimiter :: PandocMonad m
=> (Char -> Bool)
-> Maybe Int
- -> ParserT Text ParserState m Int
+ -> ParserT Sources ParserState m Int
blockDelimiter f len = try $ do
skipNonindentSpaces
c <- lookAhead (satisfy f)
@@ -732,7 +733,7 @@ lhsCodeBlockBirdWith c = try $ do
blanklines
return $ T.intercalate "\n" lns'
-birdTrackLine :: PandocMonad m => Char -> ParserT Text st m Text
+birdTrackLine :: PandocMonad m => Char -> ParserT Sources st m Text
birdTrackLine c = try $ do
char c
-- allow html tags on left margin:
@@ -1025,7 +1026,7 @@ para = try $ do
option (B.plain <$> result)
$ try $ do
newline
- (blanklines >> return mempty)
+ (mempty <$ blanklines)
<|> (guardDisabled Ext_blank_before_blockquote >> () <$ lookAhead blockQuote)
<|> (guardEnabled Ext_backtick_code_blocks >> () <$ lookAhead codeBlockFenced)
<|> (guardDisabled Ext_blank_before_header >> () <$ lookAhead header)
@@ -1170,7 +1171,7 @@ lineBlock = do
-- and the length including trailing space.
dashedLine :: PandocMonad m
=> Char
- -> ParserT Text st m (Int, Int)
+ -> ParserT Sources st m (Int, Int)
dashedLine ch = do
dashes <- many1 (char ch)
sp <- many spaceChar
@@ -1239,7 +1240,7 @@ rawTableLine :: PandocMonad m
-> MarkdownParser m [Text]
rawTableLine indices = do
notFollowedBy' (blanklines' <|> tableFooter)
- line <- take1WhileP (/='\n') <* newline
+ line <- anyLine
return $ map trim $ tail $
splitTextByIndices (init indices) line
@@ -1390,7 +1391,7 @@ pipeTableCell =
return $ B.plain <$> result)
<|> return mempty
-pipeTableHeaderPart :: PandocMonad m => ParserT Text st m (Alignment, Int)
+pipeTableHeaderPart :: PandocMonad m => ParserT Sources st m (Alignment, Int)
pipeTableHeaderPart = try $ do
skipMany spaceChar
left <- optionMaybe (char ':')
@@ -1406,10 +1407,14 @@ pipeTableHeaderPart = try $ do
(Just _,Just _) -> AlignCenter, len)
-- Succeed only if current line contains a pipe.
-scanForPipe :: PandocMonad m => ParserT Text st m ()
+scanForPipe :: PandocMonad m => ParserT Sources st m ()
scanForPipe = do
- inp <- getInput
- case T.break (\c -> c == '\n' || c == '|') inp of
+ Sources inps <- getInput
+ let ln = case inps of
+ [] -> ""
+ ((_,t):(_,t'):_) | T.null t -> t'
+ ((_,t):_) -> t
+ case T.break (\c -> c == '\n' || c == '|') ln of
(_, T.uncons -> Just ('|', _)) -> return ()
_ -> mzero
@@ -1703,13 +1708,13 @@ whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace"
where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak)
regsp = skipMany spaceChar >> return B.space
-nonEndline :: PandocMonad m => ParserT Text st m Char
+nonEndline :: PandocMonad m => ParserT Sources st m Char
nonEndline = satisfy (/='\n')
str :: PandocMonad m => MarkdownParser m (F Inlines)
str = do
result <- mconcat <$> many1
- ( take1WhileP isAlphaNum
+ ( T.pack <$> (many1 alphaNum)
<|> "." <$ try (char '.' <* notFollowedBy (char '.')) )
updateLastStrPos
(do guardEnabled Ext_smart
@@ -1962,7 +1967,7 @@ rawLaTeXInline' = do
s <- rawLaTeXInline
return $ return $ B.rawInline "tex" s -- "tex" because it might be context
-rawConTeXtEnvironment :: PandocMonad m => ParserT Text st m Text
+rawConTeXtEnvironment :: PandocMonad m => ParserT Sources st m Text
rawConTeXtEnvironment = try $ do
string "\\start"
completion <- inBrackets (letter <|> digit <|> spaceChar)
@@ -1971,7 +1976,7 @@ rawConTeXtEnvironment = try $ do
(try $ string "\\stop" >> textStr completion)
return $ "\\start" <> completion <> T.concat contents <> "\\stop" <> completion
-inBrackets :: PandocMonad m => ParserT Text st m Char -> ParserT Text st m Text
+inBrackets :: PandocMonad m => ParserT Sources st m Char -> ParserT Sources st m Text
inBrackets parser = do
char '['
contents <- manyChar parser
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index 9f4d5e170..825e4a2eb 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -36,17 +36,18 @@ import Text.Pandoc.Logging
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (nested)
import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isCommentTag)
-import Text.Pandoc.Shared (crFilter, safeRead, stringify, stripTrailingNewlines,
+import Text.Pandoc.Shared (safeRead, stringify, stripTrailingNewlines,
trim, splitTextBy, tshow)
import Text.Pandoc.Walk (walk)
import Text.Pandoc.XML (fromEntities)
-- | Read mediawiki from an input string and return a Pandoc document.
-readMediaWiki :: PandocMonad m
- => ReaderOptions -- ^ Reader options
- -> Text -- ^ String to parse (assuming @'\n'@ line endings)
+readMediaWiki :: (PandocMonad m, ToSources a)
+ => ReaderOptions
+ -> a
-> m Pandoc
readMediaWiki opts s = do
+ let sources = toSources s
parsed <- readWithM parseMediaWiki MWState{ mwOptions = opts
, mwMaxNestingLevel = 4
, mwNextLinkNumber = 1
@@ -55,7 +56,7 @@ readMediaWiki opts s = do
, mwLogMessages = []
, mwInTT = False
}
- (crFilter s <> "\n")
+ sources
case parsed of
Right result -> return result
Left e -> throwError e
@@ -69,7 +70,7 @@ data MWState = MWState { mwOptions :: ReaderOptions
, mwInTT :: Bool
}
-type MWParser m = ParserT Text MWState m
+type MWParser m = ParserT Sources MWState m
instance HasReaderOptions MWState where
extractReaderOptions = mwOptions
diff --git a/src/Text/Pandoc/Readers/Metadata.hs b/src/Text/Pandoc/Readers/Metadata.hs
index cb141cba5..bbcfe62ea 100644
--- a/src/Text/Pandoc/Readers/Metadata.hs
+++ b/src/Text/Pandoc/Readers/Metadata.hs
@@ -35,9 +35,9 @@ import qualified Data.Text.Lazy as TL
import qualified Text.Pandoc.UTF8 as UTF8
yamlBsToMeta :: (PandocMonad m, HasLastStrPosition st)
- => ParserT Text st m (Future st MetaValue)
+ => ParserT Sources st m (Future st MetaValue)
-> BL.ByteString
- -> ParserT Text st m (Future st Meta)
+ -> ParserT Sources st m (Future st Meta)
yamlBsToMeta pMetaValue bstr = do
case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of
Right (YAML.Doc (YAML.Mapping _ _ o):_)
@@ -67,10 +67,10 @@ lookupYAML _ _ = Nothing
-- Returns filtered list of references.
yamlBsToRefs :: (PandocMonad m, HasLastStrPosition st)
- => ParserT Text st m (Future st MetaValue)
+ => ParserT Sources st m (Future st MetaValue)
-> (Text -> Bool) -- ^ Filter for id
-> BL.ByteString
- -> ParserT Text st m (Future st [MetaValue])
+ -> ParserT Sources st m (Future st [MetaValue])
yamlBsToRefs pMetaValue idpred bstr =
case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of
Right (YAML.Doc o@YAML.Mapping{}:_)
@@ -108,9 +108,9 @@ nodeToKey (YAML.Scalar _ (YAML.SUnknown _ t)) = Just t
nodeToKey _ = Nothing
normalizeMetaValue :: (PandocMonad m, HasLastStrPosition st)
- => ParserT Text st m (Future st MetaValue)
+ => ParserT Sources st m (Future st MetaValue)
-> Text
- -> ParserT Text st m (Future st MetaValue)
+ -> ParserT Sources st m (Future st MetaValue)
normalizeMetaValue pMetaValue x =
-- Note: a standard quoted or unquoted YAML value will
-- not end in a newline, but a "block" set off with
@@ -133,9 +133,9 @@ checkBoolean t
| otherwise = Nothing
yamlToMetaValue :: (PandocMonad m, HasLastStrPosition st)
- => ParserT Text st m (Future st MetaValue)
+ => ParserT Sources st m (Future st MetaValue)
-> YAML.Node YE.Pos
- -> ParserT Text st m (Future st MetaValue)
+ -> ParserT Sources st m (Future st MetaValue)
yamlToMetaValue pMetaValue (YAML.Scalar _ x) =
case x of
YAML.SStr t -> normalizeMetaValue pMetaValue t
@@ -156,9 +156,9 @@ yamlToMetaValue pMetaValue (YAML.Mapping _ _ o) =
yamlToMetaValue _ _ = return $ return $ MetaString ""
yamlMap :: (PandocMonad m, HasLastStrPosition st)
- => ParserT Text st m (Future st MetaValue)
+ => ParserT Sources st m (Future st MetaValue)
-> M.Map (YAML.Node YE.Pos) (YAML.Node YE.Pos)
- -> ParserT Text st m (Future st (M.Map Text MetaValue))
+ -> ParserT Sources st m (Future st (M.Map Text MetaValue))
yamlMap pMetaValue o = do
kvs <- forM (M.toList o) $ \(key, v) -> do
k <- maybe (throwError $ PandocParseError
@@ -177,8 +177,8 @@ yamlMap pMetaValue o = do
-- | Parse a YAML metadata block using the supplied 'MetaValue' parser.
yamlMetaBlock :: (HasLastStrPosition st, PandocMonad m)
- => ParserT Text st m (Future st MetaValue)
- -> ParserT Text st m (Future st Meta)
+ => ParserT Sources st m (Future st MetaValue)
+ -> ParserT Sources st m (Future st Meta)
yamlMetaBlock parser = try $ do
string "---"
blankline
@@ -189,5 +189,5 @@ yamlMetaBlock parser = try $ do
optional blanklines
yamlBsToMeta parser $ UTF8.fromTextLazy $ TL.fromStrict rawYaml
-stopLine :: Monad m => ParserT Text st m ()
+stopLine :: Monad m => ParserT Sources st m ()
stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs
index 698bfd3d7..a0d4534f1 100644
--- a/src/Text/Pandoc/Readers/Muse.hs
+++ b/src/Text/Pandoc/Readers/Muse.hs
@@ -37,18 +37,19 @@ import Text.Pandoc.Error (PandocError (PandocParsecError))
import Text.Pandoc.Logging
import Text.Pandoc.Options
import Text.Pandoc.Parsing
-import Text.Pandoc.Shared (crFilter, trimr, tshow)
+import Text.Pandoc.Shared (trimr, tshow)
-- | Read Muse from an input string and return a Pandoc document.
-readMuse :: PandocMonad m
+readMuse :: (PandocMonad m, ToSources a)
=> ReaderOptions
- -> Text
+ -> a
-> m Pandoc
readMuse opts s = do
- let input = crFilter s
- res <- flip runReaderT def $ runParserT parseMuse def{ museOptions = opts } "source" input
+ let sources = toSources s
+ res <- flip runReaderT def $ runParserT parseMuse def{ museOptions = opts }
+ (initialSourceName sources) sources
case res of
- Left e -> throwError $ PandocParsecError input e
+ Left e -> throwError $ PandocParsecError sources e
Right d -> return d
type F = Future MuseState
@@ -82,7 +83,7 @@ instance Default MuseEnv where
, museInPara = False
}
-type MuseParser m = ParserT Text MuseState (ReaderT MuseEnv m)
+type MuseParser m = ParserT Sources MuseState (ReaderT MuseEnv m)
instance HasReaderOptions MuseState where
extractReaderOptions = museOptions
@@ -155,7 +156,7 @@ firstColumn = getPosition >>= \pos -> guard (sourceColumn pos == 1)
-- * Parsers
-- | Parse end-of-line, which can be either a newline or end-of-file.
-eol :: Stream s m Char => ParserT s st m ()
+eol :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m ()
eol = void newline <|> eof
getIndent :: PandocMonad m
diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs
index 9c8bc0374..58f235e81 100644
--- a/src/Text/Pandoc/Readers/Native.hs
+++ b/src/Text/Pandoc/Readers/Native.hs
@@ -21,6 +21,7 @@ import Control.Monad.Except (throwError)
import Data.Text (Text)
import Text.Pandoc.Class.PandocMonad (PandocMonad)
import Text.Pandoc.Error
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
-- | Read native formatted text and return a Pandoc document.
-- The input may be a full pandoc document, a block list, a block,
@@ -32,14 +33,15 @@ import Text.Pandoc.Error
--
-- > Pandoc nullMeta [Plain [Str "hi"]]
--
-readNative :: PandocMonad m
+readNative :: (PandocMonad m, ToSources a)
=> ReaderOptions
- -> Text -- ^ String to parse (assuming @'\n'@ line endings)
+ -> a
-> m Pandoc
readNative _ s =
- case maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead s) of
- Right doc -> return doc
- Left _ -> throwError $ PandocParseError "couldn't read native"
+ let t = sourcesToText . toSources $ s
+ in case maybe (Pandoc nullMeta <$> readBlocks t) Right (safeRead t) of
+ Right doc -> return doc
+ Left _ -> throwError $ PandocParseError "couldn't read native"
readBlocks :: Text -> Either PandocError [Block]
readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead s)
diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs
index 5f2ddb876..668c9ca11 100644
--- a/src/Text/Pandoc/Readers/OPML.hs
+++ b/src/Text/Pandoc/Readers/OPML.hs
@@ -24,7 +24,8 @@ import Text.Pandoc.Options
import Text.Pandoc.Error (PandocError(..))
import Text.Pandoc.Readers.HTML (readHtml)
import Text.Pandoc.Readers.Markdown (readMarkdown)
-import Text.Pandoc.Shared (crFilter, blocksToInlines')
+import Text.Pandoc.Shared (blocksToInlines')
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
import Text.Pandoc.XML.Light
import Control.Monad.Except (throwError)
@@ -46,10 +47,14 @@ instance Default OPMLState where
, opmlOptions = def
}
-readOPML :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readOPML :: (PandocMonad m, ToSources a)
+ => ReaderOptions
+ -> a
+ -> m Pandoc
readOPML opts inp = do
- (bs, st') <- runStateT
- (case parseXMLContents (TL.fromStrict (crFilter inp)) of
+ let sources = toSources inp
+ (bs, st') <-
+ runStateT (case parseXMLContents (TL.fromStrict . sourcesToText $ sources) of
Left msg -> throwError $ PandocXMLError "" msg
Right ns -> mapM parseBlock ns)
def{ opmlOptions = opts }
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index afeb27a87..8823befdd 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -18,22 +18,19 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad)
import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.Pandoc.Parsing (reportLogMessages)
-import Text.Pandoc.Shared (crFilter)
-
+import Text.Pandoc.Sources (ToSources(..), ensureFinalNewlines)
import Control.Monad.Except (throwError)
import Control.Monad.Reader (runReaderT)
-import Data.Text (Text)
-
-- | Parse org-mode string and return a Pandoc document.
-readOrg :: PandocMonad m
+readOrg :: (PandocMonad m, ToSources a)
=> ReaderOptions -- ^ Reader options
- -> Text -- ^ String to parse (assuming @'\n'@ line endings)
+ -> a
-> m Pandoc
readOrg opts s = do
parsed <- flip runReaderT def $
readWithM parseOrg (optionsToParserState opts)
- (crFilter s <> "\n\n")
+ (ensureFinalNewlines 2 (toSources s))
case parsed of
Right result -> return result
Left e -> throwError e
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
index 519a6ce04..054f2611a 100644
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -29,6 +29,7 @@ import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline)
import Text.TeXMath (DisplayType (..), readTeX, writePandoc)
+import Text.Pandoc.Sources (ToSources(..))
import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
import Control.Monad (guard, mplus, mzero, unless, void, when)
@@ -802,7 +803,7 @@ inlineLaTeX = try $ do
parseAsInlineLaTeX :: PandocMonad m
=> Text -> TeXExport -> OrgParser m (Maybe Inlines)
parseAsInlineLaTeX cs = \case
- TeXExport -> maybeRight <$> runParserT inlineCommand state "" cs
+ TeXExport -> maybeRight <$> runParserT inlineCommand state "" (toSources cs)
TeXIgnore -> return (Just mempty)
TeXVerbatim -> return (Just $ B.str cs)
diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs
index 6ed24a602..c7ea02815 100644
--- a/src/Text/Pandoc/Readers/Org/Parsing.hs
+++ b/src/Text/Pandoc/Readers/Org/Parsing.hs
@@ -114,7 +114,7 @@ import Control.Monad (guard)
import Control.Monad.Reader (ReaderT)
-- | The parser used to read org files.
-type OrgParser m = ParserT Text OrgParserState (ReaderT OrgParserLocal m)
+type OrgParser m = ParserT Sources OrgParserState (ReaderT OrgParserLocal m)
--
-- Adaptions and specializations of parsing utilities
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index ac4c0b6cb..a3fcf028c 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -38,25 +38,24 @@ import Text.Pandoc.Options
import Text.Pandoc.Parsing
import Text.Pandoc.Shared
import qualified Text.Pandoc.UTF8 as UTF8
-import Text.Printf (printf)
import Data.Time.Format
-- TODO:
-- [ ] .. parsed-literal
-- | Parse reStructuredText string and return Pandoc document.
-readRST :: PandocMonad m
+readRST :: (PandocMonad m, ToSources a)
=> ReaderOptions -- ^ Reader options
- -> Text -- ^ Text to parse (assuming @'\n'@ line endings)
+ -> a
-> m Pandoc
readRST opts s = do
parsed <- readWithM parseRST def{ stateOptions = opts }
- (crFilter s <> "\n\n")
+ (ensureFinalNewlines 2 (toSources s))
case parsed of
Right result -> return result
Left e -> throwError e
-type RSTParser m = ParserT Text ParserState m
+type RSTParser m = ParserT Sources ParserState m
--
-- Constants and data structure definitions
@@ -151,11 +150,19 @@ parseRST = do
startPos <- getPosition
-- go through once just to get list of reference keys and notes
-- docMinusKeys is the raw document with blanks where the keys were...
- docMinusKeys <- T.concat <$>
- manyTill (referenceKey <|> anchorDef <|>
- noteBlock <|> citationBlock <|>
- (snd <$> withRaw comment) <|>
- headerBlock <|> lineClump) eof
+ let chunk = referenceKey
+ <|> anchorDef
+ <|> noteBlock
+ <|> citationBlock
+ <|> (snd <$> withRaw comment)
+ <|> headerBlock
+ <|> lineClump
+ docMinusKeys <- Sources <$>
+ manyTill (do pos <- getPosition
+ t <- chunk
+ return (pos, t)) eof
+ -- UGLY: we collapse source position information.
+ -- TODO: fix the parser to use the F monad instead of two passes
setInput docMinusKeys
setPosition startPos
st' <- getState
@@ -348,7 +355,7 @@ singleHeader' = try $ do
-- hrule block
--
-hrule :: Monad m => ParserT Text st m Blocks
+hrule :: Monad m => ParserT Sources st m Blocks
hrule = try $ do
chr <- oneOf underlineChars
count 3 (char chr)
@@ -363,7 +370,7 @@ hrule = try $ do
-- read a line indented by a given string
indentedLine :: (HasReaderOptions st, Monad m)
- => Int -> ParserT Text st m Text
+ => Int -> ParserT Sources st m Text
indentedLine indents = try $ do
lookAhead spaceChar
gobbleAtMostSpaces indents
@@ -372,7 +379,7 @@ indentedLine indents = try $ do
-- one or more indented lines, possibly separated by blank lines.
-- any amount of indentation will work.
indentedBlock :: (HasReaderOptions st, Monad m)
- => ParserT Text st m Text
+ => ParserT Sources st m Text
indentedBlock = try $ do
indents <- length <$> lookAhead (many1 spaceChar)
lns <- many1 $ try $ do b <- option "" blanklines
@@ -381,20 +388,20 @@ indentedBlock = try $ do
optional blanklines
return $ T.unlines lns
-quotedBlock :: Monad m => ParserT Text st m Text
+quotedBlock :: Monad m => ParserT Sources st m Text
quotedBlock = try $ do
quote <- lookAhead $ oneOf "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
lns <- many1 $ lookAhead (char quote) >> anyLine
optional blanklines
return $ T.unlines lns
-codeBlockStart :: Monad m => ParserT Text st m Char
+codeBlockStart :: Monad m => ParserT Sources st m Char
codeBlockStart = string "::" >> blankline >> blankline
-codeBlock :: Monad m => ParserT Text ParserState m Blocks
+codeBlock :: Monad m => ParserT Sources ParserState m Blocks
codeBlock = try $ codeBlockStart >> codeBlockBody
-codeBlockBody :: Monad m => ParserT Text ParserState m Blocks
+codeBlockBody :: Monad m => ParserT Sources ParserState m Blocks
codeBlockBody = do
lang <- stateRstHighlight <$> getState
try $ B.codeBlockWith ("", maybeToList lang, []) . stripTrailingNewlines <$>
@@ -410,14 +417,14 @@ lhsCodeBlock = try $ do
return $ B.codeBlockWith ("", ["haskell","literate"], [])
$ T.intercalate "\n" lns
-latexCodeBlock :: Monad m => ParserT Text st m [Text]
+latexCodeBlock :: Monad m => ParserT Sources st m [Text]
latexCodeBlock = try $ do
try (latexBlockLine "\\begin{code}")
many1Till anyLine (try $ latexBlockLine "\\end{code}")
where
latexBlockLine s = skipMany spaceChar >> string s >> blankline
-birdCodeBlock :: Monad m => ParserT Text st m [Text]
+birdCodeBlock :: Monad m => ParserT Sources st m [Text]
birdCodeBlock = filterSpace <$> many1 birdTrackLine
where filterSpace lns =
-- if (as is normal) there is always a space after >, drop it
@@ -425,7 +432,7 @@ birdCodeBlock = filterSpace <$> many1 birdTrackLine
then map (T.drop 1) lns
else lns
-birdTrackLine :: Monad m => ParserT Text st m Text
+birdTrackLine :: Monad m => ParserT Sources st m Text
birdTrackLine = char '>' >> anyLine
--
@@ -456,7 +463,6 @@ includeDirective top fields body = do
let (startLine :: Maybe Int) = lookup "start-line" fields >>= safeRead
let (endLine :: Maybe Int) = lookup "end-line" fields >>= safeRead
oldPos <- getPosition
- oldInput <- getInput
containers <- stateContainers <$> getState
when (f `elem` containers) $
throwError $ PandocParseError $ "Include file loop at " <> tshow oldPos
@@ -494,15 +500,11 @@ includeDirective top fields body = do
Nothing -> case lookup "literal" fields of
Just _ -> return $ B.rawBlock "rst" contents'
Nothing -> do
- setPosition $ newPos (T.unpack f) 1 1
- setInput $ contents' <> "\n"
- bs <- optional blanklines >>
- (mconcat <$> many block)
- setInput oldInput
- setPosition oldPos
+ addToSources (initialPos (T.unpack f))
+ (contents' <> "\n")
updateState $ \s -> s{ stateContainers =
tail $ stateContainers s }
- return bs
+ return mempty
--
@@ -526,7 +528,7 @@ definitionList :: PandocMonad m => RSTParser m Blocks
definitionList = B.definitionList <$> many1 definitionListItem
-- parses bullet list start and returns its length (inc. following whitespace)
-bulletListStart :: Monad m => ParserT Text st m Int
+bulletListStart :: Monad m => ParserT Sources st m Int
bulletListStart = try $ do
notFollowedBy' hrule -- because hrules start out just like lists
marker <- oneOf bulletListMarkers
@@ -1103,7 +1105,7 @@ quotedReferenceName = try $ do
-- plus isolated (no two adjacent) internal hyphens, underscores,
-- periods, colons and plus signs; no whitespace or other characters
-- are allowed.
-simpleReferenceName :: Monad m => ParserT Text st m Text
+simpleReferenceName :: Monad m => ParserT Sources st m Text
simpleReferenceName = do
x <- alphaNum
xs <- many $ alphaNum
@@ -1122,7 +1124,7 @@ referenceKey = do
-- return enough blanks to replace key
return $ T.replicate (sourceLine endPos - sourceLine startPos) "\n"
-targetURI :: Monad m => ParserT Text st m Text
+targetURI :: Monad m => ParserT Sources st m Text
targetURI = do
skipSpaces
optional $ try $ newline >> notFollowedBy blankline
@@ -1160,8 +1162,10 @@ anonymousKey :: Monad m => RSTParser m ()
anonymousKey = try $ do
oneOfStrings [".. __:", "__"]
src <- targetURI
- pos <- getPosition
- let key = toKey $ "_" <> T.pack (printf "%09d" (sourceLine pos))
+ -- we need to ensure that the keys are ordered by occurrence in
+ -- the document.
+ numKeys <- M.size . stateKeys <$> getState
+ let key = toKey $ "_" <> T.pack (show numKeys)
updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $
stateKeys s }
@@ -1250,13 +1254,13 @@ headerBlock = do
-- Grid tables TODO:
-- - column spans
-dashedLine :: Monad m => Char -> ParserT Text st m (Int, Int)
+dashedLine :: Monad m => Char -> ParserT Sources st m (Int, Int)
dashedLine ch = do
dashes <- many1 (char ch)
sp <- many (char ' ')
return (length dashes, length $ dashes ++ sp)
-simpleDashedLines :: Monad m => Char -> ParserT Text st m [(Int,Int)]
+simpleDashedLines :: Monad m => Char -> ParserT Sources st m [(Int,Int)]
simpleDashedLines ch = try $ many1 (dashedLine ch)
-- Parse a table row separator
@@ -1382,7 +1386,7 @@ hyphens = do
-- don't want to treat endline after hyphen or dash as a space
return $ B.str result
-escapedChar :: Monad m => ParserT Text st m Inlines
+escapedChar :: Monad m => ParserT Sources st m Inlines
escapedChar = do c <- escaped anyChar
return $ if c == ' ' || c == '\n' || c == '\r'
-- '\ ' is null in RST
diff --git a/src/Text/Pandoc/Readers/Roff.hs b/src/Text/Pandoc/Readers/Roff.hs
index 509ce1377..47f16ef4b 100644
--- a/src/Text/Pandoc/Readers/Roff.hs
+++ b/src/Text/Pandoc/Readers/Roff.hs
@@ -42,7 +42,6 @@ import Text.Pandoc.Logging (LogMessage(..))
import Text.Pandoc.Options
import Text.Pandoc.Parsing
import Text.Pandoc.Shared (safeRead)
-import Text.Parsec hiding (tokenPrim)
import Text.Pandoc.RoffChar (characterCodes, combiningAccents)
import qualified Data.Sequence as Seq
import qualified Data.Foldable as Foldable
@@ -122,16 +121,16 @@ instance Default RoffState where
, afterConditional = False
}
-type RoffLexer m = ParserT T.Text RoffState m
+type RoffLexer m = ParserT Sources RoffState m
--
-- Lexer: T.Text -> RoffToken
--
-eofline :: Stream s m Char => ParsecT s u m ()
+eofline :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s u m ()
eofline = void newline <|> eof <|> () <$ lookAhead (string "\\}")
-spacetab :: Stream s m Char => ParsecT s u m Char
+spacetab :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s u m Char
spacetab = char ' ' <|> char '\t'
characterCodeMap :: M.Map T.Text Char
@@ -303,8 +302,7 @@ expandString = try $ do
char '*'
cs <- escapeArg <|> countChar 1 anyChar
s <- linePartsToText <$> resolveText cs pos
- getInput >>= setInput . (s <>)
- return ()
+ addToInput s
-- Parses: '..'
quoteArg :: PandocMonad m => RoffLexer m T.Text
@@ -316,7 +314,7 @@ escFont = do
font' <- if T.null font || font == "P"
then prevFont <$> getState
else return $ foldr processFontLetter defaultFontSpec $ T.unpack font
- modifyState $ \st -> st{ prevFont = currentFont st
+ updateState $ \st -> st{ prevFont = currentFont st
, currentFont = font' }
return [Font font']
where
@@ -372,8 +370,8 @@ lexTable pos = do
spaces
opts <- try tableOptions <|> [] <$ optional (char ';')
case lookup "tab" opts of
- Just (T.uncons -> Just (c, _)) -> modifyState $ \st -> st{ tableTabChar = c }
- _ -> modifyState $ \st -> st{ tableTabChar = '\t' }
+ Just (T.uncons -> Just (c, _)) -> updateState $ \st -> st{ tableTabChar = c }
+ _ -> updateState $ \st -> st{ tableTabChar = '\t' }
spaces
skipMany lexComment
spaces
@@ -489,18 +487,18 @@ lexConditional mname = do
ifPart <- do
optional $ try $ char '\\' >> newline
lexGroup
- <|> do modifyState $ \s -> s{ afterConditional = True }
+ <|> do updateState $ \s -> s{ afterConditional = True }
t <- manToken
- modifyState $ \s -> s{ afterConditional = False }
+ updateState $ \s -> s{ afterConditional = False }
return t
case mbtest of
Nothing -> do
- putState st -- reset state, so we don't record macros in skipped section
+ setState st -- reset state, so we don't record macros in skipped section
report $ SkippedContent (T.cons '.' mname) pos
return mempty
Just True -> return ifPart
Just False -> do
- putState st
+ setState st
return mempty
expression :: PandocMonad m => RoffLexer m (Maybe Bool)
@@ -515,7 +513,7 @@ expression = do
_ -> Nothing
where
returnValue v = do
- modifyState $ \st -> st{ lastExpression = v }
+ updateState $ \st -> st{ lastExpression = v }
return v
lexGroup :: PandocMonad m => RoffLexer m RoffTokens
@@ -536,7 +534,7 @@ lexIncludeFile args = do
result <- readFileFromDirs dirs $ T.unpack fp
case result of
Nothing -> report $ CouldNotLoadIncludeFile fp pos
- Just s -> getInput >>= setInput . (s <>)
+ Just s -> addToInput s
return mempty
[] -> return mempty
@@ -564,13 +562,13 @@ lexStringDef args = do -- string definition
(x:ys) -> do
let ts = singleTok $ TextLine (intercalate [RoffStr " " ] ys)
let stringName = linePartsToText x
- modifyState $ \st ->
+ updateState $ \st ->
st{ customMacros = M.insert stringName ts (customMacros st) }
return mempty
lexMacroDef :: PandocMonad m => [Arg] -> RoffLexer m RoffTokens
lexMacroDef args = do -- macro definition
- modifyState $ \st -> st{ roffMode = CopyMode }
+ updateState $ \st -> st{ roffMode = CopyMode }
(macroName, stopMacro) <-
case args of
(x : y : _) -> return (linePartsToText x, linePartsToText y)
@@ -584,7 +582,7 @@ lexMacroDef args = do -- macro definition
_ <- lexArgs
return ()
ts <- mconcat <$> manyTill manToken stop
- modifyState $ \st ->
+ updateState $ \st ->
st{ customMacros = M.insert macroName ts (customMacros st)
, roffMode = NormalMode }
return mempty
diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs
index c4d7bcc93..276d28aaa 100644
--- a/src/Text/Pandoc/Readers/TWiki.hs
+++ b/src/Text/Pandoc/Readers/TWiki.hs
@@ -28,22 +28,22 @@ import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (enclosed, nested)
import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag)
-import Text.Pandoc.Shared (crFilter, tshow)
+import Text.Pandoc.Shared (tshow)
import Text.Pandoc.XML (fromEntities)
-- | Read twiki from an input string and return a Pandoc document.
-readTWiki :: PandocMonad m
+readTWiki :: (PandocMonad m, ToSources a)
=> ReaderOptions
- -> Text
+ -> a
-> m Pandoc
readTWiki opts s = do
- res <- readWithM parseTWiki def{ stateOptions = opts }
- (crFilter s <> "\n\n")
+ let sources = ensureFinalNewlines 2 (toSources s)
+ res <- readWithM parseTWiki def{ stateOptions = opts } sources
case res of
Left e -> throwError e
Right d -> return d
-type TWParser = ParserT Text ParserState
+type TWParser = ParserT Sources ParserState
--
-- utility functions
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 8d7900de4..981878206 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -53,30 +53,34 @@ import Text.Pandoc.Options
import Text.Pandoc.Parsing
import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isInlineTag)
import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline)
-import Text.Pandoc.Shared (crFilter, trim, tshow)
+import Text.Pandoc.Shared (trim, tshow)
-- | Parse a Textile text and return a Pandoc document.
-readTextile :: PandocMonad m
+readTextile :: (PandocMonad m, ToSources a)
=> ReaderOptions -- ^ Reader options
- -> Text -- ^ String to parse (assuming @'\n'@ line endings)
+ -> a
-> m Pandoc
readTextile opts s = do
- parsed <- readWithM parseTextile def{ stateOptions = opts }
- (crFilter s <> "\n\n")
+ let sources = ensureFinalNewlines 2 (toSources s)
+ parsed <- readWithM parseTextile def{ stateOptions = opts } sources
case parsed of
Right result -> return result
Left e -> throwError e
+type TextileParser = ParserT Sources ParserState
-- | Generate a Pandoc ADT from a textile document
-parseTextile :: PandocMonad m => ParserT Text ParserState m Pandoc
+parseTextile :: PandocMonad m => TextileParser m Pandoc
parseTextile = do
many blankline
startPos <- getPosition
-- go through once just to get list of reference keys and notes
-- docMinusKeys is the raw document with blanks where the keys/notes were...
- let firstPassParser = noteBlock <|> lineClump
- manyTill firstPassParser eof >>= setInput . T.concat
+ let firstPassParser = do
+ pos <- getPosition
+ t <- noteBlock <|> lineClump
+ return (pos, t)
+ manyTill firstPassParser eof >>= setInput . Sources
setPosition startPos
st' <- getState
let reversedNotes = stateNotes st'
@@ -84,10 +88,10 @@ parseTextile = do
-- now parse it for real...
Pandoc nullMeta . B.toList <$> parseBlocks -- FIXME
-noteMarker :: PandocMonad m => ParserT Text ParserState m Text
+noteMarker :: PandocMonad m => TextileParser m Text
noteMarker = skipMany spaceChar >> string "fn" >> T.pack <$> manyTill digit (char '.')
-noteBlock :: PandocMonad m => ParserT Text ParserState m Text
+noteBlock :: PandocMonad m => TextileParser m Text
noteBlock = try $ do
startPos <- getPosition
ref <- noteMarker
@@ -102,11 +106,11 @@ noteBlock = try $ do
return $ T.replicate (sourceLine endPos - sourceLine startPos) "\n"
-- | Parse document blocks
-parseBlocks :: PandocMonad m => ParserT Text ParserState m Blocks
+parseBlocks :: PandocMonad m => TextileParser m Blocks
parseBlocks = mconcat <$> manyTill block eof
-- | Block parsers list tried in definition order
-blockParsers :: PandocMonad m => [ParserT Text ParserState m Blocks]
+blockParsers :: PandocMonad m => [TextileParser m Blocks]
blockParsers = [ codeBlock
, header
, blockQuote
@@ -121,22 +125,22 @@ blockParsers = [ codeBlock
]
-- | Any block in the order of definition of blockParsers
-block :: PandocMonad m => ParserT Text ParserState m Blocks
+block :: PandocMonad m => TextileParser m Blocks
block = do
res <- choice blockParsers <?> "block"
trace (T.take 60 $ tshow $ B.toList res)
return res
-commentBlock :: PandocMonad m => ParserT Text ParserState m Blocks
+commentBlock :: PandocMonad m => TextileParser m Blocks
commentBlock = try $ do
string "###."
manyTill anyLine blanklines
return mempty
-codeBlock :: PandocMonad m => ParserT Text ParserState m Blocks
+codeBlock :: PandocMonad m => TextileParser m Blocks
codeBlock = codeBlockTextile <|> codeBlockHtml
-codeBlockTextile :: PandocMonad m => ParserT Text ParserState m Blocks
+codeBlockTextile :: PandocMonad m => TextileParser m Blocks
codeBlockTextile = try $ do
string "bc." <|> string "pre."
extended <- option False (True <$ char '.')
@@ -156,7 +160,7 @@ trimTrailingNewlines :: Text -> Text
trimTrailingNewlines = T.dropWhileEnd (=='\n')
-- | Code Blocks in Textile are between <pre> and </pre>
-codeBlockHtml :: PandocMonad m => ParserT Text ParserState m Blocks
+codeBlockHtml :: PandocMonad m => TextileParser m Blocks
codeBlockHtml = try $ do
(t@(TagOpen _ attrs),_) <- htmlTag (tagOpen (=="pre") (const True))
result' <- T.pack <$> manyTill anyChar (htmlTag (tagClose (=="pre")))
@@ -174,7 +178,7 @@ codeBlockHtml = try $ do
return $ B.codeBlockWith (ident,classes,kvs) result'''
-- | Header of the form "hN. content" with N in 1..6
-header :: PandocMonad m => ParserT Text ParserState m Blocks
+header :: PandocMonad m => TextileParser m Blocks
header = try $ do
char 'h'
level <- digitToInt <$> oneOf "123456"
@@ -186,14 +190,14 @@ header = try $ do
return $ B.headerWith attr' level name
-- | Blockquote of the form "bq. content"
-blockQuote :: PandocMonad m => ParserT Text ParserState m Blocks
+blockQuote :: PandocMonad m => TextileParser m Blocks
blockQuote = try $ do
string "bq" >> attributes >> char '.' >> whitespace
B.blockQuote <$> para
-- Horizontal rule
-hrule :: PandocMonad m => ParserT Text st m Blocks
+hrule :: PandocMonad m => TextileParser m Blocks
hrule = try $ do
skipSpaces
start <- oneOf "-*"
@@ -208,39 +212,39 @@ hrule = try $ do
-- | Can be a bullet list or an ordered list. This implementation is
-- strict in the nesting, sublist must start at exactly "parent depth
-- plus one"
-anyList :: PandocMonad m => ParserT Text ParserState m Blocks
+anyList :: PandocMonad m => TextileParser m Blocks
anyList = try $ anyListAtDepth 1 <* blanklines
-- | This allow one type of list to be nested into an other type,
-- provided correct nesting
-anyListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+anyListAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
anyListAtDepth depth = choice [ bulletListAtDepth depth,
orderedListAtDepth depth,
definitionList ]
-- | Bullet List of given depth, depth being the number of leading '*'
-bulletListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+bulletListAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
bulletListAtDepth depth = try $ B.bulletList <$> many1 (bulletListItemAtDepth depth)
-- | Bullet List Item of given depth, depth being the number of
-- leading '*'
-bulletListItemAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+bulletListItemAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
bulletListItemAtDepth = genericListItemAtDepth '*'
-- | Ordered List of given depth, depth being the number of
-- leading '#'
-orderedListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+orderedListAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
orderedListAtDepth depth = try $ do
items <- many1 (orderedListItemAtDepth depth)
return $ B.orderedList items
-- | Ordered List Item of given depth, depth being the number of
-- leading '#'
-orderedListItemAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+orderedListItemAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
orderedListItemAtDepth = genericListItemAtDepth '#'
-- | Common implementation of list items
-genericListItemAtDepth :: PandocMonad m => Char -> Int -> ParserT Text ParserState m Blocks
+genericListItemAtDepth :: PandocMonad m => Char -> Int -> TextileParser m Blocks
genericListItemAtDepth c depth = try $ do
count depth (char c) >> attributes >> whitespace
contents <- mconcat <$> many ((B.plain . mconcat <$> many1 inline) <|>
@@ -250,25 +254,25 @@ genericListItemAtDepth c depth = try $ do
return $ contents <> sublist
-- | A definition list is a set of consecutive definition items
-definitionList :: PandocMonad m => ParserT Text ParserState m Blocks
+definitionList :: PandocMonad m => TextileParser m Blocks
definitionList = try $ B.definitionList <$> many1 definitionListItem
-- | List start character.
-listStart :: PandocMonad m => ParserT Text ParserState m ()
+listStart :: PandocMonad m => TextileParser m ()
listStart = genericListStart '*'
<|> () <$ genericListStart '#'
<|> () <$ definitionListStart
-genericListStart :: PandocMonad m => Char -> ParserT Text st m ()
+genericListStart :: PandocMonad m => Char -> TextileParser m ()
genericListStart c = () <$ try (many1 (char c) >> whitespace)
-basicDLStart :: PandocMonad m => ParserT Text ParserState m ()
+basicDLStart :: PandocMonad m => TextileParser m ()
basicDLStart = do
char '-'
whitespace
notFollowedBy newline
-definitionListStart :: PandocMonad m => ParserT Text ParserState m Inlines
+definitionListStart :: PandocMonad m => TextileParser m Inlines
definitionListStart = try $ do
basicDLStart
trimInlines . mconcat <$>
@@ -281,15 +285,15 @@ definitionListStart = try $ do
-- the term defined, then spaces and ":=". The definition follows, on
-- the same single line, or spaned on multiple line, after a line
-- break.
-definitionListItem :: PandocMonad m => ParserT Text ParserState m (Inlines, [Blocks])
+definitionListItem :: PandocMonad m => TextileParser m (Inlines, [Blocks])
definitionListItem = try $ do
term <- mconcat . intersperse B.linebreak <$> many1 definitionListStart
def' <- string ":=" *> optional whitespace *> (multilineDef <|> inlineDef)
return (term, def')
- where inlineDef :: PandocMonad m => ParserT Text ParserState m [Blocks]
+ where inlineDef :: PandocMonad m => TextileParser m [Blocks]
inlineDef = liftM (\d -> [B.plain d])
$ optional whitespace >> (trimInlines . mconcat <$> many inline) <* newline
- multilineDef :: PandocMonad m => ParserT Text ParserState m [Blocks]
+ multilineDef :: PandocMonad m => TextileParser m [Blocks]
multilineDef = try $ do
optional whitespace >> newline
s <- T.pack <$> many1Till anyChar (try (string "=:" >> newline))
@@ -300,7 +304,7 @@ definitionListItem = try $ do
-- raw content
-- | A raw Html Block, optionally followed by blanklines
-rawHtmlBlock :: PandocMonad m => ParserT Text ParserState m Blocks
+rawHtmlBlock :: PandocMonad m => TextileParser m Blocks
rawHtmlBlock = try $ do
skipMany spaceChar
(_,b) <- htmlTag isBlockTag
@@ -308,14 +312,14 @@ rawHtmlBlock = try $ do
return $ B.rawBlock "html" b
-- | Raw block of LaTeX content
-rawLaTeXBlock' :: PandocMonad m => ParserT Text ParserState m Blocks
+rawLaTeXBlock' :: PandocMonad m => TextileParser m Blocks
rawLaTeXBlock' = do
guardEnabled Ext_raw_tex
B.rawBlock "latex" <$> (rawLaTeXBlock <* spaces)
-- | In textile, paragraphs are separated by blank lines.
-para :: PandocMonad m => ParserT Text ParserState m Blocks
+para :: PandocMonad m => TextileParser m Blocks
para = B.para . trimInlines . mconcat <$> many1 inline
-- Tables
@@ -326,7 +330,7 @@ toAlignment '>' = AlignRight
toAlignment '=' = AlignCenter
toAlignment _ = AlignDefault
-cellAttributes :: PandocMonad m => ParserT Text ParserState m (Bool, Alignment)
+cellAttributes :: PandocMonad m => TextileParser m (Bool, Alignment)
cellAttributes = try $ do
isHeader <- option False (True <$ char '_')
-- we just ignore colspan and rowspan markers:
@@ -339,7 +343,7 @@ cellAttributes = try $ do
return (isHeader, alignment)
-- | A table cell spans until a pipe |
-tableCell :: PandocMonad m => ParserT Text ParserState m ((Bool, Alignment), Blocks)
+tableCell :: PandocMonad m => TextileParser m ((Bool, Alignment), Blocks)
tableCell = try $ do
char '|'
(isHeader, alignment) <- option (False, AlignDefault) cellAttributes
@@ -350,7 +354,7 @@ tableCell = try $ do
return ((isHeader, alignment), B.plain content)
-- | A table row is made of many table cells
-tableRow :: PandocMonad m => ParserT Text ParserState m [((Bool, Alignment), Blocks)]
+tableRow :: PandocMonad m => TextileParser m [((Bool, Alignment), Blocks)]
tableRow = try $ do
-- skip optional row attributes
optional $ try $ do
@@ -360,7 +364,7 @@ tableRow = try $ do
many1 tableCell <* char '|' <* blankline
-- | A table with an optional header.
-table :: PandocMonad m => ParserT Text ParserState m Blocks
+table :: PandocMonad m => TextileParser m Blocks
table = try $ do
-- ignore table attributes
caption <- option mempty $ try $ do
@@ -388,7 +392,7 @@ table = try $ do
(TableFoot nullAttr [])
-- | Ignore markers for cols, thead, tfoot.
-ignorableRow :: PandocMonad m => ParserT Text ParserState m ()
+ignorableRow :: PandocMonad m => TextileParser m ()
ignorableRow = try $ do
char '|'
oneOf ":^-~"
@@ -397,7 +401,7 @@ ignorableRow = try $ do
_ <- anyLine
return ()
-explicitBlockStart :: PandocMonad m => Text -> ParserT Text ParserState m ()
+explicitBlockStart :: PandocMonad m => Text -> TextileParser m ()
explicitBlockStart name = try $ do
string (T.unpack name)
attributes
@@ -409,8 +413,8 @@ explicitBlockStart name = try $ do
-- However, they can be used to set HTML/CSS attributes when needed.
maybeExplicitBlock :: PandocMonad m
=> Text -- ^ block tag name
- -> ParserT Text ParserState m Blocks -- ^ implicit block
- -> ParserT Text ParserState m Blocks
+ -> TextileParser m Blocks -- ^ implicit block
+ -> TextileParser m Blocks
maybeExplicitBlock name blk = try $ do
optional $ explicitBlockStart name
blk
@@ -423,11 +427,11 @@ maybeExplicitBlock name blk = try $ do
-- | Any inline element
-inline :: PandocMonad m => ParserT Text ParserState m Inlines
+inline :: PandocMonad m => TextileParser m Inlines
inline = choice inlineParsers <?> "inline"
-- | Inline parsers tried in order
-inlineParsers :: PandocMonad m => [ParserT Text ParserState m Inlines]
+inlineParsers :: PandocMonad m => [TextileParser m Inlines]
inlineParsers = [ str
, whitespace
, endline
@@ -447,7 +451,7 @@ inlineParsers = [ str
]
-- | Inline markups
-inlineMarkup :: PandocMonad m => ParserT Text ParserState m Inlines
+inlineMarkup :: PandocMonad m => TextileParser m Inlines
inlineMarkup = choice [ simpleInline (string "??") (B.cite [])
, simpleInline (string "**") B.strong
, simpleInline (string "__") B.emph
@@ -461,29 +465,29 @@ inlineMarkup = choice [ simpleInline (string "??") (B.cite [])
]
-- | Trademark, registered, copyright
-mark :: PandocMonad m => ParserT Text st m Inlines
+mark :: PandocMonad m => TextileParser m Inlines
mark = try $ char '(' >> (try tm <|> try reg <|> copy)
-reg :: PandocMonad m => ParserT Text st m Inlines
+reg :: PandocMonad m => TextileParser m Inlines
reg = do
oneOf "Rr"
char ')'
return $ B.str "\174"
-tm :: PandocMonad m => ParserT Text st m Inlines
+tm :: PandocMonad m => TextileParser m Inlines
tm = do
oneOf "Tt"
oneOf "Mm"
char ')'
return $ B.str "\8482"
-copy :: PandocMonad m => ParserT Text st m Inlines
+copy :: PandocMonad m => TextileParser m Inlines
copy = do
oneOf "Cc"
char ')'
return $ B.str "\169"
-note :: PandocMonad m => ParserT Text ParserState m Inlines
+note :: PandocMonad m => TextileParser m Inlines
note = try $ do
ref <- char '[' *> many1 digit <* char ']'
notes <- stateNotes <$> getState
@@ -507,13 +511,13 @@ wordBoundaries :: [Char]
wordBoundaries = markupChars <> stringBreakers
-- | Parse a hyphened sequence of words
-hyphenedWords :: PandocMonad m => ParserT Text ParserState m Text
+hyphenedWords :: PandocMonad m => TextileParser m Text
hyphenedWords = do
x <- wordChunk
xs <- many (try $ char '-' >> wordChunk)
return $ T.intercalate "-" (x:xs)
-wordChunk :: PandocMonad m => ParserT Text ParserState m Text
+wordChunk :: PandocMonad m => TextileParser m Text
wordChunk = try $ do
hd <- noneOf wordBoundaries
tl <- many ( noneOf wordBoundaries <|>
@@ -522,7 +526,7 @@ wordChunk = try $ do
return $ T.pack $ hd:tl
-- | Any string
-str :: PandocMonad m => ParserT Text ParserState m Inlines
+str :: PandocMonad m => TextileParser m Inlines
str = do
baseStr <- hyphenedWords
-- RedCloth compliance : if parsed word is uppercase and immediately
@@ -535,11 +539,11 @@ str = do
return $ B.str fullStr
-- | Some number of space chars
-whitespace :: PandocMonad m => ParserT Text st m Inlines
+whitespace :: PandocMonad m => TextileParser m Inlines
whitespace = many1 spaceChar >> return B.space <?> "whitespace"
-- | In Textile, an isolated endline character is a line break
-endline :: PandocMonad m => ParserT Text ParserState m Inlines
+endline :: PandocMonad m => TextileParser m Inlines
endline = try $ do
newline
notFollowedBy blankline
@@ -547,18 +551,18 @@ endline = try $ do
notFollowedBy rawHtmlBlock
return B.linebreak
-rawHtmlInline :: PandocMonad m => ParserT Text ParserState m Inlines
+rawHtmlInline :: PandocMonad m => TextileParser m Inlines
rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag
-- | Raw LaTeX Inline
-rawLaTeXInline' :: PandocMonad m => ParserT Text ParserState m Inlines
+rawLaTeXInline' :: PandocMonad m => TextileParser m Inlines
rawLaTeXInline' = try $ do
guardEnabled Ext_raw_tex
B.rawInline "latex" <$> rawLaTeXInline
-- | Textile standard link syntax is "label":target. But we
-- can also have ["label":target].
-link :: PandocMonad m => ParserT Text ParserState m Inlines
+link :: PandocMonad m => TextileParser m Inlines
link = try $ do
bracketed <- (True <$ char '[') <|> return False
char '"' *> notFollowedBy (oneOf " \t\n\r")
@@ -578,7 +582,7 @@ link = try $ do
else B.spanWith attr $ B.link url "" name'
-- | image embedding
-image :: PandocMonad m => ParserT Text ParserState m Inlines
+image :: PandocMonad m => TextileParser m Inlines
image = try $ do
char '!' >> notFollowedBy space
(ident, cls, kvs) <- attributes
@@ -590,51 +594,51 @@ image = try $ do
char '!'
return $ B.imageWith attr src alt (B.str alt)
-escapedInline :: PandocMonad m => ParserT Text ParserState m Inlines
+escapedInline :: PandocMonad m => TextileParser m Inlines
escapedInline = escapedEqs <|> escapedTag
-escapedEqs :: PandocMonad m => ParserT Text ParserState m Inlines
+escapedEqs :: PandocMonad m => TextileParser m Inlines
escapedEqs = B.str . T.pack <$>
try (string "==" *> manyTill anyChar' (try $ string "=="))
-- | literal text escaped btw <notextile> tags
-escapedTag :: PandocMonad m => ParserT Text ParserState m Inlines
+escapedTag :: PandocMonad m => TextileParser m Inlines
escapedTag = B.str . T.pack <$>
try (string "<notextile>" *>
manyTill anyChar' (try $ string "</notextile>"))
-- | Any special symbol defined in wordBoundaries
-symbol :: PandocMonad m => ParserT Text ParserState m Inlines
+symbol :: PandocMonad m => TextileParser m Inlines
symbol = B.str . T.singleton <$> (notFollowedBy newline *>
notFollowedBy rawHtmlBlock *>
oneOf wordBoundaries)
-- | Inline code
-code :: PandocMonad m => ParserT Text ParserState m Inlines
+code :: PandocMonad m => TextileParser m Inlines
code = code1 <|> code2
-- any character except a newline before a blank line
-anyChar' :: PandocMonad m => ParserT Text ParserState m Char
+anyChar' :: PandocMonad m => TextileParser m Char
anyChar' =
satisfy (/='\n') <|>
try (char '\n' <* notFollowedBy blankline)
-code1 :: PandocMonad m => ParserT Text ParserState m Inlines
+code1 :: PandocMonad m => TextileParser m Inlines
code1 = B.code . T.pack <$> surrounded (char '@') anyChar'
-code2 :: PandocMonad m => ParserT Text ParserState m Inlines
+code2 :: PandocMonad m => TextileParser m Inlines
code2 = do
htmlTag (tagOpen (=="tt") null)
B.code . T.pack <$> manyTill anyChar' (try $ htmlTag $ tagClose (=="tt"))
-- | Html / CSS attributes
-attributes :: PandocMonad m => ParserT Text ParserState m Attr
+attributes :: PandocMonad m => TextileParser m Attr
attributes = foldl' (flip ($)) ("",[],[]) <$>
try (do special <- option id specialAttribute
attrs <- many attribute
return (special : attrs))
-specialAttribute :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+specialAttribute :: PandocMonad m => TextileParser m (Attr -> Attr)
specialAttribute = do
alignStr <- ("center" <$ char '=') <|>
("justify" <$ try (string "<>")) <|>
@@ -643,11 +647,11 @@ specialAttribute = do
notFollowedBy spaceChar
return $ addStyle $ T.pack $ "text-align:" ++ alignStr
-attribute :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+attribute :: PandocMonad m => TextileParser m (Attr -> Attr)
attribute = try $
(classIdAttr <|> styleAttr <|> langAttr) <* notFollowedBy spaceChar
-classIdAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+classIdAttr :: PandocMonad m => TextileParser m (Attr -> Attr)
classIdAttr = try $ do -- (class class #id)
char '('
ws <- T.words `fmap` T.pack <$> manyTill anyChar' (char ')')
@@ -659,7 +663,7 @@ classIdAttr = try $ do -- (class class #id)
classes'
-> return $ \(_,_,keyvals) -> ("",classes',keyvals)
-styleAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+styleAttr :: PandocMonad m => TextileParser m (Attr -> Attr)
styleAttr = do
style <- try $ enclosed (char '{') (char '}') anyChar'
return $ addStyle $ T.pack style
@@ -670,23 +674,23 @@ addStyle style (id',classes,keyvals) =
where keyvals' = ("style", style') : [(k,v) | (k,v) <- keyvals, k /= "style"]
style' = style <> ";" <> T.concat [v | ("style",v) <- keyvals]
-langAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+langAttr :: PandocMonad m => TextileParser m (Attr -> Attr)
langAttr = do
lang <- try $ enclosed (char '[') (char ']') alphaNum
return $ \(id',classes,keyvals) -> (id',classes,("lang",T.pack lang):keyvals)
-- | Parses material surrounded by a parser.
surrounded :: (PandocMonad m, Show t)
- => ParserT Text st m t -- ^ surrounding parser
- -> ParserT Text st m a -- ^ content parser (to be used repeatedly)
- -> ParserT Text st m [a]
+ => ParserT Sources st m t -- ^ surrounding parser
+ -> ParserT Sources st m a -- ^ content parser (to be used repeatedly)
+ -> ParserT Sources st m [a]
surrounded border =
enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border)
simpleInline :: PandocMonad m
- => ParserT Text ParserState m t -- ^ surrounding parser
+ => TextileParser m t -- ^ surrounding parser
-> (Inlines -> Inlines) -- ^ Inline constructor
- -> ParserT Text ParserState m Inlines -- ^ content parser (to be used repeatedly)
+ -> TextileParser m Inlines -- ^ content parser (to be used repeatedly)
simpleInline border construct = try $ do
notAfterString
border *> notFollowedBy (oneOf " \t\n\r")
@@ -700,7 +704,7 @@ simpleInline border construct = try $ do
then body
else B.spanWith attr body
-groupedInlineMarkup :: PandocMonad m => ParserT Text ParserState m Inlines
+groupedInlineMarkup :: PandocMonad m => TextileParser m Inlines
groupedInlineMarkup = try $ do
char '['
sp1 <- option mempty $ B.space <$ whitespace
@@ -709,5 +713,5 @@ groupedInlineMarkup = try $ do
char ']'
return $ sp1 <> result <> sp2
-eof' :: Monad m => ParserT Text s m Char
+eof' :: Monad m => ParserT Sources s m Char
eof' = '\n' <$ eof
diff --git a/src/Text/Pandoc/Readers/TikiWiki.hs b/src/Text/Pandoc/Readers/TikiWiki.hs
index fb4b662c5..5c414fdec 100644
--- a/src/Text/Pandoc/Readers/TikiWiki.hs
+++ b/src/Text/Pandoc/Readers/TikiWiki.hs
@@ -30,23 +30,23 @@ import Text.Pandoc.Definition
import Text.Pandoc.Logging (Verbosity (..))
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (enclosed, nested)
-import Text.Pandoc.Shared (crFilter, safeRead)
+import Text.Pandoc.Shared (safeRead)
import Text.Pandoc.XML (fromEntities)
import Text.Printf (printf)
-- | Read TikiWiki from an input string and return a Pandoc document.
-readTikiWiki :: PandocMonad m
+readTikiWiki :: (PandocMonad m, ToSources a)
=> ReaderOptions
- -> Text
+ -> a
-> m Pandoc
readTikiWiki opts s = do
- res <- readWithM parseTikiWiki def{ stateOptions = opts }
- (crFilter s <> "\n\n")
+ let sources = ensureFinalNewlines 2 (toSources s)
+ res <- readWithM parseTikiWiki def{ stateOptions = opts } sources
case res of
Left e -> throwError e
Right d -> return d
-type TikiWikiParser = ParserT Text ParserState
+type TikiWikiParser = ParserT Sources ParserState
--
-- utility functions
diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs
index d355a4b55..6f92f0063 100644
--- a/src/Text/Pandoc/Readers/Txt2Tags.hs
+++ b/src/Text/Pandoc/Readers/Txt2Tags.hs
@@ -33,9 +33,9 @@ import Data.Time (defaultTimeLocale)
import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (space, spaces, uri)
-import Text.Pandoc.Shared (compactify, compactifyDL, crFilter, escapeURI)
+import Text.Pandoc.Shared (compactify, compactifyDL, escapeURI)
-type T2T = ParserT Text ParserState (Reader T2TMeta)
+type T2T = ParserT Sources ParserState (Reader T2TMeta)
-- | An object for the T2T macros meta information
-- the contents of each field is simply substituted verbatim into the file
@@ -68,15 +68,15 @@ getT2TMeta = do
(intercalate ", " inps) outp
-- | Read Txt2Tags from an input string returning a Pandoc document
-readTxt2Tags :: PandocMonad m
+readTxt2Tags :: (PandocMonad m, ToSources a)
=> ReaderOptions
- -> Text
+ -> a
-> m Pandoc
readTxt2Tags opts s = do
+ let sources = ensureFinalNewlines 2 (toSources s)
meta <- getT2TMeta
let parsed = flip runReader meta $
- readWithM parseT2T (def {stateOptions = opts}) $
- crFilter s <> "\n\n"
+ readWithM parseT2T (def {stateOptions = opts}) sources
case parsed of
Right result -> return result
Left e -> throwError e
diff --git a/src/Text/Pandoc/Readers/Vimwiki.hs b/src/Text/Pandoc/Readers/Vimwiki.hs
index 74dac5ea7..460f304c4 100644
--- a/src/Text/Pandoc/Readers/Vimwiki.hs
+++ b/src/Text/Pandoc/Readers/Vimwiki.hs
@@ -74,23 +74,28 @@ import Text.Pandoc.Parsing (ParserState, ParserT, blanklines, emailAddress,
many1Till, orderedListMarker, readWithM,
registerHeader, spaceChar, stateMeta,
stateOptions, uri, manyTillChar, manyChar, textStr,
- many1Char, countChar, many1TillChar)
-import Text.Pandoc.Shared (crFilter, splitTextBy, stringify, stripFirstAndLast,
+ many1Char, countChar, many1TillChar,
+ alphaNum, anyChar, char, newline, noneOf, oneOf,
+ space, spaces, string)
+import Text.Pandoc.Sources (ToSources(..), Sources)
+import Text.Pandoc.Shared (splitTextBy, stringify, stripFirstAndLast,
isURI, tshow)
-import Text.Parsec.Char (alphaNum, anyChar, char, newline, noneOf, oneOf, space,
- spaces, string)
import Text.Parsec.Combinator (between, choice, eof, lookAhead, many1,
manyTill, notFollowedBy, option, skipMany1)
import Text.Parsec.Prim (getState, many, try, updateState, (<|>))
-readVimwiki :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readVimwiki :: (PandocMonad m, ToSources a)
+ => ReaderOptions
+ -> a
+ -> m Pandoc
readVimwiki opts s = do
- res <- readWithM parseVimwiki def{ stateOptions = opts } $ crFilter s
+ let sources = toSources s
+ res <- readWithM parseVimwiki def{ stateOptions = opts } sources
case res of
Left e -> throwError e
Right result -> return result
-type VwParser = ParserT Text ParserState
+type VwParser = ParserT Sources ParserState
-- constants