diff options
author | John MacFarlane <jgm@berkeley.edu> | 2021-05-01 13:17:45 -0700 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2021-05-09 19:11:34 -0600 |
commit | 6e45607f9948f45b2e94f54b4825b667ca0d5441 (patch) | |
tree | c4cbc0f6d398c4c4cf28fda23665fe19d26602b3 /src/Text/Pandoc/Readers | |
parent | 295d93e96b1853c2ff4658aa7206ea1329024fab (diff) | |
download | pandoc-6e45607f9948f45b2e94f54b4825b667ca0d5441.tar.gz |
Change reader types, allowing better tracking of source positions.
Previously, when multiple file arguments were provided, pandoc
simply concatenated them and passed the contents to the readers,
which took a Text argument.
As a result, the readers had no way of knowing which file
was the source of any particular bit of text. This meant that
we couldn't report accurate source positions on errors or
include accurate source positions as attributes in the AST.
More seriously, it meant that we couldn't resolve resource
paths relative to the files containing them
(see e.g. #5501, #6632, #6384, #3752).
Add Text.Pandoc.Sources (exported module), with a `Sources` type
and a `ToSources` class. A `Sources` wraps a list of `(SourcePos,
Text)` pairs. [API change] A parsec `Stream` instance is provided for
`Sources`. The module also exports versions of parsec's `satisfy` and
other Char parsers that track source positions accurately from a
`Sources` stream (or any instance of the new `UpdateSourcePos` class).
Text.Pandoc.Parsing now exports these modified Char parsers instead of
the ones parsec provides. Modified parsers to use a `Sources` as stream
[API change].
The readers that previously took a `Text` argument have been
modified to take any instance of `ToSources`. So, they may still
be used with a `Text`, but they can also be used with a `Sources`
object.
In Text.Pandoc.Error, modified the constructor PandocParsecError
to take a `Sources` rather than a `Text` as first argument,
so parse error locations can be accurately reported.
T.P.Error: showPos, do not print "-" as source name.
Diffstat (limited to 'src/Text/Pandoc/Readers')
34 files changed, 443 insertions, 355 deletions
diff --git a/src/Text/Pandoc/Readers/BibTeX.hs b/src/Text/Pandoc/Readers/BibTeX.hs index b82a81350..318afda85 100644 --- a/src/Text/Pandoc/Readers/BibTeX.hs +++ b/src/Text/Pandoc/Readers/BibTeX.hs @@ -23,30 +23,33 @@ where import Text.Pandoc.Options import Text.Pandoc.Definition import Text.Pandoc.Builder (setMeta, cite, str) -import Data.Text (Text) import Citeproc (Lang(..), parseLang) import Citeproc.Locale (getLocale) import Text.Pandoc.Error (PandocError(..)) import Text.Pandoc.Class (PandocMonad, lookupEnv) import Text.Pandoc.Citeproc.BibTeX as BibTeX import Text.Pandoc.Citeproc.MetaValue (referenceToMetaValue) +import Text.Pandoc.Sources (ToSources(..)) import Control.Monad.Except (throwError) -- | Read BibTeX from an input string and return a Pandoc document. -- The document will have only metadata, with an empty body. -- The metadata will contain a `references` field with the -- bibliography entries, and a `nocite` field with the wildcard `[@*]`. -readBibTeX :: PandocMonad m => ReaderOptions -> Text -> m Pandoc +readBibTeX :: (PandocMonad m, ToSources a) + => ReaderOptions -> a -> m Pandoc readBibTeX = readBibTeX' BibTeX.Bibtex -- | Read BibLaTeX from an input string and return a Pandoc document. -- The document will have only metadata, with an empty body. -- The metadata will contain a `references` field with the -- bibliography entries, and a `nocite` field with the wildcard `[@*]`. -readBibLaTeX :: PandocMonad m => ReaderOptions -> Text -> m Pandoc +readBibLaTeX :: (PandocMonad m, ToSources a) + => ReaderOptions -> a -> m Pandoc readBibLaTeX = readBibTeX' BibTeX.Biblatex -readBibTeX' :: PandocMonad m => Variant -> ReaderOptions -> Text -> m Pandoc +readBibTeX' :: (PandocMonad m, ToSources a) + => Variant -> ReaderOptions -> a -> m Pandoc readBibTeX' variant _opts t = do mblangEnv <- lookupEnv "LANG" let defaultLang = Lang "en" Nothing (Just "US") [] [] [] @@ -60,7 +63,7 @@ readBibTeX' variant _opts t = do Left _ -> throwError $ PandocCiteprocError e Right l -> return l case BibTeX.readBibtexString variant locale (const True) t of - Left e -> throwError $ PandocParsecError t e + Left e -> throwError $ PandocParsecError (toSources t) e Right refs -> return $ setMeta "references" (map referenceToMetaValue refs) . setMeta "nocite" diff --git a/src/Text/Pandoc/Readers/CSV.hs b/src/Text/Pandoc/Readers/CSV.hs index 2958d6180..eca8f9425 100644 --- a/src/Text/Pandoc/Readers/CSV.hs +++ b/src/Text/Pandoc/Readers/CSV.hs @@ -13,23 +13,23 @@ Conversion from CSV to a 'Pandoc' table. -} module Text.Pandoc.Readers.CSV ( readCSV ) where -import Data.Text (Text) import qualified Data.Text as T import Text.Pandoc.CSV (parseCSV, defaultCSVOptions) import Text.Pandoc.Definition import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class (PandocMonad) -import Text.Pandoc.Shared (crFilter) import Text.Pandoc.Error +import Text.Pandoc.Sources (ToSources(..), sourcesToText) import Text.Pandoc.Options (ReaderOptions) import Control.Monad.Except (throwError) -readCSV :: PandocMonad m +readCSV :: (PandocMonad m, ToSources a) => ReaderOptions -- ^ Reader options - -> Text -- ^ Text to parse (assuming @'\n'@ line endings) + -> a -> m Pandoc -readCSV _opts s = - case parseCSV defaultCSVOptions (crFilter s) of +readCSV _opts s = do + let txt = sourcesToText $ toSources s + case parseCSV defaultCSVOptions txt of Right (r:rs) -> return $ B.doc $ B.table capt (zip aligns widths) (TableHead nullAttr hdrs) @@ -45,4 +45,4 @@ readCSV _opts s = aligns = replicate numcols AlignDefault widths = replicate numcols ColWidthDefault Right [] -> return $ B.doc mempty - Left e -> throwError $ PandocParsecError s e + Left e -> throwError $ PandocParsecError (toSources [("",txt)]) e diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs index 244f77940..b099a9b50 100644 --- a/src/Text/Pandoc/Readers/CommonMark.hs +++ b/src/Text/Pandoc/Readers/CommonMark.hs @@ -30,45 +30,55 @@ import Text.Pandoc.Readers.Metadata (yamlMetaBlock) import Control.Monad.Except import Data.Functor.Identity (runIdentity) import Data.Typeable -import Text.Pandoc.Parsing (runParserT, getPosition, sourceLine, - runF, defaultParserState, take1WhileP, option) +import Text.Pandoc.Parsing (runParserT, getPosition, + runF, defaultParserState, option, many1, anyChar, + Sources(..), ToSources(..), ParserT, Future, + sourceName) import qualified Data.Text as T -- | Parse a CommonMark formatted string into a 'Pandoc' structure. -readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc +readCommonMark :: (PandocMonad m, ToSources a) + => ReaderOptions -> a -> m Pandoc readCommonMark opts s - | isEnabled Ext_yaml_metadata_block opts - , "---" `T.isPrefixOf` s = do - let metaValueParser = do - inp <- option "" $ take1WhileP (const True) - case runIdentity - (commonmarkWith (specFor opts) "metadata value" inp) of - Left _ -> mzero - Right (Cm bls :: Cm () Blocks) - -> return $ return $ B.toMetaValue bls - res <- runParserT (do meta <- yamlMetaBlock metaValueParser - pos <- getPosition - return (meta, pos)) - defaultParserState "YAML metadata" s - case res of - Left _ -> readCommonMarkBody opts s - Right (meta, pos) -> do - let dropLines 0 = id - dropLines n = dropLines (n - 1) . T.drop 1 . T.dropWhile (/='\n') - let metaLines = sourceLine pos - 1 - let body = T.replicate metaLines "\n" <> dropLines metaLines s - Pandoc _ bs <- readCommonMarkBody opts body - return $ Pandoc (runF meta defaultParserState) bs - | otherwise = readCommonMarkBody opts s + | isEnabled Ext_yaml_metadata_block opts = do + let sources = toSources s + let toks = concatMap sourceToToks (unSources sources) + res <- runParserT (do meta <- yamlMetaBlock (metaValueParser opts) + pos <- getPosition + return (meta, pos)) + defaultParserState "YAML metadata" (toSources s) + case res of + Left _ -> readCommonMarkBody opts sources toks + Right (meta, pos) -> do + -- strip off metadata section and parse body + let body = dropWhile (\t -> tokPos t < pos) toks + Pandoc _ bs <- readCommonMarkBody opts sources body + return $ Pandoc (runF meta defaultParserState) bs + | otherwise = do + let sources = toSources s + let toks = concatMap sourceToToks (unSources sources) + readCommonMarkBody opts sources toks -readCommonMarkBody :: PandocMonad m => ReaderOptions -> Text -> m Pandoc -readCommonMarkBody opts s +sourceToToks :: (SourcePos, Text) -> [Tok] +sourceToToks (pos, s) = tokenize (sourceName pos) s + +metaValueParser :: Monad m + => ReaderOptions -> ParserT Sources st m (Future st MetaValue) +metaValueParser opts = do + inp <- option "" $ T.pack <$> many1 anyChar + let toks = concatMap sourceToToks (unSources (toSources inp)) + case runIdentity (parseCommonmarkWith (specFor opts) toks) of + Left _ -> mzero + Right (Cm bls :: Cm () Blocks) -> return $ return $ B.toMetaValue bls + +readCommonMarkBody :: PandocMonad m => ReaderOptions -> Sources -> [Tok] -> m Pandoc +readCommonMarkBody opts s toks | isEnabled Ext_sourcepos opts = - case runIdentity (commonmarkWith (specFor opts) "" s) of + case runIdentity (parseCommonmarkWith (specFor opts) toks) of Left err -> throwError $ PandocParsecError s err Right (Cm bls :: Cm SourceRange Blocks) -> return $ B.doc bls | otherwise = - case runIdentity (commonmarkWith (specFor opts) "" s) of + case runIdentity (parseCommonmarkWith (specFor opts) toks) of Left err -> throwError $ PandocParsecError s err Right (Cm bls :: Cm () Blocks) -> return $ B.doc bls diff --git a/src/Text/Pandoc/Readers/Creole.hs b/src/Text/Pandoc/Readers/Creole.hs index 2658dfea2..ad848ada7 100644 --- a/src/Text/Pandoc/Readers/Creole.hs +++ b/src/Text/Pandoc/Readers/Creole.hs @@ -23,21 +23,20 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad (..)) import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (enclosed) -import Text.Pandoc.Shared (crFilter) - -- | Read creole from an input string and return a Pandoc document. -readCreole :: PandocMonad m +readCreole :: (PandocMonad m, ToSources a) => ReaderOptions - -> Text + -> a -> m Pandoc readCreole opts s = do - res <- readWithM parseCreole def{ stateOptions = opts } $ crFilter s <> "\n\n" + let sources = ensureFinalNewlines 2 (toSources s) + res <- readWithM parseCreole def{ stateOptions = opts } sources case res of Left e -> throwError e Right d -> return d -type CRLParser = ParserT Text ParserState +type CRLParser = ParserT Sources ParserState -- -- Utility functions diff --git a/src/Text/Pandoc/Readers/CslJson.hs b/src/Text/Pandoc/Readers/CslJson.hs index 30bb19483..a0af5c325 100644 --- a/src/Text/Pandoc/Readers/CslJson.hs +++ b/src/Text/Pandoc/Readers/CslJson.hs @@ -24,21 +24,22 @@ import Text.Pandoc.Options import Text.Pandoc.Definition import Text.Pandoc.Builder (setMeta, cite, str) import qualified Text.Pandoc.UTF8 as UTF8 -import Data.Text (Text) import qualified Data.Text as T import Text.Pandoc.Error (PandocError(..)) import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Citeproc.CslJson (cslJsonToReferences) import Text.Pandoc.Citeproc.MetaValue (referenceToMetaValue) import Control.Monad.Except (throwError) +import Text.Pandoc.Sources (ToSources(..), sourcesToText) -- | Read CSL JSON from an input string and return a Pandoc document. -- The document will have only metadata, with an empty body. -- The metadata will contain a `references` field with the -- bibliography entries, and a `nocite` field with the wildcard `[@*]`. -readCslJson :: PandocMonad m => ReaderOptions -> Text -> m Pandoc -readCslJson _opts t = - case cslJsonToReferences (UTF8.fromText t) of +readCslJson :: (PandocMonad m, ToSources a) + => ReaderOptions -> a -> m Pandoc +readCslJson _opts x = + case cslJsonToReferences (UTF8.fromText $ sourcesToText $ toSources x) of Left e -> throwError $ PandocParseError $ T.pack e Right refs -> return $ setMeta "references" (map referenceToMetaValue refs) diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index ac3caa2c0..3db459cfd 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -30,7 +30,8 @@ import Text.Pandoc.Builder import Text.Pandoc.Class.PandocMonad (PandocMonad, report) import Text.Pandoc.Options import Text.Pandoc.Logging (LogMessage(..)) -import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces) +import Text.Pandoc.Shared (safeRead, extractSpaces) +import Text.Pandoc.Sources (ToSources(..), sourcesToText) import Text.TeXMath (readMathML, writeTeX) import Text.Pandoc.XML.Light @@ -539,11 +540,15 @@ instance Default DBState where , dbContent = [] } -readDocBook :: PandocMonad m => ReaderOptions -> Text -> m Pandoc +readDocBook :: (PandocMonad m, ToSources a) + => ReaderOptions + -> a + -> m Pandoc readDocBook _ inp = do + let sources = toSources inp tree <- either (throwError . PandocXMLError "") return $ parseXMLContents - (TL.fromStrict . handleInstructions $ crFilter inp) + (TL.fromStrict . handleInstructions . sourcesToText $ sources) (bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock tree return $ Pandoc (dbMeta st') (toList . mconcat $ bs) diff --git a/src/Text/Pandoc/Readers/DokuWiki.hs b/src/Text/Pandoc/Readers/DokuWiki.hs index dedc1f03f..db98ac8de 100644 --- a/src/Text/Pandoc/Readers/DokuWiki.hs +++ b/src/Text/Pandoc/Readers/DokuWiki.hs @@ -29,26 +29,27 @@ import Text.Pandoc.Definition import Text.Pandoc.Error (PandocError (PandocParsecError)) import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (enclosed, nested) -import Text.Pandoc.Shared (crFilter, trim, stringify, tshow) +import Text.Pandoc.Shared (trim, stringify, tshow) -- | Read DokuWiki from an input string and return a Pandoc document. -readDokuWiki :: PandocMonad m +readDokuWiki :: (PandocMonad m, ToSources a) => ReaderOptions - -> Text + -> a -> m Pandoc readDokuWiki opts s = do - let input = crFilter s - res <- runParserT parseDokuWiki def {stateOptions = opts } "source" input + let sources = toSources s + res <- runParserT parseDokuWiki def {stateOptions = opts } + (initialSourceName sources) sources case res of - Left e -> throwError $ PandocParsecError input e + Left e -> throwError $ PandocParsecError sources e Right d -> return d -type DWParser = ParserT Text ParserState +type DWParser = ParserT Sources ParserState -- * Utility functions -- | Parse end-of-line, which can be either a newline or end-of-file. -eol :: Stream s m Char => ParserT s st m () +eol :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m () eol = void newline <|> eof nested :: PandocMonad m => DWParser m a -> DWParser m a diff --git a/src/Text/Pandoc/Readers/FB2.hs b/src/Text/Pandoc/Readers/FB2.hs index 66e390bd7..84e5278db 100644 --- a/src/Text/Pandoc/Readers/FB2.hs +++ b/src/Text/Pandoc/Readers/FB2.hs @@ -40,9 +40,9 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad, insertMedia, report) import Text.Pandoc.Error import Text.Pandoc.Logging import Text.Pandoc.Options -import Text.Pandoc.Shared (crFilter) import Text.Pandoc.XML.Light import qualified Text.Pandoc.UTF8 as UTF8 +import Text.Pandoc.Sources (ToSources(..), sourcesToText) type FB2 m = StateT FB2State m @@ -63,9 +63,12 @@ instance HasMeta FB2State where setMeta field v s = s {fb2Meta = setMeta field v (fb2Meta s)} deleteMeta field s = s {fb2Meta = deleteMeta field (fb2Meta s)} -readFB2 :: PandocMonad m => ReaderOptions -> Text -> m Pandoc +readFB2 :: (PandocMonad m, ToSources a) + => ReaderOptions + -> a + -> m Pandoc readFB2 _ inp = - case parseXMLElement $ TL.fromStrict $ crFilter inp of + case parseXMLElement $ TL.fromStrict $ sourcesToText $ toSources inp of Left msg -> throwError $ PandocXMLError "" msg Right el -> do (bs, st) <- runStateT (parseRootElement el) def diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index c3e68afd8..f5c8a2277 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -62,21 +62,21 @@ import Text.Pandoc.Options ( extensionEnabled) import Text.Pandoc.Parsing hiding ((<|>)) import Text.Pandoc.Shared ( - addMetaField, blocksToInlines', crFilter, escapeURI, extractSpaces, + addMetaField, blocksToInlines', escapeURI, extractSpaces, htmlSpanLikeElements, renderTags', safeRead, tshow) import Text.Pandoc.Walk import Text.Parsec.Error import Text.TeXMath (readMathML, writeTeX) -- | Convert HTML-formatted string to 'Pandoc' document. -readHtml :: PandocMonad m +readHtml :: (PandocMonad m, ToSources a) => ReaderOptions -- ^ Reader options - -> Text -- ^ String to parse (assumes @'\n'@ line endings) + -> a -- ^ Input to parse -> m Pandoc readHtml opts inp = do let tags = stripPrefixes $ canonicalizeTags $ parseTagsOptions parseOptions{ optTagPosition = True } - (crFilter inp) + (sourcesToText $ toSources inp) parseDoc = do blocks <- fixPlains False . mconcat <$> manyTill block eof meta <- stateMeta . parserState <$> getState @@ -830,17 +830,19 @@ pInlinesInTags tagtype f = extractSpaces f <$> pInTags tagtype inline pTagText :: PandocMonad m => TagParser m Inlines pTagText = try $ do + pos <- getPosition (TagText str) <- pSatisfy isTagText st <- getState qu <- ask parsed <- lift $ lift $ - flip runReaderT qu $ runParserT (many pTagContents) st "text" str + flip runReaderT qu $ runParserT (many pTagContents) st "text" + (Sources [(pos, str)]) case parsed of Left _ -> throwError $ PandocParseError $ "Could not parse `" <> str <> "'" Right result -> return $ mconcat result -type InlinesParser m = HTMLParser m Text +type InlinesParser m = HTMLParser m Sources pTagContents :: PandocMonad m => InlinesParser m Inlines pTagContents = @@ -970,13 +972,14 @@ isCommentTag = tagComment (const True) -- | Matches a stretch of HTML in balanced tags. htmlInBalanced :: Monad m => (Tag Text -> Bool) - -> ParserT Text st m Text + -> ParserT Sources st m Text htmlInBalanced f = try $ do lookAhead (char '<') - inp <- getInput - let ts = canonicalizeTags $ - parseTagsOptions parseOptions{ optTagWarning = True, - optTagPosition = True } inp + sources <- getInput + let ts = canonicalizeTags + $ parseTagsOptions parseOptions{ optTagWarning = True, + optTagPosition = True } + $ sourcesToText sources case ts of (TagPosition sr sc : t@(TagOpen tn _) : rest) -> do guard $ f t @@ -1018,15 +1021,17 @@ hasTagWarning _ = False -- | Matches a tag meeting a certain condition. htmlTag :: (HasReaderOptions st, Monad m) => (Tag Text -> Bool) - -> ParserT Text st m (Tag Text, Text) + -> ParserT Sources st m (Tag Text, Text) htmlTag f = try $ do lookAhead (char '<') startpos <- getPosition - inp <- getInput + sources <- getInput + let inp = sourcesToText sources let ts = canonicalizeTags $ parseTagsOptions parseOptions{ optTagWarning = False , optTagPosition = True } - (inp <> " ") -- add space to ensure that + (inp <> " ") + -- add space to ensure that -- we get a TagPosition after the tag (next, ln, col) <- case ts of (TagPosition{} : next : TagPosition ln col : _) diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs index 48454e353..35eaac0a9 100644 --- a/src/Text/Pandoc/Readers/Haddock.hs +++ b/src/Text/Pandoc/Readers/Haddock.hs @@ -19,7 +19,7 @@ import Control.Monad.Except (throwError) import Data.List (intersperse) import Data.List.NonEmpty (nonEmpty) import Data.Maybe (fromMaybe) -import Data.Text (Text, unpack) +import Data.Text (unpack) import qualified Data.Text as T import Documentation.Haddock.Parser import Documentation.Haddock.Types as H @@ -29,15 +29,17 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad) import Text.Pandoc.Definition import Text.Pandoc.Error import Text.Pandoc.Options -import Text.Pandoc.Shared (crFilter, splitTextBy, trim) +import Text.Pandoc.Sources (ToSources(..), sourcesToText) +import Text.Pandoc.Shared (splitTextBy, trim) -- | Parse Haddock markup and return a 'Pandoc' document. -readHaddock :: PandocMonad m +readHaddock :: (PandocMonad m, ToSources a) => ReaderOptions - -> Text + -> a -> m Pandoc -readHaddock opts s = case readHaddockEither opts (unpack (crFilter s)) of +readHaddock opts s = case readHaddockEither opts + (unpack . sourcesToText . toSources $ s) of Right result -> return result Left e -> throwError e diff --git a/src/Text/Pandoc/Readers/Ipynb.hs b/src/Text/Pandoc/Readers/Ipynb.hs index 70296bb6b..cd1093109 100644 --- a/src/Text/Pandoc/Readers/Ipynb.hs +++ b/src/Text/Pandoc/Readers/Ipynb.hs @@ -39,10 +39,12 @@ import Data.Aeson as Aeson import Control.Monad.Except (throwError) import Text.Pandoc.Readers.Markdown (readMarkdown) import qualified Text.Pandoc.UTF8 as UTF8 +import Text.Pandoc.Sources (ToSources(..), sourcesToText) -readIpynb :: PandocMonad m => ReaderOptions -> Text -> m Pandoc -readIpynb opts t = do - let src = BL.fromStrict (TE.encodeUtf8 t) +readIpynb :: (PandocMonad m, ToSources a) + => ReaderOptions -> a -> m Pandoc +readIpynb opts x = do + let src = BL.fromStrict . TE.encodeUtf8 . sourcesToText $ toSources x case eitherDecode src of Right (notebook4 :: Notebook NbV4) -> notebookToPandoc opts notebook4 Left _ -> diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs index c068f3774..9cdbf1611 100644 --- a/src/Text/Pandoc/Readers/JATS.hs +++ b/src/Text/Pandoc/Readers/JATS.hs @@ -29,11 +29,12 @@ import Text.HTML.TagSoup.Entity (lookupEntity) import Text.Pandoc.Builder import Text.Pandoc.Class.PandocMonad (PandocMonad) import Text.Pandoc.Options -import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces) +import Text.Pandoc.Shared (safeRead, extractSpaces) import Text.TeXMath (readMathML, writeTeX) import Text.Pandoc.XML.Light import qualified Data.Set as S (fromList, member) import Data.Set ((\\)) +import Text.Pandoc.Sources (ToSources(..), sourcesToText) type JATS m = StateT JATSState m @@ -52,10 +53,14 @@ instance Default JATSState where , jatsContent = [] } -readJATS :: PandocMonad m => ReaderOptions -> Text -> m Pandoc +readJATS :: (PandocMonad m, ToSources a) + => ReaderOptions + -> a + -> m Pandoc readJATS _ inp = do + let sources = toSources inp tree <- either (throwError . PandocXMLError "") return $ - parseXMLContents (TL.fromStrict $ crFilter inp) + parseXMLContents (TL.fromStrict . sourcesToText $ sources) (bs, st') <- flip runStateT (def{ jatsContent = tree }) $ mapM parseBlock tree return $ Pandoc (jatsMeta st') (toList . mconcat $ bs) diff --git a/src/Text/Pandoc/Readers/Jira.hs b/src/Text/Pandoc/Readers/Jira.hs index 89aecbf56..a3b415f09 100644 --- a/src/Text/Pandoc/Readers/Jira.hs +++ b/src/Text/Pandoc/Readers/Jira.hs @@ -20,18 +20,20 @@ import Text.Pandoc.Builder hiding (cell) import Text.Pandoc.Error (PandocError (PandocParseError)) import Text.Pandoc.Options (ReaderOptions) import Text.Pandoc.Shared (stringify) - +import Text.Pandoc.Sources (ToSources(..), sourcesToText) import qualified Text.Jira.Markup as Jira -- | Read Jira wiki markup. -readJira :: PandocMonad m +readJira :: (PandocMonad m, ToSources a) => ReaderOptions - -> Text + -> a -> m Pandoc -readJira _opts s = case parse s of - Right d -> return $ jiraToPandoc d - Left e -> throwError . PandocParseError $ - "Jira parse error" `append` pack (show e) +readJira _opts inp = do + let sources = toSources inp + case parse (sourcesToText sources) of + Right d -> return $ jiraToPandoc d + Left e -> throwError . PandocParseError $ + "Jira parse error" `append` pack (show e) jiraToPandoc :: Jira.Doc -> Pandoc jiraToPandoc (Jira.Doc blks) = doc $ foldMap jiraToPandocBlocks blks diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 9ad168293..f90d562ae 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -77,16 +77,17 @@ import Data.List.NonEmpty (nonEmpty) -- import Debug.Trace (traceShowId) -- | Parse LaTeX from string and return 'Pandoc' document. -readLaTeX :: PandocMonad m +readLaTeX :: (PandocMonad m, ToSources a) => ReaderOptions -- ^ Reader options - -> Text -- ^ String to parse (assumes @'\n'@ line endings) + -> a -- ^ Input to parse -> m Pandoc readLaTeX opts ltx = do + let sources = toSources ltx parsed <- runParserT parseLaTeX def{ sOptions = opts } "source" - (tokenize "source" (crFilter ltx)) + (tokenizeSources sources) case parsed of Right result -> return result - Left e -> throwError $ PandocParsecError ltx e + Left e -> throwError $ PandocParsecError sources e parseLaTeX :: PandocMonad m => LP m Pandoc parseLaTeX = do @@ -132,11 +133,11 @@ resolveRefs _ x = x rawLaTeXBlock :: (PandocMonad m, HasMacros s, HasReaderOptions s) - => ParserT Text s m Text + => ParserT Sources s m Text rawLaTeXBlock = do lookAhead (try (char '\\' >> letter)) inp <- getInput - let toks = tokenize "source" inp + let toks = tokenizeSources inp snd <$> (rawLaTeXParser toks False (macroDef (const mempty)) blocks <|> rawLaTeXParser toks True (do choice (map controlSeq @@ -163,11 +164,11 @@ beginOrEndCommand = try $ do (txt <> untokenize rawargs) rawLaTeXInline :: (PandocMonad m, HasMacros s, HasReaderOptions s) - => ParserT Text s m Text + => ParserT Sources s m Text rawLaTeXInline = do lookAhead (try (char '\\' >> letter)) inp <- getInput - let toks = tokenize "source" inp + let toks = tokenizeSources inp raw <- snd <$> ( rawLaTeXParser toks True (mempty <$ (controlSeq "input" >> skipMany rawopt >> braced)) @@ -178,11 +179,11 @@ rawLaTeXInline = do finalbraces <- mconcat <$> many (try (string "{}")) -- see #5439 return $ raw <> T.pack finalbraces -inlineCommand :: PandocMonad m => ParserT Text ParserState m Inlines +inlineCommand :: PandocMonad m => ParserT Sources ParserState m Inlines inlineCommand = do lookAhead (try (char '\\' >> letter)) inp <- getInput - let toks = tokenize "source" inp + let toks = tokenizeSources inp fst <$> rawLaTeXParser toks True (inlineEnvironment <|> inlineCommand') inlines @@ -641,7 +642,7 @@ opt = do parsed <- runParserT (mconcat <$> many inline) st "bracketed option" toks case parsed of Right result -> return result - Left e -> throwError $ PandocParsecError (untokenize toks) e + Left e -> throwError $ PandocParsecError (toSources toks) e -- block elements: diff --git a/src/Text/Pandoc/Readers/LaTeX/Citation.hs b/src/Text/Pandoc/Readers/LaTeX/Citation.hs index 655823dab..af97125c6 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Citation.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Citation.hs @@ -120,7 +120,7 @@ simpleCiteArgs inline = try $ do runParserT (mconcat <$> many inline) st "bracketed option" toks case parsed of Right result -> return result - Left e -> throwError $ PandocParsecError (untokenize toks) e + Left e -> throwError $ PandocParsecError (toSources toks) e diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs index db58b333d..35ce3509d 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs @@ -27,6 +27,7 @@ module Text.Pandoc.Readers.LaTeX.Parsing , rawLaTeXParser , applyMacros , tokenize + , tokenizeSources , untokenize , untoken , totoks @@ -248,7 +249,7 @@ withVerbatimMode parser = do rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s) => [Tok] -> Bool -> LP m a -> LP m a - -> ParserT Text s m (a, Text) + -> ParserT Sources s m (a, Text) rawLaTeXParser toks retokenize parser valParser = do pstate <- getState let lstate = def{ sOptions = extractReaderOptions pstate } @@ -268,7 +269,7 @@ rawLaTeXParser toks retokenize parser valParser = do Left _ -> mzero Right ((val, raw), st) -> do updateState (updateMacros (sMacros st <>)) - _ <- takeP (T.length (untokenize toks')) + void $ count (T.length (untokenize toks')) anyChar let result = untokenize raw -- ensure we end with space if input did, see #4442 let result' = @@ -281,7 +282,7 @@ rawLaTeXParser toks retokenize parser valParser = do return (val, result') applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s) - => Text -> ParserT Text s m Text + => Text -> ParserT Sources s m Text applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|> do let retokenize = untokenize <$> many (satisfyTok (const True)) pstate <- getState @@ -301,6 +302,11 @@ QuickCheck property: > let t = T.pack s in untokenize (tokenize "random" t) == t -} +tokenizeSources :: Sources -> [Tok] +tokenizeSources = concatMap tokenizeSource . unSources + where + tokenizeSource (pos, t) = totoks pos t + tokenize :: SourceName -> Text -> [Tok] tokenize sourcename = totoks (initialPos sourcename) diff --git a/src/Text/Pandoc/Readers/LaTeX/Types.hs b/src/Text/Pandoc/Readers/LaTeX/Types.hs index f8c214318..c20b72bc5 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Types.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Types.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE FlexibleInstances #-} {- | Module : Text.Pandoc.Readers.LaTeX.Types Copyright : Copyright (C) 2017-2021 John MacFarlane @@ -18,7 +19,9 @@ module Text.Pandoc.Readers.LaTeX.Types ( Tok(..) ) where import Data.Text (Text) -import Text.Parsec.Pos (SourcePos) +import Text.Parsec.Pos (SourcePos, sourceName) +import Text.Pandoc.Sources +import Data.List (groupBy) data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment | Esc1 | Esc2 | Arg Int @@ -27,6 +30,16 @@ data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment | data Tok = Tok SourcePos TokType Text deriving (Eq, Ord, Show) +instance ToSources [Tok] where + toSources = Sources + . map (\ts -> case ts of + Tok p _ _ : _ -> (p, mconcat $ map tokToText ts) + _ -> error "toSources [Tok] encountered empty group") + . groupBy (\(Tok p1 _ _) (Tok p2 _ _) -> sourceName p1 == sourceName p2) + +tokToText :: Tok -> Text +tokToText (Tok _ _ t) = t + data ExpansionPoint = ExpandWhenDefined | ExpandWhenUsed deriving (Eq, Ord, Show) diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs index 21b8feaab..1141af66f 100644 --- a/src/Text/Pandoc/Readers/Man.hs +++ b/src/Text/Pandoc/Readers/Man.hs @@ -20,7 +20,7 @@ import Control.Monad (liftM, mzero, guard, void) import Control.Monad.Trans (lift) import Control.Monad.Except (throwError) import Data.Maybe (catMaybes, isJust) -import Data.List (intersperse, intercalate) +import Data.List (intersperse) import qualified Data.Text as T import Text.Pandoc.Builder as B import Text.Pandoc.Class.PandocMonad (PandocMonad(..), report) @@ -29,9 +29,8 @@ import Text.Pandoc.Logging (LogMessage(..)) import Text.Pandoc.Options import Text.Pandoc.Parsing import Text.Pandoc.Walk (query) -import Text.Pandoc.Shared (crFilter, mapLeft) +import Text.Pandoc.Shared (mapLeft) import Text.Pandoc.Readers.Roff -- TODO explicit imports -import Text.Parsec hiding (tokenPrim) import qualified Text.Parsec as Parsec import Text.Parsec.Pos (updatePosString) import qualified Data.Foldable as Foldable @@ -50,13 +49,20 @@ type ManParser m = ParserT [RoffToken] ManState m -- | Read man (troff) from an input string and return a Pandoc document. -readMan :: PandocMonad m => ReaderOptions -> T.Text -> m Pandoc -readMan opts txt = do - tokenz <- lexRoff (initialPos "input") (crFilter txt) +readMan :: (PandocMonad m, ToSources a) + => ReaderOptions + -> a + -> m Pandoc +readMan opts s = do + let Sources inps = toSources s + tokenz <- mconcat <$> mapM (uncurry lexRoff) inps let state = def {readerOptions = opts} :: ManState + let fixError (PandocParsecError _ e) = PandocParsecError (Sources inps) e + fixError e = e eitherdoc <- readWithMTokens parseMan state (Foldable.toList . unRoffTokens $ tokenz) - either throwError return eitherdoc + either (throwError . fixError) return eitherdoc + readWithMTokens :: PandocMonad m => ParserT [RoffToken] ManState m a -- ^ parser @@ -64,9 +70,10 @@ readWithMTokens :: PandocMonad m -> [RoffToken] -- ^ input -> m (Either PandocError a) readWithMTokens parser state input = - let leftF = PandocParsecError . T.pack . intercalate "\n" $ show <$> input + let leftF = PandocParsecError mempty in mapLeft leftF `liftM` runParserT parser state "source" input + parseMan :: PandocMonad m => ManParser m Pandoc parseMan = do bs <- many parseBlock <* eof @@ -89,7 +96,7 @@ parseBlock = choice [ parseList parseTable :: PandocMonad m => ManParser m Blocks parseTable = do - modifyState $ \st -> st { tableCellsPlain = True } + updateState $ \st -> st { tableCellsPlain = True } let isTbl Tbl{} = True isTbl _ = False Tbl _opts rows pos <- msatisfy isTbl @@ -135,7 +142,7 @@ parseTable = do case res' of Left _ -> Prelude.fail "Could not parse table cell" Right x -> do - modifyState $ \s -> s{ tableCellsPlain = False } + updateState $ \s -> s{ tableCellsPlain = False } return x Right x -> return x @@ -222,7 +229,7 @@ parseTitle = do setMeta "section" (linePartsToInlines y) [x] -> setMeta "title" (linePartsToInlines x) [] -> id - modifyState $ \st -> st{ metadata = adjustMeta $ metadata st } + updateState $ \st -> st{ metadata = adjustMeta $ metadata st } return mempty linePartsToInlines :: [LinePart] -> Inlines diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index ba8ed147e..69dd51bc4 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -47,19 +47,20 @@ import Text.Pandoc.Readers.LaTeX (applyMacros, rawLaTeXBlock, rawLaTeXInline) import Text.Pandoc.Shared import Text.Pandoc.XML (fromEntities) import Text.Pandoc.Readers.Metadata (yamlBsToMeta, yamlBsToRefs, yamlMetaBlock) +-- import Debug.Trace (traceShowId) -type MarkdownParser m = ParserT Text ParserState m +type MarkdownParser m = ParserT Sources ParserState m type F = Future ParserState -- | Read markdown from an input string and return a Pandoc document. -readMarkdown :: PandocMonad m +readMarkdown :: (PandocMonad m, ToSources a) => ReaderOptions -- ^ Reader options - -> Text -- ^ String to parse (assuming @'\n'@ line endings) + -> a -- ^ Input -> m Pandoc readMarkdown opts s = do parsed <- readWithM parseMarkdown def{ stateOptions = opts } - (crFilter s <> "\n\n") + (ensureFinalNewlines 3 (toSources s)) case parsed of Right result -> return result Left e -> throwError e @@ -80,7 +81,7 @@ yamlToMeta opts mbfp bstr = do meta <- yamlBsToMeta (fmap B.toMetaValue <$> parseBlocks) bstr setPosition oldPos return $ runF meta defaultParserState - parsed <- readWithM parser def{ stateOptions = opts } "" + parsed <- readWithM parser def{ stateOptions = opts } ("" :: Text) case parsed of Right result -> return result Left e -> throwError e @@ -103,7 +104,7 @@ yamlToRefs idpred opts mbfp bstr = do refs <- yamlBsToRefs (fmap B.toMetaValue <$> parseBlocks) idpred bstr setPosition oldPos return $ runF refs defaultParserState - parsed <- readWithM parser def{ stateOptions = opts } "" + parsed <- readWithM parser def{ stateOptions = opts } ("" :: Text) case parsed of Right result -> return result Left e -> throwError e @@ -146,14 +147,14 @@ inList = do ctx <- stateParserContext <$> getState guard (ctx == ListItemState) -spnl :: PandocMonad m => ParserT Text st m () +spnl :: PandocMonad m => ParserT Sources st m () spnl = try $ do skipSpaces optional newline skipSpaces notFollowedBy (char '\n') -spnl' :: PandocMonad m => ParserT Text st m Text +spnl' :: PandocMonad m => ParserT Sources st m Text spnl' = try $ do xs <- many spaceChar ys <- option "" $ try $ (:) <$> newline @@ -568,7 +569,7 @@ registerImplicitHeader raw attr@(ident, _, _) -- hrule block -- -hrule :: PandocMonad m => ParserT Text st m (F Blocks) +hrule :: PandocMonad m => ParserT Sources st m (F Blocks) hrule = try $ do skipSpaces start <- satisfy isHruleChar @@ -588,7 +589,7 @@ indentedLine = indentSpaces >> anyLineNewline blockDelimiter :: PandocMonad m => (Char -> Bool) -> Maybe Int - -> ParserT Text ParserState m Int + -> ParserT Sources ParserState m Int blockDelimiter f len = try $ do skipNonindentSpaces c <- lookAhead (satisfy f) @@ -732,7 +733,7 @@ lhsCodeBlockBirdWith c = try $ do blanklines return $ T.intercalate "\n" lns' -birdTrackLine :: PandocMonad m => Char -> ParserT Text st m Text +birdTrackLine :: PandocMonad m => Char -> ParserT Sources st m Text birdTrackLine c = try $ do char c -- allow html tags on left margin: @@ -1025,7 +1026,7 @@ para = try $ do option (B.plain <$> result) $ try $ do newline - (blanklines >> return mempty) + (mempty <$ blanklines) <|> (guardDisabled Ext_blank_before_blockquote >> () <$ lookAhead blockQuote) <|> (guardEnabled Ext_backtick_code_blocks >> () <$ lookAhead codeBlockFenced) <|> (guardDisabled Ext_blank_before_header >> () <$ lookAhead header) @@ -1170,7 +1171,7 @@ lineBlock = do -- and the length including trailing space. dashedLine :: PandocMonad m => Char - -> ParserT Text st m (Int, Int) + -> ParserT Sources st m (Int, Int) dashedLine ch = do dashes <- many1 (char ch) sp <- many spaceChar @@ -1239,7 +1240,7 @@ rawTableLine :: PandocMonad m -> MarkdownParser m [Text] rawTableLine indices = do notFollowedBy' (blanklines' <|> tableFooter) - line <- take1WhileP (/='\n') <* newline + line <- anyLine return $ map trim $ tail $ splitTextByIndices (init indices) line @@ -1390,7 +1391,7 @@ pipeTableCell = return $ B.plain <$> result) <|> return mempty -pipeTableHeaderPart :: PandocMonad m => ParserT Text st m (Alignment, Int) +pipeTableHeaderPart :: PandocMonad m => ParserT Sources st m (Alignment, Int) pipeTableHeaderPart = try $ do skipMany spaceChar left <- optionMaybe (char ':') @@ -1406,10 +1407,14 @@ pipeTableHeaderPart = try $ do (Just _,Just _) -> AlignCenter, len) -- Succeed only if current line contains a pipe. -scanForPipe :: PandocMonad m => ParserT Text st m () +scanForPipe :: PandocMonad m => ParserT Sources st m () scanForPipe = do - inp <- getInput - case T.break (\c -> c == '\n' || c == '|') inp of + Sources inps <- getInput + let ln = case inps of + [] -> "" + ((_,t):(_,t'):_) | T.null t -> t' + ((_,t):_) -> t + case T.break (\c -> c == '\n' || c == '|') ln of (_, T.uncons -> Just ('|', _)) -> return () _ -> mzero @@ -1703,13 +1708,13 @@ whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace" where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak) regsp = skipMany spaceChar >> return B.space -nonEndline :: PandocMonad m => ParserT Text st m Char +nonEndline :: PandocMonad m => ParserT Sources st m Char nonEndline = satisfy (/='\n') str :: PandocMonad m => MarkdownParser m (F Inlines) str = do result <- mconcat <$> many1 - ( take1WhileP isAlphaNum + ( T.pack <$> (many1 alphaNum) <|> "." <$ try (char '.' <* notFollowedBy (char '.')) ) updateLastStrPos (do guardEnabled Ext_smart @@ -1962,7 +1967,7 @@ rawLaTeXInline' = do s <- rawLaTeXInline return $ return $ B.rawInline "tex" s -- "tex" because it might be context -rawConTeXtEnvironment :: PandocMonad m => ParserT Text st m Text +rawConTeXtEnvironment :: PandocMonad m => ParserT Sources st m Text rawConTeXtEnvironment = try $ do string "\\start" completion <- inBrackets (letter <|> digit <|> spaceChar) @@ -1971,7 +1976,7 @@ rawConTeXtEnvironment = try $ do (try $ string "\\stop" >> textStr completion) return $ "\\start" <> completion <> T.concat contents <> "\\stop" <> completion -inBrackets :: PandocMonad m => ParserT Text st m Char -> ParserT Text st m Text +inBrackets :: PandocMonad m => ParserT Sources st m Char -> ParserT Sources st m Text inBrackets parser = do char '[' contents <- manyChar parser diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index 9f4d5e170..825e4a2eb 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -36,17 +36,18 @@ import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (nested) import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isCommentTag) -import Text.Pandoc.Shared (crFilter, safeRead, stringify, stripTrailingNewlines, +import Text.Pandoc.Shared (safeRead, stringify, stripTrailingNewlines, trim, splitTextBy, tshow) import Text.Pandoc.Walk (walk) import Text.Pandoc.XML (fromEntities) -- | Read mediawiki from an input string and return a Pandoc document. -readMediaWiki :: PandocMonad m - => ReaderOptions -- ^ Reader options - -> Text -- ^ String to parse (assuming @'\n'@ line endings) +readMediaWiki :: (PandocMonad m, ToSources a) + => ReaderOptions + -> a -> m Pandoc readMediaWiki opts s = do + let sources = toSources s parsed <- readWithM parseMediaWiki MWState{ mwOptions = opts , mwMaxNestingLevel = 4 , mwNextLinkNumber = 1 @@ -55,7 +56,7 @@ readMediaWiki opts s = do , mwLogMessages = [] , mwInTT = False } - (crFilter s <> "\n") + sources case parsed of Right result -> return result Left e -> throwError e @@ -69,7 +70,7 @@ data MWState = MWState { mwOptions :: ReaderOptions , mwInTT :: Bool } -type MWParser m = ParserT Text MWState m +type MWParser m = ParserT Sources MWState m instance HasReaderOptions MWState where extractReaderOptions = mwOptions diff --git a/src/Text/Pandoc/Readers/Metadata.hs b/src/Text/Pandoc/Readers/Metadata.hs index cb141cba5..bbcfe62ea 100644 --- a/src/Text/Pandoc/Readers/Metadata.hs +++ b/src/Text/Pandoc/Readers/Metadata.hs @@ -35,9 +35,9 @@ import qualified Data.Text.Lazy as TL import qualified Text.Pandoc.UTF8 as UTF8 yamlBsToMeta :: (PandocMonad m, HasLastStrPosition st) - => ParserT Text st m (Future st MetaValue) + => ParserT Sources st m (Future st MetaValue) -> BL.ByteString - -> ParserT Text st m (Future st Meta) + -> ParserT Sources st m (Future st Meta) yamlBsToMeta pMetaValue bstr = do case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of Right (YAML.Doc (YAML.Mapping _ _ o):_) @@ -67,10 +67,10 @@ lookupYAML _ _ = Nothing -- Returns filtered list of references. yamlBsToRefs :: (PandocMonad m, HasLastStrPosition st) - => ParserT Text st m (Future st MetaValue) + => ParserT Sources st m (Future st MetaValue) -> (Text -> Bool) -- ^ Filter for id -> BL.ByteString - -> ParserT Text st m (Future st [MetaValue]) + -> ParserT Sources st m (Future st [MetaValue]) yamlBsToRefs pMetaValue idpred bstr = case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of Right (YAML.Doc o@YAML.Mapping{}:_) @@ -108,9 +108,9 @@ nodeToKey (YAML.Scalar _ (YAML.SUnknown _ t)) = Just t nodeToKey _ = Nothing normalizeMetaValue :: (PandocMonad m, HasLastStrPosition st) - => ParserT Text st m (Future st MetaValue) + => ParserT Sources st m (Future st MetaValue) -> Text - -> ParserT Text st m (Future st MetaValue) + -> ParserT Sources st m (Future st MetaValue) normalizeMetaValue pMetaValue x = -- Note: a standard quoted or unquoted YAML value will -- not end in a newline, but a "block" set off with @@ -133,9 +133,9 @@ checkBoolean t | otherwise = Nothing yamlToMetaValue :: (PandocMonad m, HasLastStrPosition st) - => ParserT Text st m (Future st MetaValue) + => ParserT Sources st m (Future st MetaValue) -> YAML.Node YE.Pos - -> ParserT Text st m (Future st MetaValue) + -> ParserT Sources st m (Future st MetaValue) yamlToMetaValue pMetaValue (YAML.Scalar _ x) = case x of YAML.SStr t -> normalizeMetaValue pMetaValue t @@ -156,9 +156,9 @@ yamlToMetaValue pMetaValue (YAML.Mapping _ _ o) = yamlToMetaValue _ _ = return $ return $ MetaString "" yamlMap :: (PandocMonad m, HasLastStrPosition st) - => ParserT Text st m (Future st MetaValue) + => ParserT Sources st m (Future st MetaValue) -> M.Map (YAML.Node YE.Pos) (YAML.Node YE.Pos) - -> ParserT Text st m (Future st (M.Map Text MetaValue)) + -> ParserT Sources st m (Future st (M.Map Text MetaValue)) yamlMap pMetaValue o = do kvs <- forM (M.toList o) $ \(key, v) -> do k <- maybe (throwError $ PandocParseError @@ -177,8 +177,8 @@ yamlMap pMetaValue o = do -- | Parse a YAML metadata block using the supplied 'MetaValue' parser. yamlMetaBlock :: (HasLastStrPosition st, PandocMonad m) - => ParserT Text st m (Future st MetaValue) - -> ParserT Text st m (Future st Meta) + => ParserT Sources st m (Future st MetaValue) + -> ParserT Sources st m (Future st Meta) yamlMetaBlock parser = try $ do string "---" blankline @@ -189,5 +189,5 @@ yamlMetaBlock parser = try $ do optional blanklines yamlBsToMeta parser $ UTF8.fromTextLazy $ TL.fromStrict rawYaml -stopLine :: Monad m => ParserT Text st m () +stopLine :: Monad m => ParserT Sources st m () stopLine = try $ (string "---" <|> string "...") >> blankline >> return () diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs index 698bfd3d7..a0d4534f1 100644 --- a/src/Text/Pandoc/Readers/Muse.hs +++ b/src/Text/Pandoc/Readers/Muse.hs @@ -37,18 +37,19 @@ import Text.Pandoc.Error (PandocError (PandocParsecError)) import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Parsing -import Text.Pandoc.Shared (crFilter, trimr, tshow) +import Text.Pandoc.Shared (trimr, tshow) -- | Read Muse from an input string and return a Pandoc document. -readMuse :: PandocMonad m +readMuse :: (PandocMonad m, ToSources a) => ReaderOptions - -> Text + -> a -> m Pandoc readMuse opts s = do - let input = crFilter s - res <- flip runReaderT def $ runParserT parseMuse def{ museOptions = opts } "source" input + let sources = toSources s + res <- flip runReaderT def $ runParserT parseMuse def{ museOptions = opts } + (initialSourceName sources) sources case res of - Left e -> throwError $ PandocParsecError input e + Left e -> throwError $ PandocParsecError sources e Right d -> return d type F = Future MuseState @@ -82,7 +83,7 @@ instance Default MuseEnv where , museInPara = False } -type MuseParser m = ParserT Text MuseState (ReaderT MuseEnv m) +type MuseParser m = ParserT Sources MuseState (ReaderT MuseEnv m) instance HasReaderOptions MuseState where extractReaderOptions = museOptions @@ -155,7 +156,7 @@ firstColumn = getPosition >>= \pos -> guard (sourceColumn pos == 1) -- * Parsers -- | Parse end-of-line, which can be either a newline or end-of-file. -eol :: Stream s m Char => ParserT s st m () +eol :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m () eol = void newline <|> eof getIndent :: PandocMonad m diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs index 9c8bc0374..58f235e81 100644 --- a/src/Text/Pandoc/Readers/Native.hs +++ b/src/Text/Pandoc/Readers/Native.hs @@ -21,6 +21,7 @@ import Control.Monad.Except (throwError) import Data.Text (Text) import Text.Pandoc.Class.PandocMonad (PandocMonad) import Text.Pandoc.Error +import Text.Pandoc.Sources (ToSources(..), sourcesToText) -- | Read native formatted text and return a Pandoc document. -- The input may be a full pandoc document, a block list, a block, @@ -32,14 +33,15 @@ import Text.Pandoc.Error -- -- > Pandoc nullMeta [Plain [Str "hi"]] -- -readNative :: PandocMonad m +readNative :: (PandocMonad m, ToSources a) => ReaderOptions - -> Text -- ^ String to parse (assuming @'\n'@ line endings) + -> a -> m Pandoc readNative _ s = - case maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead s) of - Right doc -> return doc - Left _ -> throwError $ PandocParseError "couldn't read native" + let t = sourcesToText . toSources $ s + in case maybe (Pandoc nullMeta <$> readBlocks t) Right (safeRead t) of + Right doc -> return doc + Left _ -> throwError $ PandocParseError "couldn't read native" readBlocks :: Text -> Either PandocError [Block] readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead s) diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs index 5f2ddb876..668c9ca11 100644 --- a/src/Text/Pandoc/Readers/OPML.hs +++ b/src/Text/Pandoc/Readers/OPML.hs @@ -24,7 +24,8 @@ import Text.Pandoc.Options import Text.Pandoc.Error (PandocError(..)) import Text.Pandoc.Readers.HTML (readHtml) import Text.Pandoc.Readers.Markdown (readMarkdown) -import Text.Pandoc.Shared (crFilter, blocksToInlines') +import Text.Pandoc.Shared (blocksToInlines') +import Text.Pandoc.Sources (ToSources(..), sourcesToText) import Text.Pandoc.XML.Light import Control.Monad.Except (throwError) @@ -46,10 +47,14 @@ instance Default OPMLState where , opmlOptions = def } -readOPML :: PandocMonad m => ReaderOptions -> Text -> m Pandoc +readOPML :: (PandocMonad m, ToSources a) + => ReaderOptions + -> a + -> m Pandoc readOPML opts inp = do - (bs, st') <- runStateT - (case parseXMLContents (TL.fromStrict (crFilter inp)) of + let sources = toSources inp + (bs, st') <- + runStateT (case parseXMLContents (TL.fromStrict . sourcesToText $ sources) of Left msg -> throwError $ PandocXMLError "" msg Right ns -> mapM parseBlock ns) def{ opmlOptions = opts } diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index afeb27a87..8823befdd 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -18,22 +18,19 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad) import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Parsing (reportLogMessages) -import Text.Pandoc.Shared (crFilter) - +import Text.Pandoc.Sources (ToSources(..), ensureFinalNewlines) import Control.Monad.Except (throwError) import Control.Monad.Reader (runReaderT) -import Data.Text (Text) - -- | Parse org-mode string and return a Pandoc document. -readOrg :: PandocMonad m +readOrg :: (PandocMonad m, ToSources a) => ReaderOptions -- ^ Reader options - -> Text -- ^ String to parse (assuming @'\n'@ line endings) + -> a -> m Pandoc readOrg opts s = do parsed <- flip runReaderT def $ readWithM parseOrg (optionsToParserState opts) - (crFilter s <> "\n\n") + (ensureFinalNewlines 2 (toSources s)) case parsed of Right result -> return result Left e -> throwError e diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index 519a6ce04..054f2611a 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -29,6 +29,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline) import Text.TeXMath (DisplayType (..), readTeX, writePandoc) +import Text.Pandoc.Sources (ToSources(..)) import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap import Control.Monad (guard, mplus, mzero, unless, void, when) @@ -802,7 +803,7 @@ inlineLaTeX = try $ do parseAsInlineLaTeX :: PandocMonad m => Text -> TeXExport -> OrgParser m (Maybe Inlines) parseAsInlineLaTeX cs = \case - TeXExport -> maybeRight <$> runParserT inlineCommand state "" cs + TeXExport -> maybeRight <$> runParserT inlineCommand state "" (toSources cs) TeXIgnore -> return (Just mempty) TeXVerbatim -> return (Just $ B.str cs) diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs index 6ed24a602..c7ea02815 100644 --- a/src/Text/Pandoc/Readers/Org/Parsing.hs +++ b/src/Text/Pandoc/Readers/Org/Parsing.hs @@ -114,7 +114,7 @@ import Control.Monad (guard) import Control.Monad.Reader (ReaderT) -- | The parser used to read org files. -type OrgParser m = ParserT Text OrgParserState (ReaderT OrgParserLocal m) +type OrgParser m = ParserT Sources OrgParserState (ReaderT OrgParserLocal m) -- -- Adaptions and specializations of parsing utilities diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index ac4c0b6cb..a3fcf028c 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -38,25 +38,24 @@ import Text.Pandoc.Options import Text.Pandoc.Parsing import Text.Pandoc.Shared import qualified Text.Pandoc.UTF8 as UTF8 -import Text.Printf (printf) import Data.Time.Format -- TODO: -- [ ] .. parsed-literal -- | Parse reStructuredText string and return Pandoc document. -readRST :: PandocMonad m +readRST :: (PandocMonad m, ToSources a) => ReaderOptions -- ^ Reader options - -> Text -- ^ Text to parse (assuming @'\n'@ line endings) + -> a -> m Pandoc readRST opts s = do parsed <- readWithM parseRST def{ stateOptions = opts } - (crFilter s <> "\n\n") + (ensureFinalNewlines 2 (toSources s)) case parsed of Right result -> return result Left e -> throwError e -type RSTParser m = ParserT Text ParserState m +type RSTParser m = ParserT Sources ParserState m -- -- Constants and data structure definitions @@ -151,11 +150,19 @@ parseRST = do startPos <- getPosition -- go through once just to get list of reference keys and notes -- docMinusKeys is the raw document with blanks where the keys were... - docMinusKeys <- T.concat <$> - manyTill (referenceKey <|> anchorDef <|> - noteBlock <|> citationBlock <|> - (snd <$> withRaw comment) <|> - headerBlock <|> lineClump) eof + let chunk = referenceKey + <|> anchorDef + <|> noteBlock + <|> citationBlock + <|> (snd <$> withRaw comment) + <|> headerBlock + <|> lineClump + docMinusKeys <- Sources <$> + manyTill (do pos <- getPosition + t <- chunk + return (pos, t)) eof + -- UGLY: we collapse source position information. + -- TODO: fix the parser to use the F monad instead of two passes setInput docMinusKeys setPosition startPos st' <- getState @@ -348,7 +355,7 @@ singleHeader' = try $ do -- hrule block -- -hrule :: Monad m => ParserT Text st m Blocks +hrule :: Monad m => ParserT Sources st m Blocks hrule = try $ do chr <- oneOf underlineChars count 3 (char chr) @@ -363,7 +370,7 @@ hrule = try $ do -- read a line indented by a given string indentedLine :: (HasReaderOptions st, Monad m) - => Int -> ParserT Text st m Text + => Int -> ParserT Sources st m Text indentedLine indents = try $ do lookAhead spaceChar gobbleAtMostSpaces indents @@ -372,7 +379,7 @@ indentedLine indents = try $ do -- one or more indented lines, possibly separated by blank lines. -- any amount of indentation will work. indentedBlock :: (HasReaderOptions st, Monad m) - => ParserT Text st m Text + => ParserT Sources st m Text indentedBlock = try $ do indents <- length <$> lookAhead (many1 spaceChar) lns <- many1 $ try $ do b <- option "" blanklines @@ -381,20 +388,20 @@ indentedBlock = try $ do optional blanklines return $ T.unlines lns -quotedBlock :: Monad m => ParserT Text st m Text +quotedBlock :: Monad m => ParserT Sources st m Text quotedBlock = try $ do quote <- lookAhead $ oneOf "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" lns <- many1 $ lookAhead (char quote) >> anyLine optional blanklines return $ T.unlines lns -codeBlockStart :: Monad m => ParserT Text st m Char +codeBlockStart :: Monad m => ParserT Sources st m Char codeBlockStart = string "::" >> blankline >> blankline -codeBlock :: Monad m => ParserT Text ParserState m Blocks +codeBlock :: Monad m => ParserT Sources ParserState m Blocks codeBlock = try $ codeBlockStart >> codeBlockBody -codeBlockBody :: Monad m => ParserT Text ParserState m Blocks +codeBlockBody :: Monad m => ParserT Sources ParserState m Blocks codeBlockBody = do lang <- stateRstHighlight <$> getState try $ B.codeBlockWith ("", maybeToList lang, []) . stripTrailingNewlines <$> @@ -410,14 +417,14 @@ lhsCodeBlock = try $ do return $ B.codeBlockWith ("", ["haskell","literate"], []) $ T.intercalate "\n" lns -latexCodeBlock :: Monad m => ParserT Text st m [Text] +latexCodeBlock :: Monad m => ParserT Sources st m [Text] latexCodeBlock = try $ do try (latexBlockLine "\\begin{code}") many1Till anyLine (try $ latexBlockLine "\\end{code}") where latexBlockLine s = skipMany spaceChar >> string s >> blankline -birdCodeBlock :: Monad m => ParserT Text st m [Text] +birdCodeBlock :: Monad m => ParserT Sources st m [Text] birdCodeBlock = filterSpace <$> many1 birdTrackLine where filterSpace lns = -- if (as is normal) there is always a space after >, drop it @@ -425,7 +432,7 @@ birdCodeBlock = filterSpace <$> many1 birdTrackLine then map (T.drop 1) lns else lns -birdTrackLine :: Monad m => ParserT Text st m Text +birdTrackLine :: Monad m => ParserT Sources st m Text birdTrackLine = char '>' >> anyLine -- @@ -456,7 +463,6 @@ includeDirective top fields body = do let (startLine :: Maybe Int) = lookup "start-line" fields >>= safeRead let (endLine :: Maybe Int) = lookup "end-line" fields >>= safeRead oldPos <- getPosition - oldInput <- getInput containers <- stateContainers <$> getState when (f `elem` containers) $ throwError $ PandocParseError $ "Include file loop at " <> tshow oldPos @@ -494,15 +500,11 @@ includeDirective top fields body = do Nothing -> case lookup "literal" fields of Just _ -> return $ B.rawBlock "rst" contents' Nothing -> do - setPosition $ newPos (T.unpack f) 1 1 - setInput $ contents' <> "\n" - bs <- optional blanklines >> - (mconcat <$> many block) - setInput oldInput - setPosition oldPos + addToSources (initialPos (T.unpack f)) + (contents' <> "\n") updateState $ \s -> s{ stateContainers = tail $ stateContainers s } - return bs + return mempty -- @@ -526,7 +528,7 @@ definitionList :: PandocMonad m => RSTParser m Blocks definitionList = B.definitionList <$> many1 definitionListItem -- parses bullet list start and returns its length (inc. following whitespace) -bulletListStart :: Monad m => ParserT Text st m Int +bulletListStart :: Monad m => ParserT Sources st m Int bulletListStart = try $ do notFollowedBy' hrule -- because hrules start out just like lists marker <- oneOf bulletListMarkers @@ -1103,7 +1105,7 @@ quotedReferenceName = try $ do -- plus isolated (no two adjacent) internal hyphens, underscores, -- periods, colons and plus signs; no whitespace or other characters -- are allowed. -simpleReferenceName :: Monad m => ParserT Text st m Text +simpleReferenceName :: Monad m => ParserT Sources st m Text simpleReferenceName = do x <- alphaNum xs <- many $ alphaNum @@ -1122,7 +1124,7 @@ referenceKey = do -- return enough blanks to replace key return $ T.replicate (sourceLine endPos - sourceLine startPos) "\n" -targetURI :: Monad m => ParserT Text st m Text +targetURI :: Monad m => ParserT Sources st m Text targetURI = do skipSpaces optional $ try $ newline >> notFollowedBy blankline @@ -1160,8 +1162,10 @@ anonymousKey :: Monad m => RSTParser m () anonymousKey = try $ do oneOfStrings [".. __:", "__"] src <- targetURI - pos <- getPosition - let key = toKey $ "_" <> T.pack (printf "%09d" (sourceLine pos)) + -- we need to ensure that the keys are ordered by occurrence in + -- the document. + numKeys <- M.size . stateKeys <$> getState + let key = toKey $ "_" <> T.pack (show numKeys) updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ stateKeys s } @@ -1250,13 +1254,13 @@ headerBlock = do -- Grid tables TODO: -- - column spans -dashedLine :: Monad m => Char -> ParserT Text st m (Int, Int) +dashedLine :: Monad m => Char -> ParserT Sources st m (Int, Int) dashedLine ch = do dashes <- many1 (char ch) sp <- many (char ' ') return (length dashes, length $ dashes ++ sp) -simpleDashedLines :: Monad m => Char -> ParserT Text st m [(Int,Int)] +simpleDashedLines :: Monad m => Char -> ParserT Sources st m [(Int,Int)] simpleDashedLines ch = try $ many1 (dashedLine ch) -- Parse a table row separator @@ -1382,7 +1386,7 @@ hyphens = do -- don't want to treat endline after hyphen or dash as a space return $ B.str result -escapedChar :: Monad m => ParserT Text st m Inlines +escapedChar :: Monad m => ParserT Sources st m Inlines escapedChar = do c <- escaped anyChar return $ if c == ' ' || c == '\n' || c == '\r' -- '\ ' is null in RST diff --git a/src/Text/Pandoc/Readers/Roff.hs b/src/Text/Pandoc/Readers/Roff.hs index 509ce1377..47f16ef4b 100644 --- a/src/Text/Pandoc/Readers/Roff.hs +++ b/src/Text/Pandoc/Readers/Roff.hs @@ -42,7 +42,6 @@ import Text.Pandoc.Logging (LogMessage(..)) import Text.Pandoc.Options import Text.Pandoc.Parsing import Text.Pandoc.Shared (safeRead) -import Text.Parsec hiding (tokenPrim) import Text.Pandoc.RoffChar (characterCodes, combiningAccents) import qualified Data.Sequence as Seq import qualified Data.Foldable as Foldable @@ -122,16 +121,16 @@ instance Default RoffState where , afterConditional = False } -type RoffLexer m = ParserT T.Text RoffState m +type RoffLexer m = ParserT Sources RoffState m -- -- Lexer: T.Text -> RoffToken -- -eofline :: Stream s m Char => ParsecT s u m () +eofline :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s u m () eofline = void newline <|> eof <|> () <$ lookAhead (string "\\}") -spacetab :: Stream s m Char => ParsecT s u m Char +spacetab :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s u m Char spacetab = char ' ' <|> char '\t' characterCodeMap :: M.Map T.Text Char @@ -303,8 +302,7 @@ expandString = try $ do char '*' cs <- escapeArg <|> countChar 1 anyChar s <- linePartsToText <$> resolveText cs pos - getInput >>= setInput . (s <>) - return () + addToInput s -- Parses: '..' quoteArg :: PandocMonad m => RoffLexer m T.Text @@ -316,7 +314,7 @@ escFont = do font' <- if T.null font || font == "P" then prevFont <$> getState else return $ foldr processFontLetter defaultFontSpec $ T.unpack font - modifyState $ \st -> st{ prevFont = currentFont st + updateState $ \st -> st{ prevFont = currentFont st , currentFont = font' } return [Font font'] where @@ -372,8 +370,8 @@ lexTable pos = do spaces opts <- try tableOptions <|> [] <$ optional (char ';') case lookup "tab" opts of - Just (T.uncons -> Just (c, _)) -> modifyState $ \st -> st{ tableTabChar = c } - _ -> modifyState $ \st -> st{ tableTabChar = '\t' } + Just (T.uncons -> Just (c, _)) -> updateState $ \st -> st{ tableTabChar = c } + _ -> updateState $ \st -> st{ tableTabChar = '\t' } spaces skipMany lexComment spaces @@ -489,18 +487,18 @@ lexConditional mname = do ifPart <- do optional $ try $ char '\\' >> newline lexGroup - <|> do modifyState $ \s -> s{ afterConditional = True } + <|> do updateState $ \s -> s{ afterConditional = True } t <- manToken - modifyState $ \s -> s{ afterConditional = False } + updateState $ \s -> s{ afterConditional = False } return t case mbtest of Nothing -> do - putState st -- reset state, so we don't record macros in skipped section + setState st -- reset state, so we don't record macros in skipped section report $ SkippedContent (T.cons '.' mname) pos return mempty Just True -> return ifPart Just False -> do - putState st + setState st return mempty expression :: PandocMonad m => RoffLexer m (Maybe Bool) @@ -515,7 +513,7 @@ expression = do _ -> Nothing where returnValue v = do - modifyState $ \st -> st{ lastExpression = v } + updateState $ \st -> st{ lastExpression = v } return v lexGroup :: PandocMonad m => RoffLexer m RoffTokens @@ -536,7 +534,7 @@ lexIncludeFile args = do result <- readFileFromDirs dirs $ T.unpack fp case result of Nothing -> report $ CouldNotLoadIncludeFile fp pos - Just s -> getInput >>= setInput . (s <>) + Just s -> addToInput s return mempty [] -> return mempty @@ -564,13 +562,13 @@ lexStringDef args = do -- string definition (x:ys) -> do let ts = singleTok $ TextLine (intercalate [RoffStr " " ] ys) let stringName = linePartsToText x - modifyState $ \st -> + updateState $ \st -> st{ customMacros = M.insert stringName ts (customMacros st) } return mempty lexMacroDef :: PandocMonad m => [Arg] -> RoffLexer m RoffTokens lexMacroDef args = do -- macro definition - modifyState $ \st -> st{ roffMode = CopyMode } + updateState $ \st -> st{ roffMode = CopyMode } (macroName, stopMacro) <- case args of (x : y : _) -> return (linePartsToText x, linePartsToText y) @@ -584,7 +582,7 @@ lexMacroDef args = do -- macro definition _ <- lexArgs return () ts <- mconcat <$> manyTill manToken stop - modifyState $ \st -> + updateState $ \st -> st{ customMacros = M.insert macroName ts (customMacros st) , roffMode = NormalMode } return mempty diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs index c4d7bcc93..276d28aaa 100644 --- a/src/Text/Pandoc/Readers/TWiki.hs +++ b/src/Text/Pandoc/Readers/TWiki.hs @@ -28,22 +28,22 @@ import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (enclosed, nested) import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag) -import Text.Pandoc.Shared (crFilter, tshow) +import Text.Pandoc.Shared (tshow) import Text.Pandoc.XML (fromEntities) -- | Read twiki from an input string and return a Pandoc document. -readTWiki :: PandocMonad m +readTWiki :: (PandocMonad m, ToSources a) => ReaderOptions - -> Text + -> a -> m Pandoc readTWiki opts s = do - res <- readWithM parseTWiki def{ stateOptions = opts } - (crFilter s <> "\n\n") + let sources = ensureFinalNewlines 2 (toSources s) + res <- readWithM parseTWiki def{ stateOptions = opts } sources case res of Left e -> throwError e Right d -> return d -type TWParser = ParserT Text ParserState +type TWParser = ParserT Sources ParserState -- -- utility functions diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 8d7900de4..981878206 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -53,30 +53,34 @@ import Text.Pandoc.Options import Text.Pandoc.Parsing import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isInlineTag) import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline) -import Text.Pandoc.Shared (crFilter, trim, tshow) +import Text.Pandoc.Shared (trim, tshow) -- | Parse a Textile text and return a Pandoc document. -readTextile :: PandocMonad m +readTextile :: (PandocMonad m, ToSources a) => ReaderOptions -- ^ Reader options - -> Text -- ^ String to parse (assuming @'\n'@ line endings) + -> a -> m Pandoc readTextile opts s = do - parsed <- readWithM parseTextile def{ stateOptions = opts } - (crFilter s <> "\n\n") + let sources = ensureFinalNewlines 2 (toSources s) + parsed <- readWithM parseTextile def{ stateOptions = opts } sources case parsed of Right result -> return result Left e -> throwError e +type TextileParser = ParserT Sources ParserState -- | Generate a Pandoc ADT from a textile document -parseTextile :: PandocMonad m => ParserT Text ParserState m Pandoc +parseTextile :: PandocMonad m => TextileParser m Pandoc parseTextile = do many blankline startPos <- getPosition -- go through once just to get list of reference keys and notes -- docMinusKeys is the raw document with blanks where the keys/notes were... - let firstPassParser = noteBlock <|> lineClump - manyTill firstPassParser eof >>= setInput . T.concat + let firstPassParser = do + pos <- getPosition + t <- noteBlock <|> lineClump + return (pos, t) + manyTill firstPassParser eof >>= setInput . Sources setPosition startPos st' <- getState let reversedNotes = stateNotes st' @@ -84,10 +88,10 @@ parseTextile = do -- now parse it for real... Pandoc nullMeta . B.toList <$> parseBlocks -- FIXME -noteMarker :: PandocMonad m => ParserT Text ParserState m Text +noteMarker :: PandocMonad m => TextileParser m Text noteMarker = skipMany spaceChar >> string "fn" >> T.pack <$> manyTill digit (char '.') -noteBlock :: PandocMonad m => ParserT Text ParserState m Text +noteBlock :: PandocMonad m => TextileParser m Text noteBlock = try $ do startPos <- getPosition ref <- noteMarker @@ -102,11 +106,11 @@ noteBlock = try $ do return $ T.replicate (sourceLine endPos - sourceLine startPos) "\n" -- | Parse document blocks -parseBlocks :: PandocMonad m => ParserT Text ParserState m Blocks +parseBlocks :: PandocMonad m => TextileParser m Blocks parseBlocks = mconcat <$> manyTill block eof -- | Block parsers list tried in definition order -blockParsers :: PandocMonad m => [ParserT Text ParserState m Blocks] +blockParsers :: PandocMonad m => [TextileParser m Blocks] blockParsers = [ codeBlock , header , blockQuote @@ -121,22 +125,22 @@ blockParsers = [ codeBlock ] -- | Any block in the order of definition of blockParsers -block :: PandocMonad m => ParserT Text ParserState m Blocks +block :: PandocMonad m => TextileParser m Blocks block = do res <- choice blockParsers <?> "block" trace (T.take 60 $ tshow $ B.toList res) return res -commentBlock :: PandocMonad m => ParserT Text ParserState m Blocks +commentBlock :: PandocMonad m => TextileParser m Blocks commentBlock = try $ do string "###." manyTill anyLine blanklines return mempty -codeBlock :: PandocMonad m => ParserT Text ParserState m Blocks +codeBlock :: PandocMonad m => TextileParser m Blocks codeBlock = codeBlockTextile <|> codeBlockHtml -codeBlockTextile :: PandocMonad m => ParserT Text ParserState m Blocks +codeBlockTextile :: PandocMonad m => TextileParser m Blocks codeBlockTextile = try $ do string "bc." <|> string "pre." extended <- option False (True <$ char '.') @@ -156,7 +160,7 @@ trimTrailingNewlines :: Text -> Text trimTrailingNewlines = T.dropWhileEnd (=='\n') -- | Code Blocks in Textile are between <pre> and </pre> -codeBlockHtml :: PandocMonad m => ParserT Text ParserState m Blocks +codeBlockHtml :: PandocMonad m => TextileParser m Blocks codeBlockHtml = try $ do (t@(TagOpen _ attrs),_) <- htmlTag (tagOpen (=="pre") (const True)) result' <- T.pack <$> manyTill anyChar (htmlTag (tagClose (=="pre"))) @@ -174,7 +178,7 @@ codeBlockHtml = try $ do return $ B.codeBlockWith (ident,classes,kvs) result''' -- | Header of the form "hN. content" with N in 1..6 -header :: PandocMonad m => ParserT Text ParserState m Blocks +header :: PandocMonad m => TextileParser m Blocks header = try $ do char 'h' level <- digitToInt <$> oneOf "123456" @@ -186,14 +190,14 @@ header = try $ do return $ B.headerWith attr' level name -- | Blockquote of the form "bq. content" -blockQuote :: PandocMonad m => ParserT Text ParserState m Blocks +blockQuote :: PandocMonad m => TextileParser m Blocks blockQuote = try $ do string "bq" >> attributes >> char '.' >> whitespace B.blockQuote <$> para -- Horizontal rule -hrule :: PandocMonad m => ParserT Text st m Blocks +hrule :: PandocMonad m => TextileParser m Blocks hrule = try $ do skipSpaces start <- oneOf "-*" @@ -208,39 +212,39 @@ hrule = try $ do -- | Can be a bullet list or an ordered list. This implementation is -- strict in the nesting, sublist must start at exactly "parent depth -- plus one" -anyList :: PandocMonad m => ParserT Text ParserState m Blocks +anyList :: PandocMonad m => TextileParser m Blocks anyList = try $ anyListAtDepth 1 <* blanklines -- | This allow one type of list to be nested into an other type, -- provided correct nesting -anyListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks +anyListAtDepth :: PandocMonad m => Int -> TextileParser m Blocks anyListAtDepth depth = choice [ bulletListAtDepth depth, orderedListAtDepth depth, definitionList ] -- | Bullet List of given depth, depth being the number of leading '*' -bulletListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks +bulletListAtDepth :: PandocMonad m => Int -> TextileParser m Blocks bulletListAtDepth depth = try $ B.bulletList <$> many1 (bulletListItemAtDepth depth) -- | Bullet List Item of given depth, depth being the number of -- leading '*' -bulletListItemAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks +bulletListItemAtDepth :: PandocMonad m => Int -> TextileParser m Blocks bulletListItemAtDepth = genericListItemAtDepth '*' -- | Ordered List of given depth, depth being the number of -- leading '#' -orderedListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks +orderedListAtDepth :: PandocMonad m => Int -> TextileParser m Blocks orderedListAtDepth depth = try $ do items <- many1 (orderedListItemAtDepth depth) return $ B.orderedList items -- | Ordered List Item of given depth, depth being the number of -- leading '#' -orderedListItemAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks +orderedListItemAtDepth :: PandocMonad m => Int -> TextileParser m Blocks orderedListItemAtDepth = genericListItemAtDepth '#' -- | Common implementation of list items -genericListItemAtDepth :: PandocMonad m => Char -> Int -> ParserT Text ParserState m Blocks +genericListItemAtDepth :: PandocMonad m => Char -> Int -> TextileParser m Blocks genericListItemAtDepth c depth = try $ do count depth (char c) >> attributes >> whitespace contents <- mconcat <$> many ((B.plain . mconcat <$> many1 inline) <|> @@ -250,25 +254,25 @@ genericListItemAtDepth c depth = try $ do return $ contents <> sublist -- | A definition list is a set of consecutive definition items -definitionList :: PandocMonad m => ParserT Text ParserState m Blocks +definitionList :: PandocMonad m => TextileParser m Blocks definitionList = try $ B.definitionList <$> many1 definitionListItem -- | List start character. -listStart :: PandocMonad m => ParserT Text ParserState m () +listStart :: PandocMonad m => TextileParser m () listStart = genericListStart '*' <|> () <$ genericListStart '#' <|> () <$ definitionListStart -genericListStart :: PandocMonad m => Char -> ParserT Text st m () +genericListStart :: PandocMonad m => Char -> TextileParser m () genericListStart c = () <$ try (many1 (char c) >> whitespace) -basicDLStart :: PandocMonad m => ParserT Text ParserState m () +basicDLStart :: PandocMonad m => TextileParser m () basicDLStart = do char '-' whitespace notFollowedBy newline -definitionListStart :: PandocMonad m => ParserT Text ParserState m Inlines +definitionListStart :: PandocMonad m => TextileParser m Inlines definitionListStart = try $ do basicDLStart trimInlines . mconcat <$> @@ -281,15 +285,15 @@ definitionListStart = try $ do -- the term defined, then spaces and ":=". The definition follows, on -- the same single line, or spaned on multiple line, after a line -- break. -definitionListItem :: PandocMonad m => ParserT Text ParserState m (Inlines, [Blocks]) +definitionListItem :: PandocMonad m => TextileParser m (Inlines, [Blocks]) definitionListItem = try $ do term <- mconcat . intersperse B.linebreak <$> many1 definitionListStart def' <- string ":=" *> optional whitespace *> (multilineDef <|> inlineDef) return (term, def') - where inlineDef :: PandocMonad m => ParserT Text ParserState m [Blocks] + where inlineDef :: PandocMonad m => TextileParser m [Blocks] inlineDef = liftM (\d -> [B.plain d]) $ optional whitespace >> (trimInlines . mconcat <$> many inline) <* newline - multilineDef :: PandocMonad m => ParserT Text ParserState m [Blocks] + multilineDef :: PandocMonad m => TextileParser m [Blocks] multilineDef = try $ do optional whitespace >> newline s <- T.pack <$> many1Till anyChar (try (string "=:" >> newline)) @@ -300,7 +304,7 @@ definitionListItem = try $ do -- raw content -- | A raw Html Block, optionally followed by blanklines -rawHtmlBlock :: PandocMonad m => ParserT Text ParserState m Blocks +rawHtmlBlock :: PandocMonad m => TextileParser m Blocks rawHtmlBlock = try $ do skipMany spaceChar (_,b) <- htmlTag isBlockTag @@ -308,14 +312,14 @@ rawHtmlBlock = try $ do return $ B.rawBlock "html" b -- | Raw block of LaTeX content -rawLaTeXBlock' :: PandocMonad m => ParserT Text ParserState m Blocks +rawLaTeXBlock' :: PandocMonad m => TextileParser m Blocks rawLaTeXBlock' = do guardEnabled Ext_raw_tex B.rawBlock "latex" <$> (rawLaTeXBlock <* spaces) -- | In textile, paragraphs are separated by blank lines. -para :: PandocMonad m => ParserT Text ParserState m Blocks +para :: PandocMonad m => TextileParser m Blocks para = B.para . trimInlines . mconcat <$> many1 inline -- Tables @@ -326,7 +330,7 @@ toAlignment '>' = AlignRight toAlignment '=' = AlignCenter toAlignment _ = AlignDefault -cellAttributes :: PandocMonad m => ParserT Text ParserState m (Bool, Alignment) +cellAttributes :: PandocMonad m => TextileParser m (Bool, Alignment) cellAttributes = try $ do isHeader <- option False (True <$ char '_') -- we just ignore colspan and rowspan markers: @@ -339,7 +343,7 @@ cellAttributes = try $ do return (isHeader, alignment) -- | A table cell spans until a pipe | -tableCell :: PandocMonad m => ParserT Text ParserState m ((Bool, Alignment), Blocks) +tableCell :: PandocMonad m => TextileParser m ((Bool, Alignment), Blocks) tableCell = try $ do char '|' (isHeader, alignment) <- option (False, AlignDefault) cellAttributes @@ -350,7 +354,7 @@ tableCell = try $ do return ((isHeader, alignment), B.plain content) -- | A table row is made of many table cells -tableRow :: PandocMonad m => ParserT Text ParserState m [((Bool, Alignment), Blocks)] +tableRow :: PandocMonad m => TextileParser m [((Bool, Alignment), Blocks)] tableRow = try $ do -- skip optional row attributes optional $ try $ do @@ -360,7 +364,7 @@ tableRow = try $ do many1 tableCell <* char '|' <* blankline -- | A table with an optional header. -table :: PandocMonad m => ParserT Text ParserState m Blocks +table :: PandocMonad m => TextileParser m Blocks table = try $ do -- ignore table attributes caption <- option mempty $ try $ do @@ -388,7 +392,7 @@ table = try $ do (TableFoot nullAttr []) -- | Ignore markers for cols, thead, tfoot. -ignorableRow :: PandocMonad m => ParserT Text ParserState m () +ignorableRow :: PandocMonad m => TextileParser m () ignorableRow = try $ do char '|' oneOf ":^-~" @@ -397,7 +401,7 @@ ignorableRow = try $ do _ <- anyLine return () -explicitBlockStart :: PandocMonad m => Text -> ParserT Text ParserState m () +explicitBlockStart :: PandocMonad m => Text -> TextileParser m () explicitBlockStart name = try $ do string (T.unpack name) attributes @@ -409,8 +413,8 @@ explicitBlockStart name = try $ do -- However, they can be used to set HTML/CSS attributes when needed. maybeExplicitBlock :: PandocMonad m => Text -- ^ block tag name - -> ParserT Text ParserState m Blocks -- ^ implicit block - -> ParserT Text ParserState m Blocks + -> TextileParser m Blocks -- ^ implicit block + -> TextileParser m Blocks maybeExplicitBlock name blk = try $ do optional $ explicitBlockStart name blk @@ -423,11 +427,11 @@ maybeExplicitBlock name blk = try $ do -- | Any inline element -inline :: PandocMonad m => ParserT Text ParserState m Inlines +inline :: PandocMonad m => TextileParser m Inlines inline = choice inlineParsers <?> "inline" -- | Inline parsers tried in order -inlineParsers :: PandocMonad m => [ParserT Text ParserState m Inlines] +inlineParsers :: PandocMonad m => [TextileParser m Inlines] inlineParsers = [ str , whitespace , endline @@ -447,7 +451,7 @@ inlineParsers = [ str ] -- | Inline markups -inlineMarkup :: PandocMonad m => ParserT Text ParserState m Inlines +inlineMarkup :: PandocMonad m => TextileParser m Inlines inlineMarkup = choice [ simpleInline (string "??") (B.cite []) , simpleInline (string "**") B.strong , simpleInline (string "__") B.emph @@ -461,29 +465,29 @@ inlineMarkup = choice [ simpleInline (string "??") (B.cite []) ] -- | Trademark, registered, copyright -mark :: PandocMonad m => ParserT Text st m Inlines +mark :: PandocMonad m => TextileParser m Inlines mark = try $ char '(' >> (try tm <|> try reg <|> copy) -reg :: PandocMonad m => ParserT Text st m Inlines +reg :: PandocMonad m => TextileParser m Inlines reg = do oneOf "Rr" char ')' return $ B.str "\174" -tm :: PandocMonad m => ParserT Text st m Inlines +tm :: PandocMonad m => TextileParser m Inlines tm = do oneOf "Tt" oneOf "Mm" char ')' return $ B.str "\8482" -copy :: PandocMonad m => ParserT Text st m Inlines +copy :: PandocMonad m => TextileParser m Inlines copy = do oneOf "Cc" char ')' return $ B.str "\169" -note :: PandocMonad m => ParserT Text ParserState m Inlines +note :: PandocMonad m => TextileParser m Inlines note = try $ do ref <- char '[' *> many1 digit <* char ']' notes <- stateNotes <$> getState @@ -507,13 +511,13 @@ wordBoundaries :: [Char] wordBoundaries = markupChars <> stringBreakers -- | Parse a hyphened sequence of words -hyphenedWords :: PandocMonad m => ParserT Text ParserState m Text +hyphenedWords :: PandocMonad m => TextileParser m Text hyphenedWords = do x <- wordChunk xs <- many (try $ char '-' >> wordChunk) return $ T.intercalate "-" (x:xs) -wordChunk :: PandocMonad m => ParserT Text ParserState m Text +wordChunk :: PandocMonad m => TextileParser m Text wordChunk = try $ do hd <- noneOf wordBoundaries tl <- many ( noneOf wordBoundaries <|> @@ -522,7 +526,7 @@ wordChunk = try $ do return $ T.pack $ hd:tl -- | Any string -str :: PandocMonad m => ParserT Text ParserState m Inlines +str :: PandocMonad m => TextileParser m Inlines str = do baseStr <- hyphenedWords -- RedCloth compliance : if parsed word is uppercase and immediately @@ -535,11 +539,11 @@ str = do return $ B.str fullStr -- | Some number of space chars -whitespace :: PandocMonad m => ParserT Text st m Inlines +whitespace :: PandocMonad m => TextileParser m Inlines whitespace = many1 spaceChar >> return B.space <?> "whitespace" -- | In Textile, an isolated endline character is a line break -endline :: PandocMonad m => ParserT Text ParserState m Inlines +endline :: PandocMonad m => TextileParser m Inlines endline = try $ do newline notFollowedBy blankline @@ -547,18 +551,18 @@ endline = try $ do notFollowedBy rawHtmlBlock return B.linebreak -rawHtmlInline :: PandocMonad m => ParserT Text ParserState m Inlines +rawHtmlInline :: PandocMonad m => TextileParser m Inlines rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag -- | Raw LaTeX Inline -rawLaTeXInline' :: PandocMonad m => ParserT Text ParserState m Inlines +rawLaTeXInline' :: PandocMonad m => TextileParser m Inlines rawLaTeXInline' = try $ do guardEnabled Ext_raw_tex B.rawInline "latex" <$> rawLaTeXInline -- | Textile standard link syntax is "label":target. But we -- can also have ["label":target]. -link :: PandocMonad m => ParserT Text ParserState m Inlines +link :: PandocMonad m => TextileParser m Inlines link = try $ do bracketed <- (True <$ char '[') <|> return False char '"' *> notFollowedBy (oneOf " \t\n\r") @@ -578,7 +582,7 @@ link = try $ do else B.spanWith attr $ B.link url "" name' -- | image embedding -image :: PandocMonad m => ParserT Text ParserState m Inlines +image :: PandocMonad m => TextileParser m Inlines image = try $ do char '!' >> notFollowedBy space (ident, cls, kvs) <- attributes @@ -590,51 +594,51 @@ image = try $ do char '!' return $ B.imageWith attr src alt (B.str alt) -escapedInline :: PandocMonad m => ParserT Text ParserState m Inlines +escapedInline :: PandocMonad m => TextileParser m Inlines escapedInline = escapedEqs <|> escapedTag -escapedEqs :: PandocMonad m => ParserT Text ParserState m Inlines +escapedEqs :: PandocMonad m => TextileParser m Inlines escapedEqs = B.str . T.pack <$> try (string "==" *> manyTill anyChar' (try $ string "==")) -- | literal text escaped btw <notextile> tags -escapedTag :: PandocMonad m => ParserT Text ParserState m Inlines +escapedTag :: PandocMonad m => TextileParser m Inlines escapedTag = B.str . T.pack <$> try (string "<notextile>" *> manyTill anyChar' (try $ string "</notextile>")) -- | Any special symbol defined in wordBoundaries -symbol :: PandocMonad m => ParserT Text ParserState m Inlines +symbol :: PandocMonad m => TextileParser m Inlines symbol = B.str . T.singleton <$> (notFollowedBy newline *> notFollowedBy rawHtmlBlock *> oneOf wordBoundaries) -- | Inline code -code :: PandocMonad m => ParserT Text ParserState m Inlines +code :: PandocMonad m => TextileParser m Inlines code = code1 <|> code2 -- any character except a newline before a blank line -anyChar' :: PandocMonad m => ParserT Text ParserState m Char +anyChar' :: PandocMonad m => TextileParser m Char anyChar' = satisfy (/='\n') <|> try (char '\n' <* notFollowedBy blankline) -code1 :: PandocMonad m => ParserT Text ParserState m Inlines +code1 :: PandocMonad m => TextileParser m Inlines code1 = B.code . T.pack <$> surrounded (char '@') anyChar' -code2 :: PandocMonad m => ParserT Text ParserState m Inlines +code2 :: PandocMonad m => TextileParser m Inlines code2 = do htmlTag (tagOpen (=="tt") null) B.code . T.pack <$> manyTill anyChar' (try $ htmlTag $ tagClose (=="tt")) -- | Html / CSS attributes -attributes :: PandocMonad m => ParserT Text ParserState m Attr +attributes :: PandocMonad m => TextileParser m Attr attributes = foldl' (flip ($)) ("",[],[]) <$> try (do special <- option id specialAttribute attrs <- many attribute return (special : attrs)) -specialAttribute :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr) +specialAttribute :: PandocMonad m => TextileParser m (Attr -> Attr) specialAttribute = do alignStr <- ("center" <$ char '=') <|> ("justify" <$ try (string "<>")) <|> @@ -643,11 +647,11 @@ specialAttribute = do notFollowedBy spaceChar return $ addStyle $ T.pack $ "text-align:" ++ alignStr -attribute :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr) +attribute :: PandocMonad m => TextileParser m (Attr -> Attr) attribute = try $ (classIdAttr <|> styleAttr <|> langAttr) <* notFollowedBy spaceChar -classIdAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr) +classIdAttr :: PandocMonad m => TextileParser m (Attr -> Attr) classIdAttr = try $ do -- (class class #id) char '(' ws <- T.words `fmap` T.pack <$> manyTill anyChar' (char ')') @@ -659,7 +663,7 @@ classIdAttr = try $ do -- (class class #id) classes' -> return $ \(_,_,keyvals) -> ("",classes',keyvals) -styleAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr) +styleAttr :: PandocMonad m => TextileParser m (Attr -> Attr) styleAttr = do style <- try $ enclosed (char '{') (char '}') anyChar' return $ addStyle $ T.pack style @@ -670,23 +674,23 @@ addStyle style (id',classes,keyvals) = where keyvals' = ("style", style') : [(k,v) | (k,v) <- keyvals, k /= "style"] style' = style <> ";" <> T.concat [v | ("style",v) <- keyvals] -langAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr) +langAttr :: PandocMonad m => TextileParser m (Attr -> Attr) langAttr = do lang <- try $ enclosed (char '[') (char ']') alphaNum return $ \(id',classes,keyvals) -> (id',classes,("lang",T.pack lang):keyvals) -- | Parses material surrounded by a parser. surrounded :: (PandocMonad m, Show t) - => ParserT Text st m t -- ^ surrounding parser - -> ParserT Text st m a -- ^ content parser (to be used repeatedly) - -> ParserT Text st m [a] + => ParserT Sources st m t -- ^ surrounding parser + -> ParserT Sources st m a -- ^ content parser (to be used repeatedly) + -> ParserT Sources st m [a] surrounded border = enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border) simpleInline :: PandocMonad m - => ParserT Text ParserState m t -- ^ surrounding parser + => TextileParser m t -- ^ surrounding parser -> (Inlines -> Inlines) -- ^ Inline constructor - -> ParserT Text ParserState m Inlines -- ^ content parser (to be used repeatedly) + -> TextileParser m Inlines -- ^ content parser (to be used repeatedly) simpleInline border construct = try $ do notAfterString border *> notFollowedBy (oneOf " \t\n\r") @@ -700,7 +704,7 @@ simpleInline border construct = try $ do then body else B.spanWith attr body -groupedInlineMarkup :: PandocMonad m => ParserT Text ParserState m Inlines +groupedInlineMarkup :: PandocMonad m => TextileParser m Inlines groupedInlineMarkup = try $ do char '[' sp1 <- option mempty $ B.space <$ whitespace @@ -709,5 +713,5 @@ groupedInlineMarkup = try $ do char ']' return $ sp1 <> result <> sp2 -eof' :: Monad m => ParserT Text s m Char +eof' :: Monad m => ParserT Sources s m Char eof' = '\n' <$ eof diff --git a/src/Text/Pandoc/Readers/TikiWiki.hs b/src/Text/Pandoc/Readers/TikiWiki.hs index fb4b662c5..5c414fdec 100644 --- a/src/Text/Pandoc/Readers/TikiWiki.hs +++ b/src/Text/Pandoc/Readers/TikiWiki.hs @@ -30,23 +30,23 @@ import Text.Pandoc.Definition import Text.Pandoc.Logging (Verbosity (..)) import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (enclosed, nested) -import Text.Pandoc.Shared (crFilter, safeRead) +import Text.Pandoc.Shared (safeRead) import Text.Pandoc.XML (fromEntities) import Text.Printf (printf) -- | Read TikiWiki from an input string and return a Pandoc document. -readTikiWiki :: PandocMonad m +readTikiWiki :: (PandocMonad m, ToSources a) => ReaderOptions - -> Text + -> a -> m Pandoc readTikiWiki opts s = do - res <- readWithM parseTikiWiki def{ stateOptions = opts } - (crFilter s <> "\n\n") + let sources = ensureFinalNewlines 2 (toSources s) + res <- readWithM parseTikiWiki def{ stateOptions = opts } sources case res of Left e -> throwError e Right d -> return d -type TikiWikiParser = ParserT Text ParserState +type TikiWikiParser = ParserT Sources ParserState -- -- utility functions diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs index d355a4b55..6f92f0063 100644 --- a/src/Text/Pandoc/Readers/Txt2Tags.hs +++ b/src/Text/Pandoc/Readers/Txt2Tags.hs @@ -33,9 +33,9 @@ import Data.Time (defaultTimeLocale) import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (space, spaces, uri) -import Text.Pandoc.Shared (compactify, compactifyDL, crFilter, escapeURI) +import Text.Pandoc.Shared (compactify, compactifyDL, escapeURI) -type T2T = ParserT Text ParserState (Reader T2TMeta) +type T2T = ParserT Sources ParserState (Reader T2TMeta) -- | An object for the T2T macros meta information -- the contents of each field is simply substituted verbatim into the file @@ -68,15 +68,15 @@ getT2TMeta = do (intercalate ", " inps) outp -- | Read Txt2Tags from an input string returning a Pandoc document -readTxt2Tags :: PandocMonad m +readTxt2Tags :: (PandocMonad m, ToSources a) => ReaderOptions - -> Text + -> a -> m Pandoc readTxt2Tags opts s = do + let sources = ensureFinalNewlines 2 (toSources s) meta <- getT2TMeta let parsed = flip runReader meta $ - readWithM parseT2T (def {stateOptions = opts}) $ - crFilter s <> "\n\n" + readWithM parseT2T (def {stateOptions = opts}) sources case parsed of Right result -> return result Left e -> throwError e diff --git a/src/Text/Pandoc/Readers/Vimwiki.hs b/src/Text/Pandoc/Readers/Vimwiki.hs index 74dac5ea7..460f304c4 100644 --- a/src/Text/Pandoc/Readers/Vimwiki.hs +++ b/src/Text/Pandoc/Readers/Vimwiki.hs @@ -74,23 +74,28 @@ import Text.Pandoc.Parsing (ParserState, ParserT, blanklines, emailAddress, many1Till, orderedListMarker, readWithM, registerHeader, spaceChar, stateMeta, stateOptions, uri, manyTillChar, manyChar, textStr, - many1Char, countChar, many1TillChar) -import Text.Pandoc.Shared (crFilter, splitTextBy, stringify, stripFirstAndLast, + many1Char, countChar, many1TillChar, + alphaNum, anyChar, char, newline, noneOf, oneOf, + space, spaces, string) +import Text.Pandoc.Sources (ToSources(..), Sources) +import Text.Pandoc.Shared (splitTextBy, stringify, stripFirstAndLast, isURI, tshow) -import Text.Parsec.Char (alphaNum, anyChar, char, newline, noneOf, oneOf, space, - spaces, string) import Text.Parsec.Combinator (between, choice, eof, lookAhead, many1, manyTill, notFollowedBy, option, skipMany1) import Text.Parsec.Prim (getState, many, try, updateState, (<|>)) -readVimwiki :: PandocMonad m => ReaderOptions -> Text -> m Pandoc +readVimwiki :: (PandocMonad m, ToSources a) + => ReaderOptions + -> a + -> m Pandoc readVimwiki opts s = do - res <- readWithM parseVimwiki def{ stateOptions = opts } $ crFilter s + let sources = toSources s + res <- readWithM parseVimwiki def{ stateOptions = opts } sources case res of Left e -> throwError e Right result -> return result -type VwParser = ParserT Text ParserState +type VwParser = ParserT Sources ParserState -- constants |