diff options
Diffstat (limited to 'src/Text/Pandoc/Readers')
34 files changed, 443 insertions, 355 deletions
diff --git a/src/Text/Pandoc/Readers/BibTeX.hs b/src/Text/Pandoc/Readers/BibTeX.hs index b82a81350..318afda85 100644 --- a/src/Text/Pandoc/Readers/BibTeX.hs +++ b/src/Text/Pandoc/Readers/BibTeX.hs @@ -23,30 +23,33 @@ where import Text.Pandoc.Options import Text.Pandoc.Definition import Text.Pandoc.Builder (setMeta, cite, str) -import Data.Text (Text) import Citeproc (Lang(..), parseLang) import Citeproc.Locale (getLocale) import Text.Pandoc.Error (PandocError(..)) import Text.Pandoc.Class (PandocMonad, lookupEnv) import Text.Pandoc.Citeproc.BibTeX as BibTeX import Text.Pandoc.Citeproc.MetaValue (referenceToMetaValue) +import Text.Pandoc.Sources (ToSources(..)) import Control.Monad.Except (throwError) -- | Read BibTeX from an input string and return a Pandoc document. -- The document will have only metadata, with an empty body. -- The metadata will contain a `references` field with the -- bibliography entries, and a `nocite` field with the wildcard `[@*]`. -readBibTeX :: PandocMonad m => ReaderOptions -> Text -> m Pandoc +readBibTeX :: (PandocMonad m, ToSources a) + => ReaderOptions -> a -> m Pandoc readBibTeX = readBibTeX' BibTeX.Bibtex -- | Read BibLaTeX from an input string and return a Pandoc document. -- The document will have only metadata, with an empty body. -- The metadata will contain a `references` field with the -- bibliography entries, and a `nocite` field with the wildcard `[@*]`. -readBibLaTeX :: PandocMonad m => ReaderOptions -> Text -> m Pandoc +readBibLaTeX :: (PandocMonad m, ToSources a) + => ReaderOptions -> a -> m Pandoc readBibLaTeX = readBibTeX' BibTeX.Biblatex -readBibTeX' :: PandocMonad m => Variant -> ReaderOptions -> Text -> m Pandoc +readBibTeX' :: (PandocMonad m, ToSources a) + => Variant -> ReaderOptions -> a -> m Pandoc readBibTeX' variant _opts t = do mblangEnv <- lookupEnv "LANG" let defaultLang = Lang "en" Nothing (Just "US") [] [] [] @@ -60,7 +63,7 @@ readBibTeX' variant _opts t = do Left _ -> throwError $ PandocCiteprocError e Right l -> return l case BibTeX.readBibtexString variant locale (const True) t of - Left e -> throwError $ PandocParsecError t e + Left e -> throwError $ PandocParsecError (toSources t) e Right refs -> return $ setMeta "references" (map referenceToMetaValue refs) . setMeta "nocite" diff --git a/src/Text/Pandoc/Readers/CSV.hs b/src/Text/Pandoc/Readers/CSV.hs index 2958d6180..eca8f9425 100644 --- a/src/Text/Pandoc/Readers/CSV.hs +++ b/src/Text/Pandoc/Readers/CSV.hs @@ -13,23 +13,23 @@ Conversion from CSV to a 'Pandoc' table. -} module Text.Pandoc.Readers.CSV ( readCSV ) where -import Data.Text (Text) import qualified Data.Text as T import Text.Pandoc.CSV (parseCSV, defaultCSVOptions) import Text.Pandoc.Definition import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class (PandocMonad) -import Text.Pandoc.Shared (crFilter) import Text.Pandoc.Error +import Text.Pandoc.Sources (ToSources(..), sourcesToText) import Text.Pandoc.Options (ReaderOptions) import Control.Monad.Except (throwError) -readCSV :: PandocMonad m +readCSV :: (PandocMonad m, ToSources a) => ReaderOptions -- ^ Reader options - -> Text -- ^ Text to parse (assuming @'\n'@ line endings) + -> a -> m Pandoc -readCSV _opts s = - case parseCSV defaultCSVOptions (crFilter s) of +readCSV _opts s = do + let txt = sourcesToText $ toSources s + case parseCSV defaultCSVOptions txt of Right (r:rs) -> return $ B.doc $ B.table capt (zip aligns widths) (TableHead nullAttr hdrs) @@ -45,4 +45,4 @@ readCSV _opts s = aligns = replicate numcols AlignDefault widths = replicate numcols ColWidthDefault Right [] -> return $ B.doc mempty - Left e -> throwError $ PandocParsecError s e + Left e -> throwError $ PandocParsecError (toSources [("",txt)]) e diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs index 244f77940..b099a9b50 100644 --- a/src/Text/Pandoc/Readers/CommonMark.hs +++ b/src/Text/Pandoc/Readers/CommonMark.hs @@ -30,45 +30,55 @@ import Text.Pandoc.Readers.Metadata (yamlMetaBlock) import Control.Monad.Except import Data.Functor.Identity (runIdentity) import Data.Typeable -import Text.Pandoc.Parsing (runParserT, getPosition, sourceLine, - runF, defaultParserState, take1WhileP, option) +import Text.Pandoc.Parsing (runParserT, getPosition, + runF, defaultParserState, option, many1, anyChar, + Sources(..), ToSources(..), ParserT, Future, + sourceName) import qualified Data.Text as T -- | Parse a CommonMark formatted string into a 'Pandoc' structure. -readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc +readCommonMark :: (PandocMonad m, ToSources a) + => ReaderOptions -> a -> m Pandoc readCommonMark opts s - | isEnabled Ext_yaml_metadata_block opts - , "---" `T.isPrefixOf` s = do - let metaValueParser = do - inp <- option "" $ take1WhileP (const True) - case runIdentity - (commonmarkWith (specFor opts) "metadata value" inp) of - Left _ -> mzero - Right (Cm bls :: Cm () Blocks) - -> return $ return $ B.toMetaValue bls - res <- runParserT (do meta <- yamlMetaBlock metaValueParser - pos <- getPosition - return (meta, pos)) - defaultParserState "YAML metadata" s - case res of - Left _ -> readCommonMarkBody opts s - Right (meta, pos) -> do - let dropLines 0 = id - dropLines n = dropLines (n - 1) . T.drop 1 . T.dropWhile (/='\n') - let metaLines = sourceLine pos - 1 - let body = T.replicate metaLines "\n" <> dropLines metaLines s - Pandoc _ bs <- readCommonMarkBody opts body - return $ Pandoc (runF meta defaultParserState) bs - | otherwise = readCommonMarkBody opts s + | isEnabled Ext_yaml_metadata_block opts = do + let sources = toSources s + let toks = concatMap sourceToToks (unSources sources) + res <- runParserT (do meta <- yamlMetaBlock (metaValueParser opts) + pos <- getPosition + return (meta, pos)) + defaultParserState "YAML metadata" (toSources s) + case res of + Left _ -> readCommonMarkBody opts sources toks + Right (meta, pos) -> do + -- strip off metadata section and parse body + let body = dropWhile (\t -> tokPos t < pos) toks + Pandoc _ bs <- readCommonMarkBody opts sources body + return $ Pandoc (runF meta defaultParserState) bs + | otherwise = do + let sources = toSources s + let toks = concatMap sourceToToks (unSources sources) + readCommonMarkBody opts sources toks -readCommonMarkBody :: PandocMonad m => ReaderOptions -> Text -> m Pandoc -readCommonMarkBody opts s +sourceToToks :: (SourcePos, Text) -> [Tok] +sourceToToks (pos, s) = tokenize (sourceName pos) s + +metaValueParser :: Monad m + => ReaderOptions -> ParserT Sources st m (Future st MetaValue) +metaValueParser opts = do + inp <- option "" $ T.pack <$> many1 anyChar + let toks = concatMap sourceToToks (unSources (toSources inp)) + case runIdentity (parseCommonmarkWith (specFor opts) toks) of + Left _ -> mzero + Right (Cm bls :: Cm () Blocks) -> return $ return $ B.toMetaValue bls + +readCommonMarkBody :: PandocMonad m => ReaderOptions -> Sources -> [Tok] -> m Pandoc +readCommonMarkBody opts s toks | isEnabled Ext_sourcepos opts = - case runIdentity (commonmarkWith (specFor opts) "" s) of + case runIdentity (parseCommonmarkWith (specFor opts) toks) of Left err -> throwError $ PandocParsecError s err Right (Cm bls :: Cm SourceRange Blocks) -> return $ B.doc bls | otherwise = - case runIdentity (commonmarkWith (specFor opts) "" s) of + case runIdentity (parseCommonmarkWith (specFor opts) toks) of Left err -> throwError $ PandocParsecError s err Right (Cm bls :: Cm () Blocks) -> return $ B.doc bls diff --git a/src/Text/Pandoc/Readers/Creole.hs b/src/Text/Pandoc/Readers/Creole.hs index 2658dfea2..ad848ada7 100644 --- a/src/Text/Pandoc/Readers/Creole.hs +++ b/src/Text/Pandoc/Readers/Creole.hs @@ -23,21 +23,20 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad (..)) import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (enclosed) -import Text.Pandoc.Shared (crFilter) - -- | Read creole from an input string and return a Pandoc document. -readCreole :: PandocMonad m +readCreole :: (PandocMonad m, ToSources a) => ReaderOptions - -> Text + -> a -> m Pandoc readCreole opts s = do - res <- readWithM parseCreole def{ stateOptions = opts } $ crFilter s <> "\n\n" + let sources = ensureFinalNewlines 2 (toSources s) + res <- readWithM parseCreole def{ stateOptions = opts } sources case res of Left e -> throwError e Right d -> return d -type CRLParser = ParserT Text ParserState +type CRLParser = ParserT Sources ParserState -- -- Utility functions diff --git a/src/Text/Pandoc/Readers/CslJson.hs b/src/Text/Pandoc/Readers/CslJson.hs index 30bb19483..a0af5c325 100644 --- a/src/Text/Pandoc/Readers/CslJson.hs +++ b/src/Text/Pandoc/Readers/CslJson.hs @@ -24,21 +24,22 @@ import Text.Pandoc.Options import Text.Pandoc.Definition import Text.Pandoc.Builder (setMeta, cite, str) import qualified Text.Pandoc.UTF8 as UTF8 -import Data.Text (Text) import qualified Data.Text as T import Text.Pandoc.Error (PandocError(..)) import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Citeproc.CslJson (cslJsonToReferences) import Text.Pandoc.Citeproc.MetaValue (referenceToMetaValue) import Control.Monad.Except (throwError) +import Text.Pandoc.Sources (ToSources(..), sourcesToText) -- | Read CSL JSON from an input string and return a Pandoc document. -- The document will have only metadata, with an empty body. -- The metadata will contain a `references` field with the -- bibliography entries, and a `nocite` field with the wildcard `[@*]`. -readCslJson :: PandocMonad m => ReaderOptions -> Text -> m Pandoc -readCslJson _opts t = - case cslJsonToReferences (UTF8.fromText t) of +readCslJson :: (PandocMonad m, ToSources a) + => ReaderOptions -> a -> m Pandoc +readCslJson _opts x = + case cslJsonToReferences (UTF8.fromText $ sourcesToText $ toSources x) of Left e -> throwError $ PandocParseError $ T.pack e Right refs -> return $ setMeta "references" (map referenceToMetaValue refs) diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index ac3caa2c0..3db459cfd 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -30,7 +30,8 @@ import Text.Pandoc.Builder import Text.Pandoc.Class.PandocMonad (PandocMonad, report) import Text.Pandoc.Options import Text.Pandoc.Logging (LogMessage(..)) -import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces) +import Text.Pandoc.Shared (safeRead, extractSpaces) +import Text.Pandoc.Sources (ToSources(..), sourcesToText) import Text.TeXMath (readMathML, writeTeX) import Text.Pandoc.XML.Light @@ -539,11 +540,15 @@ instance Default DBState where , dbContent = [] } -readDocBook :: PandocMonad m => ReaderOptions -> Text -> m Pandoc +readDocBook :: (PandocMonad m, ToSources a) + => ReaderOptions + -> a + -> m Pandoc readDocBook _ inp = do + let sources = toSources inp tree <- either (throwError . PandocXMLError "") return $ parseXMLContents - (TL.fromStrict . handleInstructions $ crFilter inp) + (TL.fromStrict . handleInstructions . sourcesToText $ sources) (bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock tree return $ Pandoc (dbMeta st') (toList . mconcat $ bs) diff --git a/src/Text/Pandoc/Readers/DokuWiki.hs b/src/Text/Pandoc/Readers/DokuWiki.hs index dedc1f03f..db98ac8de 100644 --- a/src/Text/Pandoc/Readers/DokuWiki.hs +++ b/src/Text/Pandoc/Readers/DokuWiki.hs @@ -29,26 +29,27 @@ import Text.Pandoc.Definition import Text.Pandoc.Error (PandocError (PandocParsecError)) import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (enclosed, nested) -import Text.Pandoc.Shared (crFilter, trim, stringify, tshow) +import Text.Pandoc.Shared (trim, stringify, tshow) -- | Read DokuWiki from an input string and return a Pandoc document. -readDokuWiki :: PandocMonad m +readDokuWiki :: (PandocMonad m, ToSources a) => ReaderOptions - -> Text + -> a -> m Pandoc readDokuWiki opts s = do - let input = crFilter s - res <- runParserT parseDokuWiki def {stateOptions = opts } "source" input + let sources = toSources s + res <- runParserT parseDokuWiki def {stateOptions = opts } + (initialSourceName sources) sources case res of - Left e -> throwError $ PandocParsecError input e + Left e -> throwError $ PandocParsecError sources e Right d -> return d -type DWParser = ParserT Text ParserState +type DWParser = ParserT Sources ParserState -- * Utility functions -- | Parse end-of-line, which can be either a newline or end-of-file. -eol :: Stream s m Char => ParserT s st m () +eol :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m () eol = void newline <|> eof nested :: PandocMonad m => DWParser m a -> DWParser m a diff --git a/src/Text/Pandoc/Readers/FB2.hs b/src/Text/Pandoc/Readers/FB2.hs index 66e390bd7..84e5278db 100644 --- a/src/Text/Pandoc/Readers/FB2.hs +++ b/src/Text/Pandoc/Readers/FB2.hs @@ -40,9 +40,9 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad, insertMedia, report) import Text.Pandoc.Error import Text.Pandoc.Logging import Text.Pandoc.Options -import Text.Pandoc.Shared (crFilter) import Text.Pandoc.XML.Light import qualified Text.Pandoc.UTF8 as UTF8 +import Text.Pandoc.Sources (ToSources(..), sourcesToText) type FB2 m = StateT FB2State m @@ -63,9 +63,12 @@ instance HasMeta FB2State where setMeta field v s = s {fb2Meta = setMeta field v (fb2Meta s)} deleteMeta field s = s {fb2Meta = deleteMeta field (fb2Meta s)} -readFB2 :: PandocMonad m => ReaderOptions -> Text -> m Pandoc +readFB2 :: (PandocMonad m, ToSources a) + => ReaderOptions + -> a + -> m Pandoc readFB2 _ inp = - case parseXMLElement $ TL.fromStrict $ crFilter inp of + case parseXMLElement $ TL.fromStrict $ sourcesToText $ toSources inp of Left msg -> throwError $ PandocXMLError "" msg Right el -> do (bs, st) <- runStateT (parseRootElement el) def diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index c3e68afd8..f5c8a2277 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -62,21 +62,21 @@ import Text.Pandoc.Options ( extensionEnabled) import Text.Pandoc.Parsing hiding ((<|>)) import Text.Pandoc.Shared ( - addMetaField, blocksToInlines', crFilter, escapeURI, extractSpaces, + addMetaField, blocksToInlines', escapeURI, extractSpaces, htmlSpanLikeElements, renderTags', safeRead, tshow) import Text.Pandoc.Walk import Text.Parsec.Error import Text.TeXMath (readMathML, writeTeX) -- | Convert HTML-formatted string to 'Pandoc' document. -readHtml :: PandocMonad m +readHtml :: (PandocMonad m, ToSources a) => ReaderOptions -- ^ Reader options - -> Text -- ^ String to parse (assumes @'\n'@ line endings) + -> a -- ^ Input to parse -> m Pandoc readHtml opts inp = do let tags = stripPrefixes $ canonicalizeTags $ parseTagsOptions parseOptions{ optTagPosition = True } - (crFilter inp) + (sourcesToText $ toSources inp) parseDoc = do blocks <- fixPlains False . mconcat <$> manyTill block eof meta <- stateMeta . parserState <$> getState @@ -830,17 +830,19 @@ pInlinesInTags tagtype f = extractSpaces f <$> pInTags tagtype inline pTagText :: PandocMonad m => TagParser m Inlines pTagText = try $ do + pos <- getPosition (TagText str) <- pSatisfy isTagText st <- getState qu <- ask parsed <- lift $ lift $ - flip runReaderT qu $ runParserT (many pTagContents) st "text" str + flip runReaderT qu $ runParserT (many pTagContents) st "text" + (Sources [(pos, str)]) case parsed of Left _ -> throwError $ PandocParseError $ "Could not parse `" <> str <> "'" Right result -> return $ mconcat result -type InlinesParser m = HTMLParser m Text +type InlinesParser m = HTMLParser m Sources pTagContents :: PandocMonad m => InlinesParser m Inlines pTagContents = @@ -970,13 +972,14 @@ isCommentTag = tagComment (const True) -- | Matches a stretch of HTML in balanced tags. htmlInBalanced :: Monad m => (Tag Text -> Bool) - -> ParserT Text st m Text + -> ParserT Sources st m Text htmlInBalanced f = try $ do lookAhead (char '<') - inp <- getInput - let ts = canonicalizeTags $ - parseTagsOptions parseOptions{ optTagWarning = True, - optTagPosition = True } inp + sources <- getInput + let ts = canonicalizeTags + $ parseTagsOptions parseOptions{ optTagWarning = True, + optTagPosition = True } + $ sourcesToText sources case ts of (TagPosition sr sc : t@(TagOpen tn _) : rest) -> do guard $ f t @@ -1018,15 +1021,17 @@ hasTagWarning _ = False -- | Matches a tag meeting a certain condition. htmlTag :: (HasReaderOptions st, Monad m) => (Tag Text -> Bool) - -> ParserT Text st m (Tag Text, Text) + -> ParserT Sources st m (Tag Text, Text) htmlTag f = try $ do lookAhead (char '<') startpos <- getPosition - inp <- getInput + sources <- getInput + let inp = sourcesToText sources let ts = canonicalizeTags $ parseTagsOptions parseOptions{ optTagWarning = False , optTagPosition = True } - (inp <> " ") -- add space to ensure that + (inp <> " ") + -- add space to ensure that -- we get a TagPosition after the tag (next, ln, col) <- case ts of (TagPosition{} : next : TagPosition ln col : _) diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs index 48454e353..35eaac0a9 100644 --- a/src/Text/Pandoc/Readers/Haddock.hs +++ b/src/Text/Pandoc/Readers/Haddock.hs @@ -19,7 +19,7 @@ import Control.Monad.Except (throwError) import Data.List (intersperse) import Data.List.NonEmpty (nonEmpty) import Data.Maybe (fromMaybe) -import Data.Text (Text, unpack) +import Data.Text (unpack) import qualified Data.Text as T import Documentation.Haddock.Parser import Documentation.Haddock.Types as H @@ -29,15 +29,17 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad) import Text.Pandoc.Definition import Text.Pandoc.Error import Text.Pandoc.Options -import Text.Pandoc.Shared (crFilter, splitTextBy, trim) +import Text.Pandoc.Sources (ToSources(..), sourcesToText) +import Text.Pandoc.Shared (splitTextBy, trim) -- | Parse Haddock markup and return a 'Pandoc' document. -readHaddock :: PandocMonad m +readHaddock :: (PandocMonad m, ToSources a) => ReaderOptions - -> Text + -> a -> m Pandoc -readHaddock opts s = case readHaddockEither opts (unpack (crFilter s)) of +readHaddock opts s = case readHaddockEither opts + (unpack . sourcesToText . toSources $ s) of Right result -> return result Left e -> throwError e diff --git a/src/Text/Pandoc/Readers/Ipynb.hs b/src/Text/Pandoc/Readers/Ipynb.hs index 70296bb6b..cd1093109 100644 --- a/src/Text/Pandoc/Readers/Ipynb.hs +++ b/src/Text/Pandoc/Readers/Ipynb.hs @@ -39,10 +39,12 @@ import Data.Aeson as Aeson import Control.Monad.Except (throwError) import Text.Pandoc.Readers.Markdown (readMarkdown) import qualified Text.Pandoc.UTF8 as UTF8 +import Text.Pandoc.Sources (ToSources(..), sourcesToText) -readIpynb :: PandocMonad m => ReaderOptions -> Text -> m Pandoc -readIpynb opts t = do - let src = BL.fromStrict (TE.encodeUtf8 t) +readIpynb :: (PandocMonad m, ToSources a) + => ReaderOptions -> a -> m Pandoc +readIpynb opts x = do + let src = BL.fromStrict . TE.encodeUtf8 . sourcesToText $ toSources x case eitherDecode src of Right (notebook4 :: Notebook NbV4) -> notebookToPandoc opts notebook4 Left _ -> diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs index c068f3774..9cdbf1611 100644 --- a/src/Text/Pandoc/Readers/JATS.hs +++ b/src/Text/Pandoc/Readers/JATS.hs @@ -29,11 +29,12 @@ import Text.HTML.TagSoup.Entity (lookupEntity) import Text.Pandoc.Builder import Text.Pandoc.Class.PandocMonad (PandocMonad) import Text.Pandoc.Options -import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces) +import Text.Pandoc.Shared (safeRead, extractSpaces) import Text.TeXMath (readMathML, writeTeX) import Text.Pandoc.XML.Light import qualified Data.Set as S (fromList, member) import Data.Set ((\\)) +import Text.Pandoc.Sources (ToSources(..), sourcesToText) type JATS m = StateT JATSState m @@ -52,10 +53,14 @@ instance Default JATSState where , jatsContent = [] } -readJATS :: PandocMonad m => ReaderOptions -> Text -> m Pandoc +readJATS :: (PandocMonad m, ToSources a) + => ReaderOptions + -> a + -> m Pandoc readJATS _ inp = do + let sources = toSources inp tree <- either (throwError . PandocXMLError "") return $ - parseXMLContents (TL.fromStrict $ crFilter inp) + parseXMLContents (TL.fromStrict . sourcesToText $ sources) (bs, st') <- flip runStateT (def{ jatsContent = tree }) $ mapM parseBlock tree return $ Pandoc (jatsMeta st') (toList . mconcat $ bs) diff --git a/src/Text/Pandoc/Readers/Jira.hs b/src/Text/Pandoc/Readers/Jira.hs index 89aecbf56..a3b415f09 100644 --- a/src/Text/Pandoc/Readers/Jira.hs +++ b/src/Text/Pandoc/Readers/Jira.hs @@ -20,18 +20,20 @@ import Text.Pandoc.Builder hiding (cell) import Text.Pandoc.Error (PandocError (PandocParseError)) import Text.Pandoc.Options (ReaderOptions) import Text.Pandoc.Shared (stringify) - +import Text.Pandoc.Sources (ToSources(..), sourcesToText) import qualified Text.Jira.Markup as Jira -- | Read Jira wiki markup. -readJira :: PandocMonad m +readJira :: (PandocMonad m, ToSources a) => ReaderOptions - -> Text + -> a -> m Pandoc -readJira _opts s = case parse s of - Right d -> return $ jiraToPandoc d - Left e -> throwError . PandocParseError $ - "Jira parse error" `append` pack (show e) +readJira _opts inp = do + let sources = toSources inp + case parse (sourcesToText sources) of + Right d -> return $ jiraToPandoc d + Left e -> throwError . PandocParseError $ + "Jira parse error" `append` pack (show e) jiraToPandoc :: Jira.Doc -> Pandoc jiraToPandoc (Jira.Doc blks) = doc $ foldMap jiraToPandocBlocks blks diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 9ad168293..f90d562ae 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -77,16 +77,17 @@ import Data.List.NonEmpty (nonEmpty) -- import Debug.Trace (traceShowId) -- | Parse LaTeX from string and return 'Pandoc' document. -readLaTeX :: PandocMonad m +readLaTeX :: (PandocMonad m, ToSources a) => ReaderOptions -- ^ Reader options - -> Text -- ^ String to parse (assumes @'\n'@ line endings) + -> a -- ^ Input to parse -> m Pandoc readLaTeX opts ltx = do + let sources = toSources ltx parsed <- runParserT parseLaTeX def{ sOptions = opts } "source" - (tokenize "source" (crFilter ltx)) + (tokenizeSources sources) case parsed of Right result -> return result - Left e -> throwError $ PandocParsecError ltx e + Left e -> throwError $ PandocParsecError sources e parseLaTeX :: PandocMonad m => LP m Pandoc parseLaTeX = do @@ -132,11 +133,11 @@ resolveRefs _ x = x rawLaTeXBlock :: (PandocMonad m, HasMacros s, HasReaderOptions s) - => ParserT Text s m Text + => ParserT Sources s m Text rawLaTeXBlock = do lookAhead (try (char '\\' >> letter)) inp <- getInput - let toks = tokenize "source" inp + let toks = tokenizeSources inp snd <$> (rawLaTeXParser toks False (macroDef (const mempty)) blocks <|> rawLaTeXParser toks True (do choice (map controlSeq @@ -163,11 +164,11 @@ beginOrEndCommand = try $ do (txt <> untokenize rawargs) rawLaTeXInline :: (PandocMonad m, HasMacros s, HasReaderOptions s) - => ParserT Text s m Text + => ParserT Sources s m Text rawLaTeXInline = do lookAhead (try (char '\\' >> letter)) inp <- getInput - let toks = tokenize "source" inp + let toks = tokenizeSources inp raw <- snd <$> ( rawLaTeXParser toks True (mempty <$ (controlSeq "input" >> skipMany rawopt >> braced)) @@ -178,11 +179,11 @@ rawLaTeXInline = do finalbraces <- mconcat <$> many (try (string "{}")) -- see #5439 return $ raw <> T.pack finalbraces -inlineCommand :: PandocMonad m => ParserT Text ParserState m Inlines +inlineCommand :: PandocMonad m => ParserT Sources ParserState m Inlines inlineCommand = do lookAhead (try (char '\\' >> letter)) inp <- getInput - let toks = tokenize "source" inp + let toks = tokenizeSources inp fst <$> rawLaTeXParser toks True (inlineEnvironment <|> inlineCommand') inlines @@ -641,7 +642,7 @@ opt = do parsed <- runParserT (mconcat <$> many inline) st "bracketed option" toks case parsed of Right result -> return result - Left e -> throwError $ PandocParsecError (untokenize toks) e + Left e -> throwError $ PandocParsecError (toSources toks) e -- block elements: diff --git a/src/Text/Pandoc/Readers/LaTeX/Citation.hs b/src/Text/Pandoc/Readers/LaTeX/Citation.hs index 655823dab..af97125c6 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Citation.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Citation.hs @@ -120,7 +120,7 @@ simpleCiteArgs inline = try $ do runParserT (mconcat <$> many inline) st "bracketed option" toks case parsed of Right result -> return result - Left e -> throwError $ PandocParsecError (untokenize toks) e + Left e -> throwError $ PandocParsecError (toSources toks) e diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs index db58b333d..35ce3509d 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs @@ -27,6 +27,7 @@ module Text.Pandoc.Readers.LaTeX.Parsing , rawLaTeXParser , applyMacros , tokenize + , tokenizeSources , untokenize , untoken , totoks @@ -248,7 +249,7 @@ withVerbatimMode parser = do rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s) => [Tok] -> Bool -> LP m a -> LP m a - -> ParserT Text s m (a, Text) + -> ParserT Sources s m (a, Text) rawLaTeXParser toks retokenize parser valParser = do pstate <- getState let lstate = def{ sOptions = extractReaderOptions pstate } @@ -268,7 +269,7 @@ rawLaTeXParser toks retokenize parser valParser = do Left _ -> mzero Right ((val, raw), st) -> do updateState (updateMacros (sMacros st <>)) - _ <- takeP (T.length (untokenize toks')) + void $ count (T.length (untokenize toks')) anyChar let result = untokenize raw -- ensure we end with space if input did, see #4442 let result' = @@ -281,7 +282,7 @@ rawLaTeXParser toks retokenize parser valParser = do return (val, result') applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s) - => Text -> ParserT Text s m Text + => Text -> ParserT Sources s m Text applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|> do let retokenize = untokenize <$> many (satisfyTok (const True)) pstate <- getState @@ -301,6 +302,11 @@ QuickCheck property: > let t = T.pack s in untokenize (tokenize "random" t) == t -} +tokenizeSources :: Sources -> [Tok] +tokenizeSources = concatMap tokenizeSource . unSources + where + tokenizeSource (pos, t) = totoks pos t + tokenize :: SourceName -> Text -> [Tok] tokenize sourcename = totoks (initialPos sourcename) diff --git a/src/Text/Pandoc/Readers/LaTeX/Types.hs b/src/Text/Pandoc/Readers/LaTeX/Types.hs index f8c214318..c20b72bc5 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Types.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Types.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE FlexibleInstances #-} {- | Module : Text.Pandoc.Readers.LaTeX.Types Copyright : Copyright (C) 2017-2021 John MacFarlane @@ -18,7 +19,9 @@ module Text.Pandoc.Readers.LaTeX.Types ( Tok(..) ) where import Data.Text (Text) -import Text.Parsec.Pos (SourcePos) +import Text.Parsec.Pos (SourcePos, sourceName) +import Text.Pandoc.Sources +import Data.List (groupBy) data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment | Esc1 | Esc2 | Arg Int @@ -27,6 +30,16 @@ data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment | data Tok = Tok SourcePos TokType Text deriving (Eq, Ord, Show) +instance ToSources [Tok] where + toSources = Sources + . map (\ts -> case ts of + Tok p _ _ : _ -> (p, mconcat $ map tokToText ts) + _ -> error "toSources [Tok] encountered empty group") + . groupBy (\(Tok p1 _ _) (Tok p2 _ _) -> sourceName p1 == sourceName p2) + +tokToText :: Tok -> Text +tokToText (Tok _ _ t) = t + data ExpansionPoint = ExpandWhenDefined | ExpandWhenUsed deriving (Eq, Ord, Show) diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs index 21b8feaab..1141af66f 100644 --- a/src/Text/Pandoc/Readers/Man.hs +++ b/src/Text/Pandoc/Readers/Man.hs @@ -20,7 +20,7 @@ import Control.Monad (liftM, mzero, guard, void) import Control.Monad.Trans (lift) import Control.Monad.Except (throwError) import Data.Maybe (catMaybes, isJust) -import Data.List (intersperse, intercalate) +import Data.List (intersperse) import qualified Data.Text as T import Text.Pandoc.Builder as B import Text.Pandoc.Class.PandocMonad (PandocMonad(..), report) @@ -29,9 +29,8 @@ import Text.Pandoc.Logging (LogMessage(..)) import Text.Pandoc.Options import Text.Pandoc.Parsing import Text.Pandoc.Walk (query) -import Text.Pandoc.Shared (crFilter, mapLeft) +import Text.Pandoc.Shared (mapLeft) import Text.Pandoc.Readers.Roff -- TODO explicit imports -import Text.Parsec hiding (tokenPrim) import qualified Text.Parsec as Parsec import Text.Parsec.Pos (updatePosString) import qualified Data.Foldable as Foldable @@ -50,13 +49,20 @@ type ManParser m = ParserT [RoffToken] ManState m -- | Read man (troff) from an input string and return a Pandoc document. -readMan :: PandocMonad m => ReaderOptions -> T.Text -> m Pandoc -readMan opts txt = do - tokenz <- lexRoff (initialPos "input") (crFilter txt) +readMan :: (PandocMonad m, ToSources a) + => ReaderOptions + -> a + -> m Pandoc +readMan opts s = do + let Sources inps = toSources s + tokenz <- mconcat <$> mapM (uncurry lexRoff) inps let state = def {readerOptions = opts} :: ManState + let fixError (PandocParsecError _ e) = PandocParsecError (Sources inps) e + fixError e = e eitherdoc <- readWithMTokens parseMan state (Foldable.toList . unRoffTokens $ tokenz) - either throwError return eitherdoc + either (throwError . fixError) return eitherdoc + readWithMTokens :: PandocMonad m => ParserT [RoffToken] ManState m a -- ^ parser @@ -64,9 +70,10 @@ readWithMTokens :: PandocMonad m -> [RoffToken] -- ^ input -> m (Either PandocError a) readWithMTokens parser state input = - let leftF = PandocParsecError . T.pack . intercalate "\n" $ show <$> input + let leftF = PandocParsecError mempty in mapLeft leftF `liftM` runParserT parser state "source" input + parseMan :: PandocMonad m => ManParser m Pandoc parseMan = do bs <- many parseBlock <* eof @@ -89,7 +96,7 @@ parseBlock = choice [ parseList parseTable :: PandocMonad m => ManParser m Blocks parseTable = do - modifyState $ \st -> st { tableCellsPlain = True } + updateState $ \st -> st { tableCellsPlain = True } let isTbl Tbl{} = True isTbl _ = False Tbl _opts rows pos <- msatisfy isTbl @@ -135,7 +142,7 @@ parseTable = do case res' of Left _ -> Prelude.fail "Could not parse table cell" Right x -> do - modifyState $ \s -> s{ tableCellsPlain = False } + updateState $ \s -> s{ tableCellsPlain = False } return x Right x -> return x @@ -222,7 +229,7 @@ parseTitle = do setMeta "section" (linePartsToInlines y) [x] -> setMeta "title" (linePartsToInlines x) [] -> id - modifyState $ \st -> st{ metadata = adjustMeta $ metadata st } + updateState $ \st -> st{ metadata = adjustMeta $ metadata st } return mempty linePartsToInlines :: [LinePart] -> Inlines diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index ba8ed147e..69dd51bc4 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -47,19 +47,20 @@ import Text.Pandoc.Readers.LaTeX (applyMacros, rawLaTeXBlock, rawLaTeXInline) import Text.Pandoc.Shared import Text.Pandoc.XML (fromEntities) import Text.Pandoc.Readers.Metadata (yamlBsToMeta, yamlBsToRefs, yamlMetaBlock) +-- import Debug.Trace (traceShowId) -type MarkdownParser m = ParserT Text ParserState m +type MarkdownParser m = ParserT Sources ParserState m type F = Future ParserState -- | Read markdown from an input string and return a Pandoc document. -readMarkdown :: PandocMonad m +readMarkdown :: (PandocMonad m, ToSources a) => ReaderOptions -- ^ Reader options - -> Text -- ^ String to parse (assuming @'\n'@ line endings) + -> a -- ^ Input -> m Pandoc readMarkdown opts s = do parsed <- readWithM parseMarkdown def{ stateOptions = opts } - (crFilter s <> "\n\n") + (ensureFinalNewlines 3 (toSources s)) case parsed of Right result -> return result Left e -> throwError e @@ -80,7 +81,7 @@ yamlToMeta opts mbfp bstr = do meta <- yamlBsToMeta (fmap B.toMetaValue <$> parseBlocks) bstr setPosition oldPos return $ runF meta defaultParserState - parsed <- readWithM parser def{ stateOptions = opts } "" + parsed <- readWithM parser def{ stateOptions = opts } ("" :: Text) case parsed of Right result -> return result Left e -> throwError e @@ -103,7 +104,7 @@ yamlToRefs idpred opts mbfp bstr = do refs <- yamlBsToRefs (fmap B.toMetaValue <$> parseBlocks) idpred bstr setPosition oldPos return $ runF refs defaultParserState - parsed <- readWithM parser def{ stateOptions = opts } "" + parsed <- readWithM parser def{ stateOptions = opts } ("" :: Text) case parsed of Right result -> return result Left e -> throwError e @@ -146,14 +147,14 @@ inList = do ctx <- stateParserContext <$> getState guard (ctx == ListItemState) -spnl :: PandocMonad m => ParserT Text st m () +spnl :: PandocMonad m => ParserT Sources st m () spnl = try $ do skipSpaces optional newline skipSpaces notFollowedBy (char '\n') -spnl' :: PandocMonad m => ParserT Text st m Text +spnl' :: PandocMonad m => ParserT Sources st m Text spnl' = try $ do xs <- many spaceChar ys <- option "" $ try $ (:) <$> newline @@ -568,7 +569,7 @@ registerImplicitHeader raw attr@(ident, _, _) -- hrule block -- -hrule :: PandocMonad m => ParserT Text st m (F Blocks) +hrule :: PandocMonad m => ParserT Sources st m (F Blocks) hrule = try $ do skipSpaces start <- satisfy isHruleChar @@ -588,7 +589,7 @@ indentedLine = indentSpaces >> anyLineNewline blockDelimiter :: PandocMonad m => (Char -> Bool) -> Maybe Int - -> ParserT Text ParserState m Int + -> ParserT Sources ParserState m Int blockDelimiter f len = try $ do skipNonindentSpaces c <- lookAhead (satisfy f) @@ -732,7 +733,7 @@ lhsCodeBlockBirdWith c = try $ do blanklines return $ T.intercalate "\n" lns' -birdTrackLine :: PandocMonad m => Char -> ParserT Text st m Text +birdTrackLine :: PandocMonad m => Char -> ParserT Sources st m Text birdTrackLine c = try $ do char c -- allow html tags on left margin: @@ -1025,7 +1026,7 @@ para = try $ do option (B.plain <$> result) $ try $ do newline - (blanklines >> return mempty) + (mempty <$ blanklines) <|> (guardDisabled Ext_blank_before_blockquote >> () <$ lookAhead blockQuote) <|> (guardEnabled Ext_backtick_code_blocks >> () <$ lookAhead codeBlockFenced) <|> (guardDisabled Ext_blank_before_header >> () <$ lookAhead header) @@ -1170,7 +1171,7 @@ lineBlock = do -- and the length including trailing space. dashedLine :: PandocMonad m => Char - -> ParserT Text st m (Int, Int) + -> ParserT Sources st m (Int, Int) dashedLine ch = do dashes <- many1 (char ch) sp <- many spaceChar @@ -1239,7 +1240,7 @@ rawTableLine :: PandocMonad m -> MarkdownParser m [Text] rawTableLine indices = do notFollowedBy' (blanklines' <|> tableFooter) - line <- take1WhileP (/='\n') <* newline + line <- anyLine return $ map trim $ tail $ splitTextByIndices (init indices) line @@ -1390,7 +1391,7 @@ pipeTableCell = return $ B.plain <$> result) <|> return mempty -pipeTableHeaderPart :: PandocMonad m => ParserT Text st m (Alignment, Int) +pipeTableHeaderPart :: PandocMonad m => ParserT Sources st m (Alignment, Int) pipeTableHeaderPart = try $ do skipMany spaceChar left <- optionMaybe (char ':') @@ -1406,10 +1407,14 @@ pipeTableHeaderPart = try $ do (Just _,Just _) -> AlignCenter, len) -- Succeed only if current line contains a pipe. -scanForPipe :: PandocMonad m => ParserT Text st m () +scanForPipe :: PandocMonad m => ParserT Sources st m () scanForPipe = do - inp <- getInput - case T.break (\c -> c == '\n' || c == '|') inp of + Sources inps <- getInput + let ln = case inps of + [] -> "" + ((_,t):(_,t'):_) | T.null t -> t' + ((_,t):_) -> t + case T.break (\c -> c == '\n' || c == '|') ln of (_, T.uncons -> Just ('|', _)) -> return () _ -> mzero @@ -1703,13 +1708,13 @@ whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace" where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak) regsp = skipMany spaceChar >> return B.space -nonEndline :: PandocMonad m => ParserT Text st m Char +nonEndline :: PandocMonad m => ParserT Sources st m Char nonEndline = satisfy (/='\n') str :: PandocMonad m => MarkdownParser m (F Inlines) str = do result <- mconcat <$> many1 - ( take1WhileP isAlphaNum + ( T.pack <$> (many1 alphaNum) <|> "." <$ try (char '.' <* notFollowedBy (char '.')) ) updateLastStrPos (do guardEnabled Ext_smart @@ -1962,7 +1967,7 @@ rawLaTeXInline' = do s <- rawLaTeXInline return $ return $ B.rawInline "tex" s -- "tex" because it might be context -rawConTeXtEnvironment :: PandocMonad m => ParserT Text st m Text +rawConTeXtEnvironment :: PandocMonad m => ParserT Sources st m Text rawConTeXtEnvironment = try $ do string "\\start" completion <- inBrackets (letter <|> digit <|> spaceChar) @@ -1971,7 +1976,7 @@ rawConTeXtEnvironment = try $ do (try $ string "\\stop" >> textStr completion) return $ "\\start" <> completion <> T.concat contents <> "\\stop" <> completion -inBrackets :: PandocMonad m => ParserT Text st m Char -> ParserT Text st m Text +inBrackets :: PandocMonad m => ParserT Sources st m Char -> ParserT Sources st m Text inBrackets parser = do char '[' contents <- manyChar parser diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index 9f4d5e170..825e4a2eb 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -36,17 +36,18 @@ import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (nested) import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isCommentTag) -import Text.Pandoc.Shared (crFilter, safeRead, stringify, stripTrailingNewlines, +import Text.Pandoc.Shared (safeRead, stringify, stripTrailingNewlines, trim, splitTextBy, tshow) import Text.Pandoc.Walk (walk) import Text.Pandoc.XML (fromEntities) -- | Read mediawiki from an input string and return a Pandoc document. -readMediaWiki :: PandocMonad m - => ReaderOptions -- ^ Reader options - -> Text -- ^ String to parse (assuming @'\n'@ line endings) +readMediaWiki :: (PandocMonad m, ToSources a) + => ReaderOptions + -> a -> m Pandoc readMediaWiki opts s = do + let sources = toSources s parsed <- readWithM parseMediaWiki MWState{ mwOptions = opts , mwMaxNestingLevel = 4 , mwNextLinkNumber = 1 @@ -55,7 +56,7 @@ readMediaWiki opts s = do , mwLogMessages = [] , mwInTT = False } - (crFilter s <> "\n") + sources case parsed of Right result -> return result Left e -> throwError e @@ -69,7 +70,7 @@ data MWState = MWState { mwOptions :: ReaderOptions , mwInTT :: Bool } -type MWParser m = ParserT Text MWState m +type MWParser m = ParserT Sources MWState m instance HasReaderOptions MWState where extractReaderOptions = mwOptions diff --git a/src/Text/Pandoc/Readers/Metadata.hs b/src/Text/Pandoc/Readers/Metadata.hs index cb141cba5..bbcfe62ea 100644 --- a/src/Text/Pandoc/Readers/Metadata.hs +++ b/src/Text/Pandoc/Readers/Metadata.hs @@ -35,9 +35,9 @@ import qualified Data.Text.Lazy as TL import qualified Text.Pandoc.UTF8 as UTF8 yamlBsToMeta :: (PandocMonad m, HasLastStrPosition st) - => ParserT Text st m (Future st MetaValue) + => ParserT Sources st m (Future st MetaValue) -> BL.ByteString - -> ParserT Text st m (Future st Meta) + -> ParserT Sources st m (Future st Meta) yamlBsToMeta pMetaValue bstr = do case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of Right (YAML.Doc (YAML.Mapping _ _ o):_) @@ -67,10 +67,10 @@ lookupYAML _ _ = Nothing -- Returns filtered list of references. yamlBsToRefs :: (PandocMonad m, HasLastStrPosition st) - => ParserT Text st m (Future st MetaValue) + => ParserT Sources st m (Future st MetaValue) -> (Text -> Bool) -- ^ Filter for id -> BL.ByteString - -> ParserT Text st m (Future st [MetaValue]) + -> ParserT Sources st m (Future st [MetaValue]) yamlBsToRefs pMetaValue idpred bstr = case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of Right (YAML.Doc o@YAML.Mapping{}:_) @@ -108,9 +108,9 @@ nodeToKey (YAML.Scalar _ (YAML.SUnknown _ t)) = Just t nodeToKey _ = Nothing normalizeMetaValue :: (PandocMonad m, HasLastStrPosition st) - => ParserT Text st m (Future st MetaValue) + => ParserT Sources st m (Future st MetaValue) -> Text - -> ParserT Text st m (Future st MetaValue) + -> ParserT Sources st m (Future st MetaValue) normalizeMetaValue pMetaValue x = -- Note: a standard quoted or unquoted YAML value will -- not end in a newline, but a "block" set off with @@ -133,9 +133,9 @@ checkBoolean t | otherwise = Nothing yamlToMetaValue :: (PandocMonad m, HasLastStrPosition st) - => ParserT Text st m (Future st MetaValue) + => ParserT Sources st m (Future st MetaValue) -> YAML.Node YE.Pos - -> ParserT Text st m (Future st MetaValue) + -> ParserT Sources st m (Future st MetaValue) yamlToMetaValue pMetaValue (YAML.Scalar _ x) = case x of YAML.SStr t -> normalizeMetaValue pMetaValue t @@ -156,9 +156,9 @@ yamlToMetaValue pMetaValue (YAML.Mapping _ _ o) = yamlToMetaValue _ _ = return $ return $ MetaString "" yamlMap :: (PandocMonad m, HasLastStrPosition st) - => ParserT Text st m (Future st MetaValue) + => ParserT Sources st m (Future st MetaValue) -> M.Map (YAML.Node YE.Pos) (YAML.Node YE.Pos) - -> ParserT Text st m (Future st (M.Map Text MetaValue)) + -> ParserT Sources st m (Future st (M.Map Text MetaValue)) yamlMap pMetaValue o = do kvs <- forM (M.toList o) $ \(key, v) -> do k <- maybe (throwError $ PandocParseError @@ -177,8 +177,8 @@ yamlMap pMetaValue o = do -- | Parse a YAML metadata block using the supplied 'MetaValue' parser. yamlMetaBlock :: (HasLastStrPosition st, PandocMonad m) - => ParserT Text st m (Future st MetaValue) - -> ParserT Text st m (Future st Meta) + => ParserT Sources st m (Future st MetaValue) + -> ParserT Sources st m (Future st Meta) yamlMetaBlock parser = try $ do string "---" blankline @@ -189,5 +189,5 @@ yamlMetaBlock parser = try $ do optional blanklines yamlBsToMeta parser $ UTF8.fromTextLazy $ TL.fromStrict rawYaml -stopLine :: Monad m => ParserT Text st m () +stopLine :: Monad m => ParserT Sources st m () stopLine = try $ (string "---" <|> string "...") >> blankline >> return () diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs index 698bfd3d7..a0d4534f1 100644 --- a/src/Text/Pandoc/Readers/Muse.hs +++ b/src/Text/Pandoc/Readers/Muse.hs @@ -37,18 +37,19 @@ import Text.Pandoc.Error (PandocError (PandocParsecError)) import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Parsing -import Text.Pandoc.Shared (crFilter, trimr, tshow) +import Text.Pandoc.Shared (trimr, tshow) -- | Read Muse from an input string and return a Pandoc document. -readMuse :: PandocMonad m +readMuse :: (PandocMonad m, ToSources a) => ReaderOptions - -> Text + -> a -> m Pandoc readMuse opts s = do - let input = crFilter s - res <- flip runReaderT def $ runParserT parseMuse def{ museOptions = opts } "source" input + let sources = toSources s + res <- flip runReaderT def $ runParserT parseMuse def{ museOptions = opts } + (initialSourceName sources) sources case res of - Left e -> throwError $ PandocParsecError input e + Left e -> throwError $ PandocParsecError sources e Right d -> return d type F = Future MuseState @@ -82,7 +83,7 @@ instance Default MuseEnv where , museInPara = False } -type MuseParser m = ParserT Text MuseState (ReaderT MuseEnv m) +type MuseParser m = ParserT Sources MuseState (ReaderT MuseEnv m) instance HasReaderOptions MuseState where extractReaderOptions = museOptions @@ -155,7 +156,7 @@ firstColumn = getPosition >>= \pos -> guard (sourceColumn pos == 1) -- * Parsers -- | Parse end-of-line, which can be either a newline or end-of-file. -eol :: Stream s m Char => ParserT s st m () +eol :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m () eol = void newline <|> eof getIndent :: PandocMonad m diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs index 9c8bc0374..58f235e81 100644 --- a/src/Text/Pandoc/Readers/Native.hs +++ b/src/Text/Pandoc/Readers/Native.hs @@ -21,6 +21,7 @@ import Control.Monad.Except (throwError) import Data.Text (Text) import Text.Pandoc.Class.PandocMonad (PandocMonad) import Text.Pandoc.Error +import Text.Pandoc.Sources (ToSources(..), sourcesToText) -- | Read native formatted text and return a Pandoc document. -- The input may be a full pandoc document, a block list, a block, @@ -32,14 +33,15 @@ import Text.Pandoc.Error -- -- > Pandoc nullMeta [Plain [Str "hi"]] -- -readNative :: PandocMonad m +readNative :: (PandocMonad m, ToSources a) => ReaderOptions - -> Text -- ^ String to parse (assuming @'\n'@ line endings) + -> a -> m Pandoc readNative _ s = - case maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead s) of - Right doc -> return doc - Left _ -> throwError $ PandocParseError "couldn't read native" + let t = sourcesToText . toSources $ s + in case maybe (Pandoc nullMeta <$> readBlocks t) Right (safeRead t) of + Right doc -> return doc + Left _ -> throwError $ PandocParseError "couldn't read native" readBlocks :: Text -> Either PandocError [Block] readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead s) diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs index 5f2ddb876..668c9ca11 100644 --- a/src/Text/Pandoc/Readers/OPML.hs +++ b/src/Text/Pandoc/Readers/OPML.hs @@ -24,7 +24,8 @@ import Text.Pandoc.Options import Text.Pandoc.Error (PandocError(..)) import Text.Pandoc.Readers.HTML (readHtml) import Text.Pandoc.Readers.Markdown (readMarkdown) -import Text.Pandoc.Shared (crFilter, blocksToInlines') +import Text.Pandoc.Shared (blocksToInlines') +import Text.Pandoc.Sources (ToSources(..), sourcesToText) import Text.Pandoc.XML.Light import Control.Monad.Except (throwError) @@ -46,10 +47,14 @@ instance Default OPMLState where , opmlOptions = def } -readOPML :: PandocMonad m => ReaderOptions -> Text -> m Pandoc +readOPML :: (PandocMonad m, ToSources a) + => ReaderOptions + -> a + -> m Pandoc readOPML opts inp = do - (bs, st') <- runStateT - (case parseXMLContents (TL.fromStrict (crFilter inp)) of + let sources = toSources inp + (bs, st') <- + runStateT (case parseXMLContents (TL.fromStrict . sourcesToText $ sources) of Left msg -> throwError $ PandocXMLError "" msg Right ns -> mapM parseBlock ns) def{ opmlOptions = opts } diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index afeb27a87..8823befdd 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -18,22 +18,19 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad) import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Parsing (reportLogMessages) -import Text.Pandoc.Shared (crFilter) - +import Text.Pandoc.Sources (ToSources(..), ensureFinalNewlines) import Control.Monad.Except (throwError) import Control.Monad.Reader (runReaderT) -import Data.Text (Text) - -- | Parse org-mode string and return a Pandoc document. -readOrg :: PandocMonad m +readOrg :: (PandocMonad m, ToSources a) => ReaderOptions -- ^ Reader options - -> Text -- ^ String to parse (assuming @'\n'@ line endings) + -> a -> m Pandoc readOrg opts s = do parsed <- flip runReaderT def $ readWithM parseOrg (optionsToParserState opts) - (crFilter s <> "\n\n") + (ensureFinalNewlines 2 (toSources s)) case parsed of Right result -> return result Left e -> throwError e diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index 519a6ce04..054f2611a 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -29,6 +29,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline) import Text.TeXMath (DisplayType (..), readTeX, writePandoc) +import Text.Pandoc.Sources (ToSources(..)) import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap import Control.Monad (guard, mplus, mzero, unless, void, when) @@ -802,7 +803,7 @@ inlineLaTeX = try $ do parseAsInlineLaTeX :: PandocMonad m => Text -> TeXExport -> OrgParser m (Maybe Inlines) parseAsInlineLaTeX cs = \case - TeXExport -> maybeRight <$> runParserT inlineCommand state "" cs + TeXExport -> maybeRight <$> runParserT inlineCommand state "" (toSources cs) TeXIgnore -> return (Just mempty) TeXVerbatim -> return (Just $ B.str cs) diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs index 6ed24a602..c7ea02815 100644 --- a/src/Text/Pandoc/Readers/Org/Parsing.hs +++ b/src/Text/Pandoc/Readers/Org/Parsing.hs @@ -114,7 +114,7 @@ import Control.Monad (guard) import Control.Monad.Reader (ReaderT) -- | The parser used to read org files. -type OrgParser m = ParserT Text OrgParserState (ReaderT OrgParserLocal m) +type OrgParser m = ParserT Sources OrgParserState (ReaderT OrgParserLocal m) -- -- Adaptions and specializations of parsing utilities diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index ac4c0b6cb..a3fcf028c 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -38,25 +38,24 @@ import Text.Pandoc.Options import Text.Pandoc.Parsing import Text.Pandoc.Shared import qualified Text.Pandoc.UTF8 as UTF8 -import Text.Printf (printf) import Data.Time.Format -- TODO: -- [ ] .. parsed-literal -- | Parse reStructuredText string and return Pandoc document. -readRST :: PandocMonad m +readRST :: (PandocMonad m, ToSources a) => ReaderOptions -- ^ Reader options - -> Text -- ^ Text to parse (assuming @'\n'@ line endings) + -> a -> m Pandoc readRST opts s = do parsed <- readWithM parseRST def{ stateOptions = opts } - (crFilter s <> "\n\n") + (ensureFinalNewlines 2 (toSources s)) case parsed of Right result -> return result Left e -> throwError e -type RSTParser m = ParserT Text ParserState m +type RSTParser m = ParserT Sources ParserState m -- -- Constants and data structure definitions @@ -151,11 +150,19 @@ parseRST = do startPos <- getPosition -- go through once just to get list of reference keys and notes -- docMinusKeys is the raw document with blanks where the keys were... - docMinusKeys <- T.concat <$> - manyTill (referenceKey <|> anchorDef <|> - noteBlock <|> citationBlock <|> - (snd <$> withRaw comment) <|> - headerBlock <|> lineClump) eof + let chunk = referenceKey + <|> anchorDef + <|> noteBlock + <|> citationBlock + <|> (snd <$> withRaw comment) + <|> headerBlock + <|> lineClump + docMinusKeys <- Sources <$> + manyTill (do pos <- getPosition + t <- chunk + return (pos, t)) eof + -- UGLY: we collapse source position information. + -- TODO: fix the parser to use the F monad instead of two passes setInput docMinusKeys setPosition startPos st' <- getState @@ -348,7 +355,7 @@ singleHeader' = try $ do -- hrule block -- -hrule :: Monad m => ParserT Text st m Blocks +hrule :: Monad m => ParserT Sources st m Blocks hrule = try $ do chr <- oneOf underlineChars count 3 (char chr) @@ -363,7 +370,7 @@ hrule = try $ do -- read a line indented by a given string indentedLine :: (HasReaderOptions st, Monad m) - => Int -> ParserT Text st m Text + => Int -> ParserT Sources st m Text indentedLine indents = try $ do lookAhead spaceChar gobbleAtMostSpaces indents @@ -372,7 +379,7 @@ indentedLine indents = try $ do -- one or more indented lines, possibly separated by blank lines. -- any amount of indentation will work. indentedBlock :: (HasReaderOptions st, Monad m) - => ParserT Text st m Text + => ParserT Sources st m Text indentedBlock = try $ do indents <- length <$> lookAhead (many1 spaceChar) lns <- many1 $ try $ do b <- option "" blanklines @@ -381,20 +388,20 @@ indentedBlock = try $ do optional blanklines return $ T.unlines lns -quotedBlock :: Monad m => ParserT Text st m Text +quotedBlock :: Monad m => ParserT Sources st m Text quotedBlock = try $ do quote <- lookAhead $ oneOf "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" lns <- many1 $ lookAhead (char quote) >> anyLine optional blanklines return $ T.unlines lns -codeBlockStart :: Monad m => ParserT Text st m Char +codeBlockStart :: Monad m => ParserT Sources st m Char codeBlockStart = string "::" >> blankline >> blankline -codeBlock :: Monad m => ParserT Text ParserState m Blocks +codeBlock :: Monad m => ParserT Sources ParserState m Blocks codeBlock = try $ codeBlockStart >> codeBlockBody -codeBlockBody :: Monad m => ParserT Text ParserState m Blocks +codeBlockBody :: Monad m => ParserT Sources ParserState m Blocks codeBlockBody = do lang <- stateRstHighlight <$> getState try $ B.codeBlockWith ("", maybeToList lang, []) . stripTrailingNewlines <$> @@ -410,14 +417,14 @@ lhsCodeBlock = try $ do return $ B.codeBlockWith ("", ["haskell","literate"], []) $ T.intercalate "\n" lns -latexCodeBlock :: Monad m => ParserT Text st m [Text] +latexCodeBlock :: Monad m => ParserT Sources st m [Text] latexCodeBlock = try $ do try (latexBlockLine "\\begin{code}") many1Till anyLine (try $ latexBlockLine "\\end{code}") where latexBlockLine s = skipMany spaceChar >> string s >> blankline -birdCodeBlock :: Monad m => ParserT Text st m [Text] +birdCodeBlock :: Monad m => ParserT Sources st m [Text] birdCodeBlock = filterSpace <$> many1 birdTrackLine where filterSpace lns = -- if (as is normal) there is always a space after >, drop it @@ -425,7 +432,7 @@ birdCodeBlock = filterSpace <$> many1 birdTrackLine then map (T.drop 1) lns else lns -birdTrackLine :: Monad m => ParserT Text st m Text +birdTrackLine :: Monad m => ParserT Sources st m Text birdTrackLine = char '>' >> anyLine -- @@ -456,7 +463,6 @@ includeDirective top fields body = do let (startLine :: Maybe Int) = lookup "start-line" fields >>= safeRead let (endLine :: Maybe Int) = lookup "end-line" fields >>= safeRead oldPos <- getPosition - oldInput <- getInput containers <- stateContainers <$> getState when (f `elem` containers) $ throwError $ PandocParseError $ "Include file loop at " <> tshow oldPos @@ -494,15 +500,11 @@ includeDirective top fields body = do Nothing -> case lookup "literal" fields of Just _ -> return $ B.rawBlock "rst" contents' Nothing -> do - setPosition $ newPos (T.unpack f) 1 1 - setInput $ contents' <> "\n" - bs <- optional blanklines >> - (mconcat <$> many block) - setInput oldInput - setPosition oldPos + addToSources (initialPos (T.unpack f)) + (contents' <> "\n") updateState $ \s -> s{ stateContainers = tail $ stateContainers s } - return bs + return mempty -- @@ -526,7 +528,7 @@ definitionList :: PandocMonad m => RSTParser m Blocks definitionList = B.definitionList <$> many1 definitionListItem -- parses bullet list start and returns its length (inc. following whitespace) -bulletListStart :: Monad m => ParserT Text st m Int +bulletListStart :: Monad m => ParserT Sources st m Int bulletListStart = try $ do notFollowedBy' hrule -- because hrules start out just like lists marker <- oneOf bulletListMarkers @@ -1103,7 +1105,7 @@ quotedReferenceName = try $ do -- plus isolated (no two adjacent) internal hyphens, underscores, -- periods, colons and plus signs; no whitespace or other characters -- are allowed. -simpleReferenceName :: Monad m => ParserT Text st m Text +simpleReferenceName :: Monad m => ParserT Sources st m Text simpleReferenceName = do x <- alphaNum xs <- many $ alphaNum @@ -1122,7 +1124,7 @@ referenceKey = do -- return enough blanks to replace key return $ T.replicate (sourceLine endPos - sourceLine startPos) "\n" -targetURI :: Monad m => ParserT Text st m Text +targetURI :: Monad m => ParserT Sources st m Text targetURI = do skipSpaces optional $ try $ newline >> notFollowedBy blankline @@ -1160,8 +1162,10 @@ anonymousKey :: Monad m => RSTParser m () anonymousKey = try $ do oneOfStrings [".. __:", "__"] src <- targetURI - pos <- getPosition - let key = toKey $ "_" <> T.pack (printf "%09d" (sourceLine pos)) + -- we need to ensure that the keys are ordered by occurrence in + -- the document. + numKeys <- M.size . stateKeys <$> getState + let key = toKey $ "_" <> T.pack (show numKeys) updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ stateKeys s } @@ -1250,13 +1254,13 @@ headerBlock = do -- Grid tables TODO: -- - column spans -dashedLine :: Monad m => Char -> ParserT Text st m (Int, Int) +dashedLine :: Monad m => Char -> ParserT Sources st m (Int, Int) dashedLine ch = do dashes <- many1 (char ch) sp <- many (char ' ') return (length dashes, length $ dashes ++ sp) -simpleDashedLines :: Monad m => Char -> ParserT Text st m [(Int,Int)] +simpleDashedLines :: Monad m => Char -> ParserT Sources st m [(Int,Int)] simpleDashedLines ch = try $ many1 (dashedLine ch) -- Parse a table row separator @@ -1382,7 +1386,7 @@ hyphens = do -- don't want to treat endline after hyphen or dash as a space return $ B.str result -escapedChar :: Monad m => ParserT Text st m Inlines +escapedChar :: Monad m => ParserT Sources st m Inlines escapedChar = do c <- escaped anyChar return $ if c == ' ' || c == '\n' || c == '\r' -- '\ ' is null in RST diff --git a/src/Text/Pandoc/Readers/Roff.hs b/src/Text/Pandoc/Readers/Roff.hs index 509ce1377..47f16ef4b 100644 --- a/src/Text/Pandoc/Readers/Roff.hs +++ b/src/Text/Pandoc/Readers/Roff.hs @@ -42,7 +42,6 @@ import Text.Pandoc.Logging (LogMessage(..)) import Text.Pandoc.Options import Text.Pandoc.Parsing import Text.Pandoc.Shared (safeRead) -import Text.Parsec hiding (tokenPrim) import Text.Pandoc.RoffChar (characterCodes, combiningAccents) import qualified Data.Sequence as Seq import qualified Data.Foldable as Foldable @@ -122,16 +121,16 @@ instance Default RoffState where , afterConditional = False } -type RoffLexer m = ParserT T.Text RoffState m +type RoffLexer m = ParserT Sources RoffState m -- -- Lexer: T.Text -> RoffToken -- -eofline :: Stream s m Char => ParsecT s u m () +eofline :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s u m () eofline = void newline <|> eof <|> () <$ lookAhead (string "\\}") -spacetab :: Stream s m Char => ParsecT s u m Char +spacetab :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s u m Char spacetab = char ' ' <|> char '\t' characterCodeMap :: M.Map T.Text Char @@ -303,8 +302,7 @@ expandString = try $ do char '*' cs <- escapeArg <|> countChar 1 anyChar s <- linePartsToText <$> resolveText cs pos - getInput >>= setInput . (s <>) - return () + addToInput s -- Parses: '..' quoteArg :: PandocMonad m => RoffLexer m T.Text @@ -316,7 +314,7 @@ escFont = do font' <- if T.null font || font == "P" then prevFont <$> getState else return $ foldr processFontLetter defaultFontSpec $ T.unpack font - modifyState $ \st -> st{ prevFont = currentFont st + updateState $ \st -> st{ prevFont = currentFont st , currentFont = font' } return [Font font'] where @@ -372,8 +370,8 @@ lexTable pos = do spaces opts <- try tableOptions <|> [] <$ optional (char ';') case lookup "tab" opts of - Just (T.uncons -> Just (c, _)) -> modifyState $ \st -> st{ tableTabChar = c } - _ -> modifyState $ \st -> st{ tableTabChar = '\t' } + Just (T.uncons -> Just (c, _)) -> updateState $ \st -> st{ tableTabChar = c } + _ -> updateState $ \st -> st{ tableTabChar = '\t' } spaces skipMany lexComment spaces @@ -489,18 +487,18 @@ lexConditional mname = do ifPart <- do optional $ try $ char '\\' >> newline lexGroup - <|> do modifyState $ \s -> s{ afterConditional = True } + <|> do updateState $ \s -> s{ afterConditional = True } t <- manToken - modifyState $ \s -> s{ afterConditional = False } + updateState $ \s -> s{ afterConditional = False } return t case mbtest of Nothing -> do - putState st -- reset state, so we don't record macros in skipped section + setState st -- reset state, so we don't record macros in skipped section report $ SkippedContent (T.cons '.' mname) pos return mempty Just True -> return ifPart Just False -> do - putState st + setState st return mempty expression :: PandocMonad m => RoffLexer m (Maybe Bool) @@ -515,7 +513,7 @@ expression = do _ -> Nothing where returnValue v = do - modifyState $ \st -> st{ lastExpression = v } + updateState $ \st -> st{ lastExpression = v } return v lexGroup :: PandocMonad m => RoffLexer m RoffTokens @@ -536,7 +534,7 @@ lexIncludeFile args = do result <- readFileFromDirs dirs $ T.unpack fp case result of Nothing -> report $ CouldNotLoadIncludeFile fp pos - Just s -> getInput >>= setInput . (s <>) + Just s -> addToInput s return mempty [] -> return mempty @@ -564,13 +562,13 @@ lexStringDef args = do -- string definition (x:ys) -> do let ts = singleTok $ TextLine (intercalate [RoffStr " " ] ys) let stringName = linePartsToText x - modifyState $ \st -> + updateState $ \st -> st{ customMacros = M.insert stringName ts (customMacros st) } return mempty lexMacroDef :: PandocMonad m => [Arg] -> RoffLexer m RoffTokens lexMacroDef args = do -- macro definition - modifyState $ \st -> st{ roffMode = CopyMode } + updateState $ \st -> st{ roffMode = CopyMode } (macroName, stopMacro) <- case args of (x : y : _) -> return (linePartsToText x, linePartsToText y) @@ -584,7 +582,7 @@ lexMacroDef args = do -- macro definition _ <- lexArgs return () ts <- mconcat <$> manyTill manToken stop - modifyState $ \st -> + updateState $ \st -> st{ customMacros = M.insert macroName ts (customMacros st) , roffMode = NormalMode } return mempty diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs index c4d7bcc93..276d28aaa 100644 --- a/src/Text/Pandoc/Readers/TWiki.hs +++ b/src/Text/Pandoc/Readers/TWiki.hs @@ -28,22 +28,22 @@ import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (enclosed, nested) import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag) -import Text.Pandoc.Shared (crFilter, tshow) +import Text.Pandoc.Shared (tshow) import Text.Pandoc.XML (fromEntities) -- | Read twiki from an input string and return a Pandoc document. -readTWiki :: PandocMonad m +readTWiki :: (PandocMonad m, ToSources a) => ReaderOptions - -> Text + -> a -> m Pandoc readTWiki opts s = do - res <- readWithM parseTWiki def{ stateOptions = opts } - (crFilter s <> "\n\n") + let sources = ensureFinalNewlines 2 (toSources s) + res <- readWithM parseTWiki def{ stateOptions = opts } sources case res of Left e -> throwError e Right d -> return d -type TWParser = ParserT Text ParserState +type TWParser = ParserT Sources ParserState -- -- utility functions diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 8d7900de4..981878206 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -53,30 +53,34 @@ import Text.Pandoc.Options import Text.Pandoc.Parsing import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isInlineTag) import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline) -import Text.Pandoc.Shared (crFilter, trim, tshow) +import Text.Pandoc.Shared (trim, tshow) -- | Parse a Textile text and return a Pandoc document. -readTextile :: PandocMonad m +readTextile :: (PandocMonad m, ToSources a) => ReaderOptions -- ^ Reader options - -> Text -- ^ String to parse (assuming @'\n'@ line endings) + -> a -> m Pandoc readTextile opts s = do - parsed <- readWithM parseTextile def{ stateOptions = opts } - (crFilter s <> "\n\n") + let sources = ensureFinalNewlines 2 (toSources s) + parsed <- readWithM parseTextile def{ stateOptions = opts } sources case parsed of Right result -> return result Left e -> throwError e +type TextileParser = ParserT Sources ParserState -- | Generate a Pandoc ADT from a textile document -parseTextile :: PandocMonad m => ParserT Text ParserState m Pandoc +parseTextile :: PandocMonad m => TextileParser m Pandoc parseTextile = do many blankline startPos <- getPosition -- go through once just to get list of reference keys and notes -- docMinusKeys is the raw document with blanks where the keys/notes were... - let firstPassParser = noteBlock <|> lineClump - manyTill firstPassParser eof >>= setInput . T.concat + let firstPassParser = do + pos <- getPosition + t <- noteBlock <|> lineClump + return (pos, t) + manyTill firstPassParser eof >>= setInput . Sources setPosition startPos st' <- getState let reversedNotes = stateNotes st' @@ -84,10 +88,10 @@ parseTextile = do -- now parse it for real... Pandoc nullMeta . B.toList <$> parseBlocks -- FIXME -noteMarker :: PandocMonad m => ParserT Text ParserState m Text +noteMarker :: PandocMonad m => TextileParser m Text noteMarker = skipMany spaceChar >> string "fn" >> T.pack <$> manyTill digit (char '.') -noteBlock :: PandocMonad m => ParserT Text ParserState m Text +noteBlock :: PandocMonad m => TextileParser m Text noteBlock = try $ do startPos <- getPosition ref <- noteMarker @@ -102,11 +106,11 @@ noteBlock = try $ do return $ T.replicate (sourceLine endPos - sourceLine startPos) "\n" -- | Parse document blocks -parseBlocks :: PandocMonad m => ParserT Text ParserState m Blocks +parseBlocks :: PandocMonad m => TextileParser m Blocks parseBlocks = mconcat <$> manyTill block eof -- | Block parsers list tried in definition order -blockParsers :: PandocMonad m => [ParserT Text ParserState m Blocks] +blockParsers :: PandocMonad m => [TextileParser m Blocks] blockParsers = [ codeBlock , header , blockQuote @@ -121,22 +125,22 @@ blockParsers = [ codeBlock ] -- | Any block in the order of definition of blockParsers -block :: PandocMonad m => ParserT Text ParserState m Blocks +block :: PandocMonad m => TextileParser m Blocks block = do res <- choice blockParsers <?> "block" trace (T.take 60 $ tshow $ B.toList res) return res -commentBlock :: PandocMonad m => ParserT Text ParserState m Blocks +commentBlock :: PandocMonad m => TextileParser m Blocks commentBlock = try $ do string "###." manyTill anyLine blanklines return mempty -codeBlock :: PandocMonad m => ParserT Text ParserState m Blocks +codeBlock :: PandocMonad m => TextileParser m Blocks codeBlock = codeBlockTextile <|> codeBlockHtml -codeBlockTextile :: PandocMonad m => ParserT Text ParserState m Blocks +codeBlockTextile :: PandocMonad m => TextileParser m Blocks codeBlockTextile = try $ do string "bc." <|> string "pre." extended <- option False (True <$ char '.') @@ -156,7 +160,7 @@ trimTrailingNewlines :: Text -> Text trimTrailingNewlines = T.dropWhileEnd (=='\n') -- | Code Blocks in Textile are between <pre> and </pre> -codeBlockHtml :: PandocMonad m => ParserT Text ParserState m Blocks +codeBlockHtml :: PandocMonad m => TextileParser m Blocks codeBlockHtml = try $ do (t@(TagOpen _ attrs),_) <- htmlTag (tagOpen (=="pre") (const True)) result' <- T.pack <$> manyTill anyChar (htmlTag (tagClose (=="pre"))) @@ -174,7 +178,7 @@ codeBlockHtml = try $ do return $ B.codeBlockWith (ident,classes,kvs) result''' -- | Header of the form "hN. content" with N in 1..6 -header :: PandocMonad m => ParserT Text ParserState m Blocks +header :: PandocMonad m => TextileParser m Blocks header = try $ do char 'h' level <- digitToInt <$> oneOf "123456" @@ -186,14 +190,14 @@ header = try $ do return $ B.headerWith attr' level name -- | Blockquote of the form "bq. content" -blockQuote :: PandocMonad m => ParserT Text ParserState m Blocks +blockQuote :: PandocMonad m => TextileParser m Blocks blockQuote = try $ do string "bq" >> attributes >> char '.' >> whitespace B.blockQuote <$> para -- Horizontal rule -hrule :: PandocMonad m => ParserT Text st m Blocks +hrule :: PandocMonad m => TextileParser m Blocks hrule = try $ do skipSpaces start <- oneOf "-*" @@ -208,39 +212,39 @@ hrule = try $ do -- | Can be a bullet list or an ordered list. This implementation is -- strict in the nesting, sublist must start at exactly "parent depth -- plus one" -anyList :: PandocMonad m => ParserT Text ParserState m Blocks +anyList :: PandocMonad m => TextileParser m Blocks anyList = try $ anyListAtDepth 1 <* blanklines -- | This allow one type of list to be nested into an other type, -- provided correct nesting -anyListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks +anyListAtDepth :: PandocMonad m => Int -> TextileParser m Blocks anyListAtDepth depth = choice [ bulletListAtDepth depth, orderedListAtDepth depth, definitionList ] -- | Bullet List of given depth, depth being the number of leading '*' -bulletListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks +bulletListAtDepth :: PandocMonad m => Int -> TextileParser m Blocks bulletListAtDepth depth = try $ B.bulletList <$> many1 (bulletListItemAtDepth depth) -- | Bullet List Item of given depth, depth being the number of -- leading '*' -bulletListItemAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks +bulletListItemAtDepth :: PandocMonad m => Int -> TextileParser m Blocks bulletListItemAtDepth = genericListItemAtDepth '*' -- | Ordered List of given depth, depth being the number of -- leading '#' -orderedListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks +orderedListAtDepth :: PandocMonad m => Int -> TextileParser m Blocks orderedListAtDepth depth = try $ do items <- many1 (orderedListItemAtDepth depth) return $ B.orderedList items -- | Ordered List Item of given depth, depth being the number of -- leading '#' -orderedListItemAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks +orderedListItemAtDepth :: PandocMonad m => Int -> TextileParser m Blocks orderedListItemAtDepth = genericListItemAtDepth '#' -- | Common implementation of list items -genericListItemAtDepth :: PandocMonad m => Char -> Int -> ParserT Text ParserState m Blocks +genericListItemAtDepth :: PandocMonad m => Char -> Int -> TextileParser m Blocks genericListItemAtDepth c depth = try $ do count depth (char c) >> attributes >> whitespace contents <- mconcat <$> many ((B.plain . mconcat <$> many1 inline) <|> @@ -250,25 +254,25 @@ genericListItemAtDepth c depth = try $ do return $ contents <> sublist -- | A definition list is a set of consecutive definition items -definitionList :: PandocMonad m => ParserT Text ParserState m Blocks +definitionList :: PandocMonad m => TextileParser m Blocks definitionList = try $ B.definitionList <$> many1 definitionListItem -- | List start character. -listStart :: PandocMonad m => ParserT Text ParserState m () +listStart :: PandocMonad m => TextileParser m () listStart = genericListStart '*' <|> () <$ genericListStart '#' <|> () <$ definitionListStart -genericListStart :: PandocMonad m => Char -> ParserT Text st m () +genericListStart :: PandocMonad m => Char -> TextileParser m () genericListStart c = () <$ try (many1 (char c) >> whitespace) -basicDLStart :: PandocMonad m => ParserT Text ParserState m () +basicDLStart :: PandocMonad m => TextileParser m () basicDLStart = do char '-' whitespace notFollowedBy newline -definitionListStart :: PandocMonad m => ParserT Text ParserState m Inlines +definitionListStart :: PandocMonad m => TextileParser m Inlines definitionListStart = try $ do basicDLStart trimInlines . mconcat <$> @@ -281,15 +285,15 @@ definitionListStart = try $ do -- the term defined, then spaces and ":=". The definition follows, on -- the same single line, or spaned on multiple line, after a line -- break. -definitionListItem :: PandocMonad m => ParserT Text ParserState m (Inlines, [Blocks]) +definitionListItem :: PandocMonad m => TextileParser m (Inlines, [Blocks]) definitionListItem = try $ do term <- mconcat . intersperse B.linebreak <$> many1 definitionListStart def' <- string ":=" *> optional whitespace *> (multilineDef <|> inlineDef) return (term, def') - where inlineDef :: PandocMonad m => ParserT Text ParserState m [Blocks] + where inlineDef :: PandocMonad m => TextileParser m [Blocks] inlineDef = liftM (\d -> [B.plain d]) $ optional whitespace >> (trimInlines . mconcat <$> many inline) <* newline - multilineDef :: PandocMonad m => ParserT Text ParserState m [Blocks] + multilineDef :: PandocMonad m => TextileParser m [Blocks] multilineDef = try $ do optional whitespace >> newline s <- T.pack <$> many1Till anyChar (try (string "=:" >> newline)) @@ -300,7 +304,7 @@ definitionListItem = try $ do -- raw content -- | A raw Html Block, optionally followed by blanklines -rawHtmlBlock :: PandocMonad m => ParserT Text ParserState m Blocks +rawHtmlBlock :: PandocMonad m => TextileParser m Blocks rawHtmlBlock = try $ do skipMany spaceChar (_,b) <- htmlTag isBlockTag @@ -308,14 +312,14 @@ rawHtmlBlock = try $ do return $ B.rawBlock "html" b -- | Raw block of LaTeX content -rawLaTeXBlock' :: PandocMonad m => ParserT Text ParserState m Blocks +rawLaTeXBlock' :: PandocMonad m => TextileParser m Blocks rawLaTeXBlock' = do guardEnabled Ext_raw_tex B.rawBlock "latex" <$> (rawLaTeXBlock <* spaces) -- | In textile, paragraphs are separated by blank lines. -para :: PandocMonad m => ParserT Text ParserState m Blocks +para :: PandocMonad m => TextileParser m Blocks para = B.para . trimInlines . mconcat <$> many1 inline -- Tables @@ -326,7 +330,7 @@ toAlignment '>' = AlignRight toAlignment '=' = AlignCenter toAlignment _ = AlignDefault -cellAttributes :: PandocMonad m => ParserT Text ParserState m (Bool, Alignment) +cellAttributes :: PandocMonad m => TextileParser m (Bool, Alignment) cellAttributes = try $ do isHeader <- option False (True <$ char '_') -- we just ignore colspan and rowspan markers: @@ -339,7 +343,7 @@ cellAttributes = try $ do return (isHeader, alignment) -- | A table cell spans until a pipe | -tableCell :: PandocMonad m => ParserT Text ParserState m ((Bool, Alignment), Blocks) +tableCell :: PandocMonad m => TextileParser m ((Bool, Alignment), Blocks) tableCell = try $ do char '|' (isHeader, alignment) <- option (False, AlignDefault) cellAttributes @@ -350,7 +354,7 @@ tableCell = try $ do return ((isHeader, alignment), B.plain content) -- | A table row is made of many table cells -tableRow :: PandocMonad m => ParserT Text ParserState m [((Bool, Alignment), Blocks)] +tableRow :: PandocMonad m => TextileParser m [((Bool, Alignment), Blocks)] tableRow = try $ do -- skip optional row attributes optional $ try $ do @@ -360,7 +364,7 @@ tableRow = try $ do many1 tableCell <* char '|' <* blankline -- | A table with an optional header. -table :: PandocMonad m => ParserT Text ParserState m Blocks +table :: PandocMonad m => TextileParser m Blocks table = try $ do -- ignore table attributes caption <- option mempty $ try $ do @@ -388,7 +392,7 @@ table = try $ do (TableFoot nullAttr []) -- | Ignore markers for cols, thead, tfoot. -ignorableRow :: PandocMonad m => ParserT Text ParserState m () +ignorableRow :: PandocMonad m => TextileParser m () ignorableRow = try $ do char '|' oneOf ":^-~" @@ -397,7 +401,7 @@ ignorableRow = try $ do _ <- anyLine return () -explicitBlockStart :: PandocMonad m => Text -> ParserT Text ParserState m () +explicitBlockStart :: PandocMonad m => Text -> TextileParser m () explicitBlockStart name = try $ do string (T.unpack name) attributes @@ -409,8 +413,8 @@ explicitBlockStart name = try $ do -- However, they can be used to set HTML/CSS attributes when needed. maybeExplicitBlock :: PandocMonad m => Text -- ^ block tag name - -> ParserT Text ParserState m Blocks -- ^ implicit block - -> ParserT Text ParserState m Blocks + -> TextileParser m Blocks -- ^ implicit block + -> TextileParser m Blocks maybeExplicitBlock name blk = try $ do optional $ explicitBlockStart name blk @@ -423,11 +427,11 @@ maybeExplicitBlock name blk = try $ do -- | Any inline element -inline :: PandocMonad m => ParserT Text ParserState m Inlines +inline :: PandocMonad m => TextileParser m Inlines inline = choice inlineParsers <?> "inline" -- | Inline parsers tried in order -inlineParsers :: PandocMonad m => [ParserT Text ParserState m Inlines] +inlineParsers :: PandocMonad m => [TextileParser m Inlines] inlineParsers = [ str , whitespace , endline @@ -447,7 +451,7 @@ inlineParsers = [ str ] -- | Inline markups -inlineMarkup :: PandocMonad m => ParserT Text ParserState m Inlines +inlineMarkup :: PandocMonad m => TextileParser m Inlines inlineMarkup = choice [ simpleInline (string "??") (B.cite []) , simpleInline (string "**") B.strong , simpleInline (string "__") B.emph @@ -461,29 +465,29 @@ inlineMarkup = choice [ simpleInline (string "??") (B.cite []) ] -- | Trademark, registered, copyright -mark :: PandocMonad m => ParserT Text st m Inlines +mark :: PandocMonad m => TextileParser m Inlines mark = try $ char '(' >> (try tm <|> try reg <|> copy) -reg :: PandocMonad m => ParserT Text st m Inlines +reg :: PandocMonad m => TextileParser m Inlines reg = do oneOf "Rr" char ')' return $ B.str "\174" -tm :: PandocMonad m => ParserT Text st m Inlines +tm :: PandocMonad m => TextileParser m Inlines tm = do oneOf "Tt" oneOf "Mm" char ')' return $ B.str "\8482" -copy :: PandocMonad m => ParserT Text st m Inlines +copy :: PandocMonad m => TextileParser m Inlines copy = do oneOf "Cc" char ')' return $ B.str "\169" -note :: PandocMonad m => ParserT Text ParserState m Inlines +note :: PandocMonad m => TextileParser m Inlines note = try $ do ref <- char '[' *> many1 digit <* char ']' notes <- stateNotes <$> getState @@ -507,13 +511,13 @@ wordBoundaries :: [Char] wordBoundaries = markupChars <> stringBreakers -- | Parse a hyphened sequence of words -hyphenedWords :: PandocMonad m => ParserT Text ParserState m Text +hyphenedWords :: PandocMonad m => TextileParser m Text hyphenedWords = do x <- wordChunk xs <- many (try $ char '-' >> wordChunk) return $ T.intercalate "-" (x:xs) -wordChunk :: PandocMonad m => ParserT Text ParserState m Text +wordChunk :: PandocMonad m => TextileParser m Text wordChunk = try $ do hd <- noneOf wordBoundaries tl <- many ( noneOf wordBoundaries <|> @@ -522,7 +526,7 @@ wordChunk = try $ do return $ T.pack $ hd:tl -- | Any string -str :: PandocMonad m => ParserT Text ParserState m Inlines +str :: PandocMonad m => TextileParser m Inlines str = do baseStr <- hyphenedWords -- RedCloth compliance : if parsed word is uppercase and immediately @@ -535,11 +539,11 @@ str = do return $ B.str fullStr -- | Some number of space chars -whitespace :: PandocMonad m => ParserT Text st m Inlines +whitespace :: PandocMonad m => TextileParser m Inlines whitespace = many1 spaceChar >> return B.space <?> "whitespace" -- | In Textile, an isolated endline character is a line break -endline :: PandocMonad m => ParserT Text ParserState m Inlines +endline :: PandocMonad m => TextileParser m Inlines endline = try $ do newline notFollowedBy blankline @@ -547,18 +551,18 @@ endline = try $ do notFollowedBy rawHtmlBlock return B.linebreak -rawHtmlInline :: PandocMonad m => ParserT Text ParserState m Inlines +rawHtmlInline :: PandocMonad m => TextileParser m Inlines rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag -- | Raw LaTeX Inline -rawLaTeXInline' :: PandocMonad m => ParserT Text ParserState m Inlines +rawLaTeXInline' :: PandocMonad m => TextileParser m Inlines rawLaTeXInline' = try $ do guardEnabled Ext_raw_tex B.rawInline "latex" <$> rawLaTeXInline -- | Textile standard link syntax is "label":target. But we -- can also have ["label":target]. -link :: PandocMonad m => ParserT Text ParserState m Inlines +link :: PandocMonad m => TextileParser m Inlines link = try $ do bracketed <- (True <$ char '[') <|> return False char '"' *> notFollowedBy (oneOf " \t\n\r") @@ -578,7 +582,7 @@ link = try $ do else B.spanWith attr $ B.link url "" name' -- | image embedding -image :: PandocMonad m => ParserT Text ParserState m Inlines +image :: PandocMonad m => TextileParser m Inlines image = try $ do char '!' >> notFollowedBy space (ident, cls, kvs) <- attributes @@ -590,51 +594,51 @@ image = try $ do char '!' return $ B.imageWith attr src alt (B.str alt) -escapedInline :: PandocMonad m => ParserT Text ParserState m Inlines +escapedInline :: PandocMonad m => TextileParser m Inlines escapedInline = escapedEqs <|> escapedTag -escapedEqs :: PandocMonad m => ParserT Text ParserState m Inlines +escapedEqs :: PandocMonad m => TextileParser m Inlines escapedEqs = B.str . T.pack <$> try (string "==" *> manyTill anyChar' (try $ string "==")) -- | literal text escaped btw <notextile> tags -escapedTag :: PandocMonad m => ParserT Text ParserState m Inlines +escapedTag :: PandocMonad m => TextileParser m Inlines escapedTag = B.str . T.pack <$> try (string "<notextile>" *> manyTill anyChar' (try $ string "</notextile>")) -- | Any special symbol defined in wordBoundaries -symbol :: PandocMonad m => ParserT Text ParserState m Inlines +symbol :: PandocMonad m => TextileParser m Inlines symbol = B.str . T.singleton <$> (notFollowedBy newline *> notFollowedBy rawHtmlBlock *> oneOf wordBoundaries) -- | Inline code -code :: PandocMonad m => ParserT Text ParserState m Inlines +code :: PandocMonad m => TextileParser m Inlines code = code1 <|> code2 -- any character except a newline before a blank line -anyChar' :: PandocMonad m => ParserT Text ParserState m Char +anyChar' :: PandocMonad m => TextileParser m Char anyChar' = satisfy (/='\n') <|> try (char '\n' <* notFollowedBy blankline) -code1 :: PandocMonad m => ParserT Text ParserState m Inlines +code1 :: PandocMonad m => TextileParser m Inlines code1 = B.code . T.pack <$> surrounded (char '@') anyChar' -code2 :: PandocMonad m => ParserT Text ParserState m Inlines +code2 :: PandocMonad m => TextileParser m Inlines code2 = do htmlTag (tagOpen (=="tt") null) B.code . T.pack <$> manyTill anyChar' (try $ htmlTag $ tagClose (=="tt")) -- | Html / CSS attributes -attributes :: PandocMonad m => ParserT Text ParserState m Attr +attributes :: PandocMonad m => TextileParser m Attr attributes = foldl' (flip ($)) ("",[],[]) <$> try (do special <- option id specialAttribute attrs <- many attribute return (special : attrs)) -specialAttribute :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr) +specialAttribute :: PandocMonad m => TextileParser m (Attr -> Attr) specialAttribute = do alignStr <- ("center" <$ char '=') <|> ("justify" <$ try (string "<>")) <|> @@ -643,11 +647,11 @@ specialAttribute = do notFollowedBy spaceChar return $ addStyle $ T.pack $ "text-align:" ++ alignStr -attribute :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr) +attribute :: PandocMonad m => TextileParser m (Attr -> Attr) attribute = try $ (classIdAttr <|> styleAttr <|> langAttr) <* notFollowedBy spaceChar -classIdAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr) +classIdAttr :: PandocMonad m => TextileParser m (Attr -> Attr) classIdAttr = try $ do -- (class class #id) char '(' ws <- T.words `fmap` T.pack <$> manyTill anyChar' (char ')') @@ -659,7 +663,7 @@ classIdAttr = try $ do -- (class class #id) classes' -> return $ \(_,_,keyvals) -> ("",classes',keyvals) -styleAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr) +styleAttr :: PandocMonad m => TextileParser m (Attr -> Attr) styleAttr = do style <- try $ enclosed (char '{') (char '}') anyChar' return $ addStyle $ T.pack style @@ -670,23 +674,23 @@ addStyle style (id',classes,keyvals) = where keyvals' = ("style", style') : [(k,v) | (k,v) <- keyvals, k /= "style"] style' = style <> ";" <> T.concat [v | ("style",v) <- keyvals] -langAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr) +langAttr :: PandocMonad m => TextileParser m (Attr -> Attr) langAttr = do lang <- try $ enclosed (char '[') (char ']') alphaNum return $ \(id',classes,keyvals) -> (id',classes,("lang",T.pack lang):keyvals) -- | Parses material surrounded by a parser. surrounded :: (PandocMonad m, Show t) - => ParserT Text st m t -- ^ surrounding parser - -> ParserT Text st m a -- ^ content parser (to be used repeatedly) - -> ParserT Text st m [a] + => ParserT Sources st m t -- ^ surrounding parser + -> ParserT Sources st m a -- ^ content parser (to be used repeatedly) + -> ParserT Sources st m [a] surrounded border = enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border) simpleInline :: PandocMonad m - => ParserT Text ParserState m t -- ^ surrounding parser + => TextileParser m t -- ^ surrounding parser -> (Inlines -> Inlines) -- ^ Inline constructor - -> ParserT Text ParserState m Inlines -- ^ content parser (to be used repeatedly) + -> TextileParser m Inlines -- ^ content parser (to be used repeatedly) simpleInline border construct = try $ do notAfterString border *> notFollowedBy (oneOf " \t\n\r") @@ -700,7 +704,7 @@ simpleInline border construct = try $ do then body else B.spanWith attr body -groupedInlineMarkup :: PandocMonad m => ParserT Text ParserState m Inlines +groupedInlineMarkup :: PandocMonad m => TextileParser m Inlines groupedInlineMarkup = try $ do char '[' sp1 <- option mempty $ B.space <$ whitespace @@ -709,5 +713,5 @@ groupedInlineMarkup = try $ do char ']' return $ sp1 <> result <> sp2 -eof' :: Monad m => ParserT Text s m Char +eof' :: Monad m => ParserT Sources s m Char eof' = '\n' <$ eof diff --git a/src/Text/Pandoc/Readers/TikiWiki.hs b/src/Text/Pandoc/Readers/TikiWiki.hs index fb4b662c5..5c414fdec 100644 --- a/src/Text/Pandoc/Readers/TikiWiki.hs +++ b/src/Text/Pandoc/Readers/TikiWiki.hs @@ -30,23 +30,23 @@ import Text.Pandoc.Definition import Text.Pandoc.Logging (Verbosity (..)) import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (enclosed, nested) -import Text.Pandoc.Shared (crFilter, safeRead) +import Text.Pandoc.Shared (safeRead) import Text.Pandoc.XML (fromEntities) import Text.Printf (printf) -- | Read TikiWiki from an input string and return a Pandoc document. -readTikiWiki :: PandocMonad m +readTikiWiki :: (PandocMonad m, ToSources a) => ReaderOptions - -> Text + -> a -> m Pandoc readTikiWiki opts s = do - res <- readWithM parseTikiWiki def{ stateOptions = opts } - (crFilter s <> "\n\n") + let sources = ensureFinalNewlines 2 (toSources s) + res <- readWithM parseTikiWiki def{ stateOptions = opts } sources case res of Left e -> throwError e Right d -> return d -type TikiWikiParser = ParserT Text ParserState +type TikiWikiParser = ParserT Sources ParserState -- -- utility functions diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs index d355a4b55..6f92f0063 100644 --- a/src/Text/Pandoc/Readers/Txt2Tags.hs +++ b/src/Text/Pandoc/Readers/Txt2Tags.hs @@ -33,9 +33,9 @@ import Data.Time (defaultTimeLocale) import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (space, spaces, uri) -import Text.Pandoc.Shared (compactify, compactifyDL, crFilter, escapeURI) +import Text.Pandoc.Shared (compactify, compactifyDL, escapeURI) -type T2T = ParserT Text ParserState (Reader T2TMeta) +type T2T = ParserT Sources ParserState (Reader T2TMeta) -- | An object for the T2T macros meta information -- the contents of each field is simply substituted verbatim into the file @@ -68,15 +68,15 @@ getT2TMeta = do (intercalate ", " inps) outp -- | Read Txt2Tags from an input string returning a Pandoc document -readTxt2Tags :: PandocMonad m +readTxt2Tags :: (PandocMonad m, ToSources a) => ReaderOptions - -> Text + -> a -> m Pandoc readTxt2Tags opts s = do + let sources = ensureFinalNewlines 2 (toSources s) meta <- getT2TMeta let parsed = flip runReader meta $ - readWithM parseT2T (def {stateOptions = opts}) $ - crFilter s <> "\n\n" + readWithM parseT2T (def {stateOptions = opts}) sources case parsed of Right result -> return result Left e -> throwError e diff --git a/src/Text/Pandoc/Readers/Vimwiki.hs b/src/Text/Pandoc/Readers/Vimwiki.hs index 74dac5ea7..460f304c4 100644 --- a/src/Text/Pandoc/Readers/Vimwiki.hs +++ b/src/Text/Pandoc/Readers/Vimwiki.hs @@ -74,23 +74,28 @@ import Text.Pandoc.Parsing (ParserState, ParserT, blanklines, emailAddress, many1Till, orderedListMarker, readWithM, registerHeader, spaceChar, stateMeta, stateOptions, uri, manyTillChar, manyChar, textStr, - many1Char, countChar, many1TillChar) -import Text.Pandoc.Shared (crFilter, splitTextBy, stringify, stripFirstAndLast, + many1Char, countChar, many1TillChar, + alphaNum, anyChar, char, newline, noneOf, oneOf, + space, spaces, string) +import Text.Pandoc.Sources (ToSources(..), Sources) +import Text.Pandoc.Shared (splitTextBy, stringify, stripFirstAndLast, isURI, tshow) -import Text.Parsec.Char (alphaNum, anyChar, char, newline, noneOf, oneOf, space, - spaces, string) import Text.Parsec.Combinator (between, choice, eof, lookAhead, many1, manyTill, notFollowedBy, option, skipMany1) import Text.Parsec.Prim (getState, many, try, updateState, (<|>)) -readVimwiki :: PandocMonad m => ReaderOptions -> Text -> m Pandoc +readVimwiki :: (PandocMonad m, ToSources a) + => ReaderOptions + -> a + -> m Pandoc readVimwiki opts s = do - res <- readWithM parseVimwiki def{ stateOptions = opts } $ crFilter s + let sources = toSources s + res <- readWithM parseVimwiki def{ stateOptions = opts } sources case res of Left e -> throwError e Right result -> return result -type VwParser = ParserT Text ParserState +type VwParser = ParserT Sources ParserState -- constants |