aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Sources.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Sources.hs')
-rw-r--r--src/Text/Pandoc/Sources.hs195
1 files changed, 195 insertions, 0 deletions
diff --git a/src/Text/Pandoc/Sources.hs b/src/Text/Pandoc/Sources.hs
new file mode 100644
index 000000000..5511ccfb8
--- /dev/null
+++ b/src/Text/Pandoc/Sources.hs
@@ -0,0 +1,195 @@
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+ Module : Text.Pandoc.Sources
+ Copyright : Copyright (C) 2021 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Defines Sources object to be used as input to pandoc parsers and redefines Char
+parsers so they get source position information from it.
+-}
+
+module Text.Pandoc.Sources
+ ( Sources(..)
+ , ToSources(..)
+ , UpdateSourcePos(..)
+ , sourcesToText
+ , initialSourceName
+ , addToSources
+ , ensureFinalNewlines
+ , addToInput
+ , satisfy
+ , oneOf
+ , noneOf
+ , anyChar
+ , char
+ , string
+ , newline
+ , space
+ , spaces
+ , letter
+ , digit
+ , hexDigit
+ , alphaNum
+ )
+where
+import qualified Text.Parsec as P
+import Text.Parsec (Stream(..), ParsecT)
+import Text.Parsec.Pos as P
+import Data.Text (Text)
+import qualified Data.Text as T
+import Data.Char (isSpace, isLetter, isAlphaNum, isDigit, isHexDigit)
+import Data.String (IsString(..))
+import qualified Data.List.NonEmpty as NonEmpty
+
+-- | A list of inputs labeled with source positions. It is assumed
+-- that the 'Text's have @\n@ line endings.
+newtype Sources = Sources { unSources :: [(SourcePos, Text)] }
+ deriving (Show, Semigroup, Monoid)
+
+instance Monad m => Stream Sources m Char where
+ uncons (Sources []) = return Nothing
+ uncons (Sources ((pos,t):rest)) =
+ case T.uncons t of
+ Nothing -> uncons (Sources rest)
+ Just (c,t') -> return $ Just (c, Sources ((pos,t'):rest))
+
+instance IsString Sources where
+ fromString s = Sources [(P.initialPos "", T.pack (filter (/='\r') s))]
+
+class ToSources a where
+ toSources :: a -> Sources
+
+instance ToSources Text where
+ toSources t = Sources [(P.initialPos "", T.filter (/='\r') t)]
+
+instance ToSources [(FilePath, Text)] where
+ toSources = Sources
+ . map (\(fp,t) ->
+ (P.initialPos fp, T.snoc (T.filter (/='\r') t) '\n'))
+
+instance ToSources Sources where
+ toSources = id
+
+sourcesToText :: Sources -> Text
+sourcesToText (Sources xs) = mconcat $ map snd xs
+
+addToSources :: Monad m => SourcePos -> Text -> ParsecT Sources u m ()
+addToSources pos t = do
+ curpos <- P.getPosition
+ Sources xs <- P.getInput
+ let xs' = case xs of
+ [] -> []
+ ((_,t'):rest) -> (curpos,t'):rest
+ P.setInput $ Sources ((pos, T.filter (/='\r') t):xs')
+
+ensureFinalNewlines :: Int -- ^ number of trailing newlines
+ -> Sources
+ -> Sources
+ensureFinalNewlines n (Sources xs) =
+ case NonEmpty.nonEmpty xs of
+ Nothing -> Sources [(initialPos "", T.replicate n "\n")]
+ Just lst ->
+ case NonEmpty.last lst of
+ (spos, t) ->
+ case T.length (T.takeWhileEnd (=='\n') t) of
+ len | len >= n -> Sources xs
+ | otherwise -> Sources (NonEmpty.init lst ++
+ [(spos,
+ t <> T.replicate (n - len) "\n")])
+
+class UpdateSourcePos s c where
+ updateSourcePos :: SourcePos -> c -> s -> SourcePos
+
+instance UpdateSourcePos Text Char where
+ updateSourcePos pos c _ = updatePosChar pos c
+
+instance UpdateSourcePos Sources Char where
+ updateSourcePos pos c sources =
+ case sources of
+ Sources [] -> updatePosChar pos c
+ Sources ((_,t):(pos',_):_)
+ | T.null t -> pos'
+ Sources _ ->
+ case c of
+ '\n' -> incSourceLine (setSourceColumn pos 1) 1
+ '\t' -> incSourceColumn pos (4 - ((sourceColumn pos - 1) `mod` 4))
+ _ -> incSourceColumn pos 1
+
+-- | Get name of first source in 'Sources'.
+initialSourceName :: Sources -> FilePath
+initialSourceName (Sources []) = ""
+initialSourceName (Sources ((pos,_):_)) = sourceName pos
+
+-- | Add some text to the beginning of the input sources.
+-- This simplifies code that expands macros.
+addToInput :: Monad m => Text -> ParsecT Sources u m ()
+addToInput t = do
+ Sources xs <- P.getInput
+ case xs of
+ [] -> P.setInput $ Sources [(initialPos "",t)]
+ (pos,t'):rest -> P.setInput $ Sources ((pos, t <> t'):rest)
+
+-- We need to redefine the parsers in Text.Parsec.Char so that they
+-- update source positions properly from the Sources stream.
+
+satisfy :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+ => (Char -> Bool) -> ParsecT s u m Char
+satisfy f = P.tokenPrim show updateSourcePos matcher
+ where
+ matcher c = if f c then Just c else Nothing
+
+oneOf :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+ => [Char] -> ParsecT s u m Char
+oneOf cs = satisfy (`elem` cs)
+
+noneOf :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+ => [Char] -> ParsecT s u m Char
+noneOf cs = satisfy (`notElem` cs)
+
+anyChar :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+ => ParsecT s u m Char
+anyChar = satisfy (const True)
+
+char :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+ => Char -> ParsecT s u m Char
+char c = satisfy (== c)
+
+string :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+ => [Char] -> ParsecT s u m [Char]
+string = mapM char
+
+newline :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+ => ParsecT s u m Char
+newline = satisfy (== '\n')
+
+space :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+ => ParsecT s u m Char
+space = satisfy isSpace
+
+spaces :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+ => ParsecT s u m ()
+spaces = P.skipMany space P.<?> "white space"
+
+letter :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+ => ParsecT s u m Char
+letter = satisfy isLetter
+
+alphaNum :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+ => ParsecT s u m Char
+alphaNum = satisfy isAlphaNum
+
+digit :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+ => ParsecT s u m Char
+digit = satisfy isDigit
+
+hexDigit :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+ => ParsecT s u m Char
+hexDigit = satisfy isHexDigit