aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Class.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Class.hs')
-rw-r--r--src/Text/Pandoc/Class.hs539
1 files changed, 0 insertions, 539 deletions
diff --git a/src/Text/Pandoc/Class.hs b/src/Text/Pandoc/Class.hs
deleted file mode 100644
index fb148666c..000000000
--- a/src/Text/Pandoc/Class.hs
+++ /dev/null
@@ -1,539 +0,0 @@
-{-# LANGUAGE DeriveFunctor, DeriveDataTypeable, TypeSynonymInstances,
-FlexibleInstances, GeneralizedNewtypeDeriving, FlexibleContexts #-}
-
-{-
-Copyright (C) 2016 Jesse Rosenthal <jrosenthal@jhu.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Class
- Copyright : Copyright (C) 2016 Jesse Rosenthal
- License : GNU GPL, version 2 or above
-
- Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
- Stability : alpha
- Portability : portable
-
-Typeclass for pandoc readers and writers, allowing both IO and pure instances.
--}
-
-module Text.Pandoc.Class ( PandocMonad(..)
- , CommonState(..)
- , PureState(..)
- , getPureState
- , getsPureState
- , putPureState
- , modifyPureState
- , getPOSIXTime
- , getZonedTime
- , readFileFromDirs
- , report
- , getLog
- , setVerbosity
- , getMediaBag
- , setMediaBag
- , insertMedia
- , fetchItem
- , getInputFiles
- , getOutputFile
- , PandocIO(..)
- , PandocPure(..)
- , FileTree(..)
- , FileInfo(..)
- , runIO
- , runIOorExplode
- , runPure
- , withMediaBag
- ) where
-
-import Prelude hiding (readFile)
-import System.Random (StdGen, next, mkStdGen)
-import qualified System.Random as IO (newStdGen)
-import Codec.Archive.Zip (Archive, fromArchive, emptyArchive)
-import Data.Unique (hashUnique)
-import qualified Data.Unique as IO (newUnique)
-import qualified Text.Pandoc.Shared as IO ( readDataFile
- , openURL )
-import qualified Text.Pandoc.UTF8 as UTF8
-import Text.Pandoc.Compat.Time (UTCTime)
-import Text.Pandoc.Logging
-import Text.Parsec (ParsecT)
-import qualified Text.Pandoc.Compat.Time as IO (getCurrentTime)
-import Text.Pandoc.MIME (MimeType, getMimeType)
-import Data.Time.Clock.POSIX ( utcTimeToPOSIXSeconds
- , posixSecondsToUTCTime
- , POSIXTime )
-import Data.Time.LocalTime (TimeZone, ZonedTime, utcToZonedTime, utc)
-import Network.URI ( escapeURIString, nonStrictRelativeTo,
- unEscapeString, parseURIReference, isAllowedInURI,
- parseURI, URI(..) )
-import qualified Data.Time.LocalTime as IO (getCurrentTimeZone)
-import Text.Pandoc.MediaBag (MediaBag, lookupMedia)
-import qualified Text.Pandoc.MediaBag as MB
-import qualified Data.ByteString as B
-import qualified Data.ByteString.Lazy as BL
-import qualified System.Environment as IO (lookupEnv)
-import System.FilePath.Glob (match, compile)
-import System.FilePath ((</>), takeExtension, dropExtension)
-import qualified System.FilePath.Glob as IO (glob)
-import qualified System.Directory as IO (getModificationTime)
-import Control.Monad as M (fail)
-import Control.Monad.Reader (ReaderT)
-import Control.Monad.State
-import Control.Monad.Except
-import Control.Monad.Writer (WriterT)
-import Control.Monad.RWS (RWST)
-import Data.Word (Word8)
-import Data.Default
-import System.IO.Error
-import System.IO (stderr)
-import qualified Data.Map as M
-import Text.Pandoc.Error
-import Text.Printf (printf)
-
-class (Functor m, Applicative m, Monad m, MonadError PandocError m)
- => PandocMonad m where
- lookupEnv :: String -> m (Maybe String)
- getCurrentTime :: m UTCTime
- getCurrentTimeZone :: m TimeZone
- newStdGen :: m StdGen
- newUniqueHash :: m Int
- openURL :: String -> m (B.ByteString, Maybe MimeType)
- readFileLazy :: FilePath -> m BL.ByteString
- readFileStrict :: FilePath -> m B.ByteString
- readDataFile :: Maybe FilePath
- -> FilePath
- -> m B.ByteString
- glob :: String -> m [FilePath]
- getModificationTime :: FilePath -> m UTCTime
- getCommonState :: m CommonState
- putCommonState :: CommonState -> m ()
-
- getsCommonState :: (CommonState -> a) -> m a
- getsCommonState f = f <$> getCommonState
-
- modifyCommonState :: (CommonState -> CommonState) -> m ()
- modifyCommonState f = getCommonState >>= putCommonState . f
-
- logOutput :: LogMessage -> m ()
-
--- Functions defined for all PandocMonad instances
-
-setVerbosity :: PandocMonad m => Verbosity -> m ()
-setVerbosity verbosity =
- modifyCommonState $ \st -> st{ stVerbosity = verbosity }
-
-getLog :: PandocMonad m => m [LogMessage]
-getLog = reverse <$> getsCommonState stLog
-
-report :: PandocMonad m => LogMessage -> m ()
-report msg = do
- verbosity <- getsCommonState stVerbosity
- let level = messageVerbosity msg
- when (level <= verbosity) $ do
- logOutput msg
- unless (level == DEBUG) $
- modifyCommonState $ \st -> st{ stLog = msg : stLog st }
-
-setMediaBag :: PandocMonad m => MediaBag -> m ()
-setMediaBag mb = modifyCommonState $ \st -> st{stMediaBag = mb}
-
-getMediaBag :: PandocMonad m => m MediaBag
-getMediaBag = getsCommonState stMediaBag
-
-insertMedia :: PandocMonad m => FilePath -> Maybe MimeType -> BL.ByteString -> m ()
-insertMedia fp mime bs = do
- mb <- getsCommonState stMediaBag
- let mb' = MB.insertMedia fp mime bs mb
- modifyCommonState $ \st -> st{stMediaBag = mb' }
-
-getInputFiles :: PandocMonad m => m (Maybe [FilePath])
-getInputFiles = getsCommonState stInputFiles
-
-getOutputFile :: PandocMonad m => m (Maybe FilePath)
-getOutputFile = getsCommonState stOutputFile
-
-getPOSIXTime :: (PandocMonad m) => m POSIXTime
-getPOSIXTime = utcTimeToPOSIXSeconds <$> getCurrentTime
-
-getZonedTime :: (PandocMonad m) => m ZonedTime
-getZonedTime = do
- t <- getCurrentTime
- tz <- getCurrentTimeZone
- return $ utcToZonedTime tz t
-
--- | Read file, checking in any number of directories.
-readFileFromDirs :: PandocMonad m => [FilePath] -> FilePath -> m (Maybe String)
-readFileFromDirs [] _ = return Nothing
-readFileFromDirs (d:ds) f = catchError
- ((Just . UTF8.toStringLazy) <$> readFileLazy (d </> f))
- (\_ -> readFileFromDirs ds f)
-
---
-
-data CommonState = CommonState { stLog :: [LogMessage]
- , stMediaBag :: MediaBag
- , stInputFiles :: Maybe [FilePath]
- , stOutputFile :: Maybe FilePath
- , stVerbosity :: Verbosity
- }
-
-instance Default CommonState where
- def = CommonState { stLog = []
- , stMediaBag = mempty
- , stInputFiles = Nothing
- , stOutputFile = Nothing
- , stVerbosity = WARNING
- }
-
-runIO :: PandocIO a -> IO (Either PandocError a)
-runIO ma = flip evalStateT def $ runExceptT $ unPandocIO ma
-
-withMediaBag :: PandocMonad m => m a -> m (a, MediaBag)
-withMediaBag ma = ((,)) <$> ma <*> getMediaBag
-
-runIOorExplode :: PandocIO a -> IO a
-runIOorExplode ma = runIO ma >>= handleError
-
-newtype PandocIO a = PandocIO {
- unPandocIO :: ExceptT PandocError (StateT CommonState IO) a
- } deriving ( MonadIO
- , Functor
- , Applicative
- , Monad
- , MonadError PandocError
- )
-
-instance PandocMonad PandocIO where
- lookupEnv = liftIO . IO.lookupEnv
- getCurrentTime = liftIO IO.getCurrentTime
- getCurrentTimeZone = liftIO IO.getCurrentTimeZone
- newStdGen = liftIO IO.newStdGen
- newUniqueHash = hashUnique <$> (liftIO IO.newUnique)
- openURL u = do
- eitherRes <- liftIO $ (tryIOError $ IO.openURL u)
- case eitherRes of
- Right (Right res) -> return res
- Right (Left _) -> throwError $ PandocFileReadError u
- Left _ -> throwError $ PandocFileReadError u
- readFileLazy s = do
- eitherBS <- liftIO (tryIOError $ BL.readFile s)
- case eitherBS of
- Right bs -> return bs
- Left _ -> throwError $ PandocFileReadError s
- readFileStrict s = do
- eitherBS <- liftIO (tryIOError $ B.readFile s)
- case eitherBS of
- Right bs -> return bs
- Left _ -> throwError $ PandocFileReadError s
- -- TODO: Make this more sensitive to the different sorts of failure
- readDataFile mfp fname = do
- eitherBS <- liftIO (tryIOError $ IO.readDataFile mfp fname)
- case eitherBS of
- Right bs -> return bs
- Left _ -> throwError $ PandocFileReadError fname
- glob = liftIO . IO.glob
- getModificationTime fp = do
- eitherMtime <- liftIO (tryIOError $ IO.getModificationTime fp)
- case eitherMtime of
- Right mtime -> return mtime
- Left _ -> throwError $ PandocFileReadError fp
- getCommonState = PandocIO $ lift get
- putCommonState x = PandocIO $ lift $ put x
- logOutput msg =
- liftIO $ UTF8.hPutStrLn stderr $ printf "%-7s %s"
- (show (messageVerbosity msg)) (showLogMessage msg)
-
--- | Specialized version of parseURIReference that disallows
--- single-letter schemes. Reason: these are usually windows absolute
--- paths.
-parseURIReference' :: String -> Maybe URI
-parseURIReference' s =
- case parseURIReference s of
- Just u
- | length (uriScheme u) > 2 -> Just u
- | null (uriScheme u) -> Just u -- protocol-relative
- _ -> Nothing
-
--- | Fetch an image or other item from the local filesystem or the net.
--- Returns raw content and maybe mime type.
-fetchItem :: PandocMonad m
- => Maybe String
- -> String
- -> m (B.ByteString, Maybe MimeType)
-fetchItem sourceURL s = do
- mediabag <- getMediaBag
- case lookupMedia s mediabag of
- Just (mime, bs) -> return $ (BL.toStrict bs, Just mime)
- Nothing -> downloadOrRead sourceURL s
-
-downloadOrRead :: PandocMonad m
- => Maybe String
- -> String
- -> m (B.ByteString, Maybe MimeType)
-downloadOrRead sourceURL s = do
- case (sourceURL >>= parseURIReference' .
- ensureEscaped, ensureEscaped s) of
- (Just u, s') -> -- try fetching from relative path at source
- case parseURIReference' s' of
- Just u' -> openURL $ show $ u' `nonStrictRelativeTo` u
- Nothing -> openURL s' -- will throw error
- (Nothing, s'@('/':'/':_)) -> -- protocol-relative URI
- case parseURIReference' s' of
- Just u' -> openURL $ show $ u' `nonStrictRelativeTo` httpcolon
- Nothing -> openURL s' -- will throw error
- (Nothing, s') ->
- case parseURI s' of -- requires absolute URI
- -- We don't want to treat C:/ as a scheme:
- Just u' | length (uriScheme u') > 2 -> openURL (show u')
- Just u' | uriScheme u' == "file:" ->
- readLocalFile $ dropWhile (=='/') (uriPath u')
- _ -> readLocalFile fp -- get from local file system
- where readLocalFile f = do
- cont <- readFileStrict f
- return (cont, mime)
- httpcolon = URI{ uriScheme = "http:",
- uriAuthority = Nothing,
- uriPath = "",
- uriQuery = "",
- uriFragment = "" }
- dropFragmentAndQuery = takeWhile (\c -> c /= '?' && c /= '#')
- fp = unEscapeString $ dropFragmentAndQuery s
- mime = case takeExtension fp of
- ".gz" -> getMimeType $ dropExtension fp
- ".svgz" -> getMimeType $ dropExtension fp ++ ".svg"
- x -> getMimeType x
- ensureEscaped = escapeURIString isAllowedInURI . map convertSlash
- convertSlash '\\' = '/'
- convertSlash x = x
-
-data PureState = PureState { stStdGen :: StdGen
- , stWord8Store :: [Word8] -- should be
- -- inifinite,
- -- i.e. [1..]
- , stUniqStore :: [Int] -- should be
- -- inifinite and
- -- contain every
- -- element at most
- -- once, e.g. [1..]
- , stEnv :: [(String, String)]
- , stTime :: UTCTime
- , stTimeZone :: TimeZone
- , stReferenceDocx :: Archive
- , stReferenceODT :: Archive
- , stFiles :: FileTree
- , stUserDataDir :: FileTree
- , stCabalDataDir :: FileTree
- , stFontFiles :: [FilePath]
- }
-
-instance Default PureState where
- def = PureState { stStdGen = mkStdGen 1848
- , stWord8Store = [1..]
- , stUniqStore = [1..]
- , stEnv = [("USER", "pandoc-user")]
- , stTime = posixSecondsToUTCTime 0
- , stTimeZone = utc
- , stReferenceDocx = emptyArchive
- , stReferenceODT = emptyArchive
- , stFiles = mempty
- , stUserDataDir = mempty
- , stCabalDataDir = mempty
- , stFontFiles = []
- }
-
-
-getPureState :: PandocPure PureState
-getPureState = PandocPure $ lift $ lift $ get
-
-getsPureState :: (PureState -> a) -> PandocPure a
-getsPureState f = f <$> getPureState
-
-putPureState :: PureState -> PandocPure ()
-putPureState ps= PandocPure $ lift $ lift $ put ps
-
-modifyPureState :: (PureState -> PureState) -> PandocPure ()
-modifyPureState f = PandocPure $ lift $ lift $ modify f
-
-
-data FileInfo = FileInfo { infoFileMTime :: UTCTime
- , infoFileContents :: B.ByteString
- }
-
-newtype FileTree = FileTree {unFileTree :: M.Map FilePath FileInfo}
- deriving (Monoid)
-
-getFileInfo :: FilePath -> FileTree -> Maybe FileInfo
-getFileInfo fp tree = M.lookup fp $ unFileTree tree
-
-
-newtype PandocPure a = PandocPure {
- unPandocPure :: ExceptT PandocError
- (StateT CommonState (State PureState)) a
- } deriving ( Functor
- , Applicative
- , Monad
- , MonadError PandocError
- )
-
-runPure :: PandocPure a -> Either PandocError a
-runPure x = flip evalState def $
- flip evalStateT def $
- runExceptT $
- unPandocPure x
-
-instance PandocMonad PandocPure where
- lookupEnv s = do
- env <- getsPureState stEnv
- return (lookup s env)
-
- getCurrentTime = getsPureState stTime
-
- getCurrentTimeZone = getsPureState stTimeZone
-
- newStdGen = do
- g <- getsPureState stStdGen
- let (_, nxtGen) = next g
- modifyPureState $ \st -> st { stStdGen = nxtGen }
- return g
-
- newUniqueHash = do
- uniqs <- getsPureState stUniqStore
- case uniqs of
- u : us -> do
- modifyPureState $ \st -> st { stUniqStore = us }
- return u
- _ -> M.fail "uniq store ran out of elements"
- openURL _ = throwError $ PandocSomeError "Cannot open URL in PandocPure"
- readFileLazy fp = do
- fps <- getsPureState stFiles
- case infoFileContents <$> getFileInfo fp fps of
- Just bs -> return (BL.fromStrict bs)
- Nothing -> throwError $ PandocFileReadError fp
- readFileStrict fp = do
- fps <- getsPureState stFiles
- case infoFileContents <$> getFileInfo fp fps of
- Just bs -> return bs
- Nothing -> throwError $ PandocFileReadError fp
- readDataFile Nothing "reference.docx" = do
- (B.concat . BL.toChunks . fromArchive) <$> getsPureState stReferenceDocx
- readDataFile Nothing "reference.odt" = do
- (B.concat . BL.toChunks . fromArchive) <$> getsPureState stReferenceODT
- readDataFile Nothing fname = do
- let fname' = if fname == "MANUAL.txt" then fname else "data" </> fname
- readFileStrict fname'
- readDataFile (Just userDir) fname = do
- userDirFiles <- getsPureState stUserDataDir
- case infoFileContents <$> (getFileInfo (userDir </> fname) userDirFiles) of
- Just bs -> return bs
- Nothing -> readDataFile Nothing fname
-
- glob s = do
- fontFiles <- getsPureState stFontFiles
- return (filter (match (compile s)) fontFiles)
-
- getModificationTime fp = do
- fps <- getsPureState stFiles
- case infoFileMTime <$> (getFileInfo fp fps) of
- Just tm -> return tm
- Nothing -> throwError $ PandocFileReadError fp
-
- getCommonState = PandocPure $ lift $ get
- putCommonState x = PandocPure $ lift $ put x
-
- logOutput _msg = return ()
-
-instance PandocMonad m => PandocMonad (ParsecT s st m) where
- lookupEnv = lift . lookupEnv
- getCurrentTime = lift getCurrentTime
- getCurrentTimeZone = lift getCurrentTimeZone
- newStdGen = lift newStdGen
- newUniqueHash = lift newUniqueHash
- openURL = lift . openURL
- readFileLazy = lift . readFileLazy
- readFileStrict = lift . readFileStrict
- readDataFile mbuserdir = lift . readDataFile mbuserdir
- glob = lift . glob
- getModificationTime = lift . getModificationTime
- getCommonState = lift getCommonState
- putCommonState = lift . putCommonState
- logOutput = lift . logOutput
-
-instance PandocMonad m => PandocMonad (ReaderT r m) where
- lookupEnv = lift . lookupEnv
- getCurrentTime = lift getCurrentTime
- getCurrentTimeZone = lift getCurrentTimeZone
- newStdGen = lift newStdGen
- newUniqueHash = lift newUniqueHash
- openURL = lift . openURL
- readFileLazy = lift . readFileLazy
- readFileStrict = lift . readFileStrict
- readDataFile mbuserdir = lift . readDataFile mbuserdir
- glob = lift . glob
- getModificationTime = lift . getModificationTime
- getCommonState = lift getCommonState
- putCommonState = lift . putCommonState
- logOutput = lift . logOutput
-
-instance (PandocMonad m, Monoid w) => PandocMonad (WriterT w m) where
- lookupEnv = lift . lookupEnv
- getCurrentTime = lift getCurrentTime
- getCurrentTimeZone = lift getCurrentTimeZone
- newStdGen = lift newStdGen
- newUniqueHash = lift newUniqueHash
- openURL = lift . openURL
- readFileLazy = lift . readFileLazy
- readFileStrict = lift . readFileStrict
- readDataFile mbuserdir = lift . readDataFile mbuserdir
- glob = lift . glob
- getModificationTime = lift . getModificationTime
- getCommonState = lift getCommonState
- putCommonState = lift . putCommonState
- logOutput = lift . logOutput
-
-instance (PandocMonad m, Monoid w) => PandocMonad (RWST r w st m) where
- lookupEnv = lift . lookupEnv
- getCurrentTime = lift getCurrentTime
- getCurrentTimeZone = lift getCurrentTimeZone
- newStdGen = lift newStdGen
- newUniqueHash = lift newUniqueHash
- openURL = lift . openURL
- readFileLazy = lift . readFileLazy
- readFileStrict = lift . readFileStrict
- readDataFile mbuserdir = lift . readDataFile mbuserdir
- glob = lift . glob
- getModificationTime = lift . getModificationTime
- getCommonState = lift getCommonState
- putCommonState = lift . putCommonState
- logOutput = lift . logOutput
-
-instance PandocMonad m => PandocMonad (StateT st m) where
- lookupEnv = lift . lookupEnv
- getCurrentTime = lift getCurrentTime
- getCurrentTimeZone = lift getCurrentTimeZone
- newStdGen = lift newStdGen
- newUniqueHash = lift newUniqueHash
- openURL = lift . openURL
- readFileLazy = lift . readFileLazy
- readFileStrict = lift . readFileStrict
- readDataFile mbuserdir = lift . readDataFile mbuserdir
- glob = lift . glob
- getModificationTime = lift . getModificationTime
- getCommonState = lift getCommonState
- putCommonState = lift . putCommonState
- logOutput = lift . logOutput
-