From 0b7b846a283e4f101c8404f760b65a523e64cf51 Mon Sep 17 00:00:00 2001 From: Andrew Miller Date: Wed, 8 Aug 2012 09:10:18 +1200 Subject: Make Resource Provider keep list of files in a set, rather than as a list, and use Data.Set.member rather than Prelude.elem to test whether a resource exists. On the site I'm currently working on (where there are ~1000 resources) this makes hakyll about 25% faster. --- src/Hakyll/Core/Resource/Provider.hs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/Hakyll/Core/Resource/Provider.hs b/src/Hakyll/Core/Resource/Provider.hs index d91c374..14848a8 100644 --- a/src/Hakyll/Core/Resource/Provider.hs +++ b/src/Hakyll/Core/Resource/Provider.hs @@ -12,6 +12,7 @@ -- module Hakyll.Core.Resource.Provider ( ResourceProvider (..) + , resourceList , makeResourceProvider , resourceExists , resourceDigest @@ -22,6 +23,7 @@ import Control.Applicative ((<$>)) import Control.Concurrent (MVar, readMVar, modifyMVar_, newMVar) import Data.Map (Map) import qualified Data.Map as M +import qualified Data.Set as S import Data.Time (UTCTime) import qualified Crypto.Hash.MD5 as MD5 @@ -35,7 +37,7 @@ import Hakyll.Core.Resource -- data ResourceProvider = ResourceProvider { -- | A list of all resources this provider is able to provide - resourceList :: [Resource] + resourceSet :: S.Set Resource , -- | Retrieve a certain resource as string resourceString :: Resource -> IO String , -- | Retrieve a certain resource as lazy bytestring @@ -53,12 +55,16 @@ makeResourceProvider :: [Resource] -- ^ Resource list -> (Resource -> IO LB.ByteString) -- ^ ByteString reader -> (Resource -> IO UTCTime) -- ^ Time checker -> IO ResourceProvider -- ^ Resulting provider -makeResourceProvider l s b t = ResourceProvider l s b t <$> newMVar M.empty +makeResourceProvider l s b t = ResourceProvider (S.fromList l) s b t <$> newMVar M.empty + +-- | Get the list of all resources +resourceList :: ResourceProvider -> [Resource] +resourceList = S.toList . resourceSet -- | Check if a given identifier has a resource -- resourceExists :: ResourceProvider -> Resource -> Bool -resourceExists provider = flip elem $ resourceList provider +resourceExists provider = flip S.member $ resourceSet provider -- | Retrieve a digest for a given resource -- -- cgit v1.2.3 From d49694f58b6e75f76bb4dba6f3640161aa742df5 Mon Sep 17 00:00:00 2001 From: Andrew Miller Date: Wed, 8 Aug 2012 11:41:29 +1200 Subject: Use lrucache to limit the in-memory resource cache to the 500 most recently used items. This stops swap being used on sites where not all the resources will fit in memory, but ensures that frequently used resources like templates stay in the cache. This drastically improves performance in such cases. --- hakyll.cabal | 5 ++++- src/Hakyll/Core/Store.hs | 29 +++++++++++++++++------------ 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/hakyll.cabal b/hakyll.cabal index 2d06d28..271439c 100644 --- a/hakyll.cabal +++ b/hakyll.cabal @@ -60,6 +60,7 @@ Flag unixFilter Library Ghc-Options: -Wall + Ghc-Prof-Options: -auto-all -caf-all Hs-Source-Dirs: src Build-Depends: @@ -84,7 +85,8 @@ Library regex-tdfa >= 1.1 && < 1.2, tagsoup >= 0.12.6 && < 0.13, text >= 0.11 && < 1.12, - time >= 1.1 && < 1.5 + time >= 1.1 && < 1.5, + lrucache >= 1.1.1 && < 1.2 Exposed-Modules: Hakyll @@ -192,6 +194,7 @@ Test-suite hakyll-tests tagsoup >= 0.12.6 && < 0.13, text >= 0.11 && < 1.12, time >= 1.1 && < 1.5, + lrucache >= 1.1.1 && < 1.2, unix >= 2.4 && < 2.6 Other-modules: diff --git a/src/Hakyll/Core/Store.hs b/src/Hakyll/Core/Store.hs index 9c1b9ba..0b5f438 100644 --- a/src/Hakyll/Core/Store.hs +++ b/src/Hakyll/Core/Store.hs @@ -22,6 +22,7 @@ import Data.Typeable (Typeable, TypeRep, cast, typeOf) import Hakyll.Core.Identifier import Hakyll.Core.Util.File +import qualified Data.Cache.LRU.IO as LRU -- | Items we can store -- @@ -40,40 +41,44 @@ data Store = Store { -- | All items are stored on the filesystem storeDirectory :: FilePath , -- | And some items are also kept in-memory - storeMap :: Maybe (MVar (Map FilePath Storable)) + storeLRU :: Maybe (LRU.AtomicLRU FilePath Storable) } +-- | The size of the in-memory cache to use in items. +storeLRUSize :: Maybe Integer +storeLRUSize = Just 500 + -- | Initialize the store -- makeStore :: Bool -- ^ Use in-memory caching -> FilePath -- ^ Directory to use for hard disk storage -> IO Store -- ^ Store makeStore inMemory directory = do - mvar <- if inMemory then Just <$> newMVar M.empty else return Nothing + lru <- if inMemory then Just <$> LRU.newAtomicLRU storeLRUSize else return Nothing return Store { storeDirectory = directory - , storeMap = mvar + , storeLRU = lru } -- | Auxiliary: add an item to the map -- cacheInsert :: (Binary a, Typeable a) => Store -> FilePath -> a -> IO () cacheInsert (Store _ Nothing) _ _ = return () -cacheInsert (Store _ (Just mv)) path value = - modifyMVar_ mv $ return . M.insert path (Storable value) +cacheInsert (Store _ (Just lru)) path value = + LRU.insert path (Storable value) lru -- | Auxiliary: get an item from the cache -- cacheLookup :: forall a. (Binary a, Typeable a) => Store -> FilePath -> IO (StoreGet a) cacheLookup (Store _ Nothing) _ = return NotFound -cacheLookup (Store _ (Just mv)) path = do - map' <- readMVar mv - case M.lookup path map' of - Nothing -> return NotFound - Just (Storable s) -> return $ case cast s of - Nothing -> WrongType (typeOf s) $ typeOf (undefined :: a) - Just s' -> Found s' +cacheLookup (Store _ (Just lru)) path = do + res <- LRU.lookup path lru + case res of + Nothing -> return NotFound + Just (Storable s) -> return $ case cast s of + Nothing -> WrongType (typeOf s) $ typeOf (undefined :: a) + Just s' -> Found s' -- | Create a path -- -- cgit v1.2.3