From 08c4d74a2fe9c667e725f2ebb41bc01006a703a1 Mon Sep 17 00:00:00 2001 From: Jasper Van der Jeugt Date: Mon, 4 Oct 2010 01:20:17 +0200 Subject: Separate pandoc from page reading, general caching --- src/Text/Hakyll/CreateContext.hs | 10 ++-- src/Text/Hakyll/Internal/Cache.hs | 22 ++++++++ src/Text/Hakyll/Internal/Page.hs | 97 ++++++++++++++---------------------- src/Text/Hakyll/Internal/Template.hs | 6 ++- src/Text/Hakyll/Pandoc.hs | 48 ++++++++++++++++++ 5 files changed, 116 insertions(+), 67 deletions(-) create mode 100644 src/Text/Hakyll/Pandoc.hs (limited to 'src') diff --git a/src/Text/Hakyll/CreateContext.hs b/src/Text/Hakyll/CreateContext.hs index a38d055..d9499ea 100644 --- a/src/Text/Hakyll/CreateContext.hs +++ b/src/Text/Hakyll/CreateContext.hs @@ -13,21 +13,19 @@ import qualified Data.Map as M import Control.Arrow (second) import Control.Monad (liftM2) import Control.Applicative ((<$>)) +import Control.Arrow ((>>>)) -import Text.Hakyll.File import Text.Hakyll.Context import Text.Hakyll.HakyllAction import Text.Hakyll.Render import Text.Hakyll.Internal.Page +import Text.Hakyll.Pandoc +import Text.Hakyll.Internal.Cache -- | Create a @Context@ from a page file stored on the disk. This is probably -- the most common way to create a @Context@. createPage :: FilePath -> HakyllAction () Context -createPage path = HakyllAction - { actionDependencies = [path] - , actionUrl = Left $ toUrl path - , actionFunction = const (readPage path) - } +createPage path = cacheAction "pages" $ readPageAction path >>> renderAction -- | Create a "custom page" @Context@. -- diff --git a/src/Text/Hakyll/Internal/Cache.hs b/src/Text/Hakyll/Internal/Cache.hs index 2a196f1..b83d9af 100644 --- a/src/Text/Hakyll/Internal/Cache.hs +++ b/src/Text/Hakyll/Internal/Cache.hs @@ -2,14 +2,17 @@ module Text.Hakyll.Internal.Cache ( storeInCache , getFromCache , isCacheMoreRecent + , cacheAction ) where import Control.Monad ((<=<)) import Control.Monad.Reader (liftIO) import Data.Binary +import System.FilePath (()) import Text.Hakyll.File import Text.Hakyll.HakyllMonad (Hakyll) +import Text.Hakyll.HakyllAction -- | We can store all datatypes instantiating @Binary@ to the cache. The cache -- directory is specified by the @HakyllConfiguration@, usually @_cache@. @@ -29,3 +32,22 @@ getFromCache = liftIO . decodeFile <=< toCache -- | Check if a file in the cache is more recent than a number of other files. isCacheMoreRecent :: FilePath -> [FilePath] -> Hakyll Bool isCacheMoreRecent file depends = toCache file >>= flip isFileMoreRecent depends + +-- | Cache an entire arrow +-- +cacheAction :: Binary a + => String + -> HakyllAction () a + -> HakyllAction () a +cacheAction key action = action { actionFunction = const cacheFunction } + where + cacheFunction = do + -- Construct a filename + fileName <- fmap (key ) $ either id (const $ return "unknown") + $ actionUrl action + -- Check the cache + cacheOk <- isCacheMoreRecent fileName $ actionDependencies action + if cacheOk then getFromCache fileName + else do result <- actionFunction action () + storeInCache result fileName + return result diff --git a/src/Text/Hakyll/Internal/Page.hs b/src/Text/Hakyll/Internal/Page.hs index 92fb4fc..9e715d5 100644 --- a/src/Text/Hakyll/Internal/Page.hs +++ b/src/Text/Hakyll/Internal/Page.hs @@ -1,47 +1,29 @@ -- | A module for dealing with @Page@s. This module is mostly internally used. module Text.Hakyll.Internal.Page - ( readPage + ( PageSection (..) + , readPage + , readPageAction ) where -import qualified Data.Map as M import Data.List (isPrefixOf) import Data.Char (isSpace) import Control.Monad.Reader (liftIO) import System.FilePath import Control.Monad.State (State, evalState, get, put) -import Text.Pandoc - -import Text.Hakyll.Context (Context (..)) import Text.Hakyll.File import Text.Hakyll.HakyllMonad +import Text.Hakyll.HakyllAction import Text.Hakyll.Regex (substituteRegex, matchesRegex) import Text.Hakyll.Util (trim) -import Text.Hakyll.Internal.Cache -import Text.Hakyll.Internal.FileType - --- | Get a render function for a given extension. -getRenderFunction :: FileType -> Hakyll (String -> String) -getRenderFunction Html = return id -getRenderFunction Text = return id -getRenderFunction UnknownFileType = return id -getRenderFunction fileType = do - parserState <- askHakyll pandocParserState - writerOptions <- askHakyll pandocWriterOptions - return $ writeHtmlString writerOptions - . readFunction fileType (readOptions parserState fileType) - where - readFunction ReStructuredText = readRST - readFunction LaTeX = readLaTeX - readFunction Markdown = readMarkdown - readFunction LiterateHaskellMarkdown = readMarkdown - readFunction t = error $ "Cannot render " ++ show t - readOptions options LiterateHaskellMarkdown = options - { stateLiterateHaskell = True } - readOptions options _ = options +-- | Page info handle: (key, value, needs rendering) +-- +data PageSection = PageSection {unPageSection :: [(String, String, Bool)]} + deriving (Show) -- | Split a page into sections. +-- splitAtDelimiters :: [String] -> State (Maybe String) [[String]] splitAtDelimiters [] = return [] splitAtDelimiters ls@(x:xs) = do @@ -63,59 +45,54 @@ isPossibleDelimiter :: String -> Bool isPossibleDelimiter = isPrefixOf "---" -- | Read one section of a page. -readSection :: (String -> String) -- ^ Render function. - -> Bool -- ^ If this section is the first section in the page. +-- +readSection :: Bool -- ^ If this section is the first section in the page. -> [String] -- ^ Lines in the section. - -> [(String, String)] -- ^ Key-values extracted. -readSection _ _ [] = [] -readSection renderFunction isFirst ls + -> PageSection -- ^ Key-values extracted. +readSection _ [] = PageSection [] +readSection isFirst ls | not isDelimiter' = body ls - | isNamedDelimiter = readSectionMetaData ls - | isFirst = readSimpleMetaData (drop 1 ls) + | isNamedDelimiter = PageSection $ readSectionMetaData ls + | isFirst = PageSection $ readSimpleMetaData (drop 1 ls) | otherwise = body (drop 1 ls) where isDelimiter' = isPossibleDelimiter (head ls) isNamedDelimiter = head ls `matchesRegex` "^----* *[a-zA-Z0-9][a-zA-Z0-9]*" - body ls' = [("body", renderFunction $ unlines ls')] + body ls' = PageSection [("body", unlines ls', True)] readSimpleMetaData = map readPair . filter (not . all isSpace) readPair = trimPair . break (== ':') - trimPair (key, value) = (trim key, trim $ drop 1 value) + trimPair (key, value) = (trim key, trim (drop 1 value), False) readSectionMetaData [] = [] readSectionMetaData (header:value) = let key = substituteRegex "[^a-zA-Z0-9]" "" header - in [(key, renderFunction $ unlines value)] + in [(key, unlines value, True)] --- | Read a page from a file. Metadata is supported, and if the filename --- has a @.markdown@ extension, it will be rendered using pandoc. -readPageFromFile :: FilePath -> Hakyll Context -readPageFromFile path = do - renderFunction <- getRenderFunction $ getFileType path - let sectionFunctions = map (readSection renderFunction) - (True : repeat False) +-- | Read a page from a file. Metadata is supported. +-- +readPage :: FilePath -> Hakyll [PageSection] +readPage path = do + let sectionFunctions = map readSection $ True : repeat False -- Read file. contents <- liftIO $ readFile path url <- toUrl path let sections = evalState (splitAtDelimiters $ lines contents) Nothing - sectionsData = concat $ zipWith ($) sectionFunctions sections - context = M.fromList $ - ("url", url) : ("path", path) : category ++ sectionsData + sectionsData = zipWith ($) sectionFunctions sections - return $ Context context + return $ PageSection [ ("url", url, False) + , ("path", path, False) + ] : category : sectionsData where category = let dirs = splitDirectories $ takeDirectory path - in [("category", last dirs) | not (null dirs)] + in PageSection [("category", last dirs, False) | not (null dirs)] --- | Read a page. Might fetch it from the cache if available. Otherwise, it will --- read it from the file given and store it in the cache. -readPage :: FilePath -> Hakyll Context -readPage path = do - isCacheMoreRecent' <- isCacheMoreRecent fileName [path] - if isCacheMoreRecent' then getFromCache fileName - else do page <- readPageFromFile path - storeInCache page fileName - return page - where - fileName = "pages" path +-- | Read a page from a file. Metadata is supported. +-- +readPageAction :: FilePath -> HakyllAction () [PageSection] +readPageAction path = HakyllAction + { actionDependencies = [path] + , actionUrl = Left $ toUrl path + , actionFunction = const $ readPage path + } diff --git a/src/Text/Hakyll/Internal/Template.hs b/src/Text/Hakyll/Internal/Template.hs index 15a2c8c..9b9d9cf 100644 --- a/src/Text/Hakyll/Internal/Template.hs +++ b/src/Text/Hakyll/Internal/Template.hs @@ -7,6 +7,7 @@ module Text.Hakyll.Internal.Template , finalSubstitute ) where +import Control.Arrow ((>>>)) import Control.Applicative ((<$>)) import Data.List (isPrefixOf) import Data.Char (isAlphaNum) @@ -16,6 +17,8 @@ import qualified Data.Map as M import Text.Hakyll.Context (Context (..)) import Text.Hakyll.HakyllMonad (Hakyll) +import Text.Hakyll.HakyllAction +import Text.Hakyll.Pandoc import Text.Hakyll.Internal.Cache import Text.Hakyll.Internal.Page import Text.Hakyll.Internal.Template.Template @@ -53,7 +56,8 @@ readTemplate path = do where fileName = "templates" path readDefaultTemplate = do - page <- unContext <$> readPage path + page <- unContext <$> + runHakyllAction (readPageAction path >>> renderAction) let body = fromMaybe (error $ "No body in template " ++ fileName) (M.lookup "body" page) return $ fromString body diff --git a/src/Text/Hakyll/Pandoc.hs b/src/Text/Hakyll/Pandoc.hs new file mode 100644 index 0000000..9dbe3d4 --- /dev/null +++ b/src/Text/Hakyll/Pandoc.hs @@ -0,0 +1,48 @@ +module Text.Hakyll.Pandoc where + +import Data.Maybe (fromMaybe) +import qualified Data.Map as M +import Control.Arrow (second) + +import Text.Pandoc + +import Text.Hakyll.Internal.FileType +import Text.Hakyll.Internal.Page +import Text.Hakyll.HakyllMonad +import Text.Hakyll.HakyllAction +import Text.Hakyll.Context + +-- | Get a render function for a given extension. +-- +getRenderFunction :: FileType -> Hakyll (String -> String) +getRenderFunction Html = return id +getRenderFunction Text = return id +getRenderFunction UnknownFileType = return id +getRenderFunction fileType = do + parserState <- askHakyll pandocParserState + writerOptions <- askHakyll pandocWriterOptions + return $ writeHtmlString writerOptions + . readFunction fileType (readOptions parserState fileType) + where + readFunction ReStructuredText = readRST + readFunction LaTeX = readLaTeX + readFunction Markdown = readMarkdown + readFunction LiterateHaskellMarkdown = readMarkdown + readFunction t = error $ "Cannot render " ++ show t + + readOptions options LiterateHaskellMarkdown = options + { stateLiterateHaskell = True } + readOptions options _ = options + +-- | Path must be there +-- +renderAction :: HakyllAction [PageSection] Context +renderAction = createHakyllAction $ \sections -> do + let triples = unPageSection =<< sections + path = fromMaybe "unknown" $ lookup "path" + $ map (\(x, y, _) -> (x, y)) + $ triples + render' <- getRenderFunction $ getFileType path + let pairs = map (\(k, v, r) -> second (if r then render' else id) (k, v)) + triples + return $ Context $ M.fromList pairs -- cgit v1.2.3