-------------------------------------------------------------------------------- -- | Internal module to parse metadata {-# LANGUAGE BangPatterns #-} module Hakyll.Core.Provider.Metadata ( loadMetadata , parsePage ) where -------------------------------------------------------------------------------- import Control.Applicative import Control.Arrow (second) import Control.Monad (guard) import qualified Data.ByteString.Char8 as BC import qualified Data.HashMap.Strict as HMS import Data.List (intercalate) import Data.List.Extended (breakWhen) import qualified Data.Map as M import Data.Maybe (fromMaybe) import Data.Monoid ((<>)) import qualified Data.Text as T import qualified Data.Text.Encoding as T import qualified Data.Yaml as Yaml import Hakyll.Core.Identifier import Hakyll.Core.Metadata import Hakyll.Core.Provider.Internal import Hakyll.Core.Util.Parser import Hakyll.Core.Util.String import System.IO as IO -------------------------------------------------------------------------------- loadMetadata :: Provider -> Identifier -> IO (Metadata, Maybe String) loadMetadata p identifier = do hasHeader <- probablyHasMetadataHeader fp (md, body) <- if hasHeader then second Just <$> loadMetadataHeader fp else return (mempty, Nothing) emd <- case mi of Nothing -> return mempty Just mi' -> loadMetadataFile $ resourceFilePath p mi' return (md <> emd, body) where normal = setVersion Nothing identifier fp = resourceFilePath p identifier mi = M.lookup normal (providerFiles p) >>= resourceInfoMetadata -------------------------------------------------------------------------------- loadMetadataHeader :: FilePath -> IO (Metadata, String) loadMetadataHeader fp = do fileContent <- readFile fp either fail return (parsePage fileContent) -------------------------------------------------------------------------------- loadMetadataFile :: FilePath -> IO Metadata loadMetadataFile fp = do errOrMeta <- Yaml.decodeFileEither fp either (fail . show) return errOrMeta -------------------------------------------------------------------------------- -- | Check if a file "probably" has a metadata header. The main goal of this is -- to exclude binary files (which are unlikely to start with "---"). probablyHasMetadataHeader :: FilePath -> IO Bool probablyHasMetadataHeader fp = do handle <- IO.openFile fp IO.ReadMode bs <- BC.hGet handle 1024 IO.hClose handle return $ isMetadataHeader bs where isMetadataHeader bs = let pre = BC.takeWhile (\x -> x /= '\n' && x /= '\r') bs in BC.length pre >= 3 && BC.all (== '-') pre -------------------------------------------------------------------------------- -- | Parse the page metadata and body. splitMetadata :: String -> (Maybe String, String) splitMetadata str0 = fromMaybe (Nothing, str0) $ do guard $ leading >= 3 let !(!meta, !content0) = breakWhen isTrailing (drop leading str0) guard $ not $ null content0 let !content1 = drop (leading + 1) content0 !content2 = dropWhile isNewline $ dropWhile isInlineSpace content1 return (Just meta, content2) where -- Parse the leading "---" !leading = length $ takeWhile (== '-') str0 -- Predicate to recognize the trailing "---" or "..." isTrailing [] = False isTrailing (x : xs) = isNewline x && length (takeWhile isDash xs) == leading -- Characters isNewline c = c == '\n' || c == '\r' isDash c = c == '-' || c == '.' isInlineSpace c = c == '\t' || c == ' ' -------------------------------------------------------------------------------- parseMetadata :: String -> Either String Metadata parseMetadata = Yaml.decodeEither . T.encodeUtf8 . T.pack -------------------------------------------------------------------------------- parsePage :: String -> Either String (Metadata, String) parsePage fileContent = case mbMetaBlock of Nothing -> return (mempty, content) Just metaBlock -> case parseMetadata metaBlock of Left err -> Left err Right meta -> return (meta, content) where !(!mbMetaBlock, !content) = splitMetadata fileContent