summaryrefslogtreecommitdiff
path: root/src/Hakyll/Core/Provider/Metadata.hs
blob: 6285ce17b74ca93ac41b71154073e6d0684189e6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
--------------------------------------------------------------------------------
-- | Internal module to parse metadata
{-# LANGUAGE BangPatterns    #-}
{-# LANGUAGE RecordWildCards #-}
module Hakyll.Core.Provider.Metadata
    ( loadMetadata
    , parsePage

    , MetadataException (..)
    ) where


--------------------------------------------------------------------------------
import           Control.Arrow                 (second)
import           Control.Exception             (Exception, throwIO)
import           Control.Monad                 (guard)
import qualified Data.ByteString               as B
import qualified Data.ByteString.Char8         as BC
import           Data.List.Extended            (breakWhen)
import qualified Data.Map                      as M
import           Data.Maybe                    (fromMaybe)
import           Data.Monoid                   ((<>))
import qualified Data.Text                     as T
import qualified Data.Text.Encoding            as T
import qualified Data.Yaml                     as Yaml
import           Hakyll.Core.Identifier
import           Hakyll.Core.Metadata
import           Hakyll.Core.Provider.Internal
import           System.IO                     as IO


--------------------------------------------------------------------------------
loadMetadata :: Provider -> Identifier -> IO (Metadata, Maybe String)
loadMetadata p identifier = do
    hasHeader  <- probablyHasMetadataHeader fp
    (md, body) <- if hasHeader
        then second Just <$> loadMetadataHeader fp
        else return (mempty, Nothing)

    emd <- case mi of
        Nothing  -> return mempty
        Just mi' -> loadMetadataFile $ resourceFilePath p mi'

    return (md <> emd, body)
  where
    normal = setVersion Nothing identifier
    fp     = resourceFilePath p identifier
    mi     = M.lookup normal (providerFiles p) >>= resourceInfoMetadata


--------------------------------------------------------------------------------
loadMetadataHeader :: FilePath -> IO (Metadata, String)
loadMetadataHeader fp = do
    fileContent <- readFile fp
    case parsePage fileContent of
        Right x   -> return x
        Left  err -> throwIO $ MetadataException fp err


--------------------------------------------------------------------------------
loadMetadataFile :: FilePath -> IO Metadata
loadMetadataFile fp = do
    fileContent <- B.readFile fp
    let errOrMeta = Yaml.decodeEither' fileContent
    either (fail . show) return errOrMeta


--------------------------------------------------------------------------------
-- | Check if a file "probably" has a metadata header. The main goal of this is
-- to exclude binary files (which are unlikely to start with "---").
probablyHasMetadataHeader :: FilePath -> IO Bool
probablyHasMetadataHeader fp = do
    handle <- IO.openFile fp IO.ReadMode
    bs     <- BC.hGet handle 1024
    IO.hClose handle
    return $ isMetadataHeader bs
  where
    isMetadataHeader bs =
        let pre = BC.takeWhile (\x -> x /= '\n' && x /= '\r') bs
        in  BC.length pre >= 3 && BC.all (== '-') pre


--------------------------------------------------------------------------------
-- | Parse the page metadata and body.
splitMetadata :: String -> (Maybe String, String)
splitMetadata str0 = fromMaybe (Nothing, str0) $ do
    guard $ leading >= 3
    let !str1 = drop leading str0
    guard $ all isNewline (take 1 str1)
    let !(!meta, !content0) = breakWhen isTrailing str1
    guard $ not $ null content0
    let !content1 = drop (leading + 1) content0
        !content2 = dropWhile isNewline $ dropWhile isInlineSpace content1
    -- Adding this newline fixes the line numbers reported by the YAML parser.
    -- It's a bit ugly but it works.
    return (Just ('\n' : meta), content2)
  where
    -- Parse the leading "---"
    !leading = length $ takeWhile (== '-') str0

    -- Predicate to recognize the trailing "---" or "..."
    isTrailing []       = False
    isTrailing (x : xs) =
        isNewline x && length (takeWhile isDash xs) == leading

    -- Characters
    isNewline     c = c == '\n' || c == '\r'
    isDash        c = c == '-'  || c == '.'
    isInlineSpace c = c == '\t' || c == ' '


--------------------------------------------------------------------------------
parseMetadata :: String -> Either Yaml.ParseException Metadata
parseMetadata = Yaml.decodeEither' . T.encodeUtf8 . T.pack


--------------------------------------------------------------------------------
parsePage :: String -> Either Yaml.ParseException (Metadata, String)
parsePage fileContent = case mbMetaBlock of
    Nothing        -> return (mempty, content)
    Just metaBlock -> case parseMetadata metaBlock of
        Left  err  -> Left   err
        Right meta -> return (meta, content)
  where
    !(!mbMetaBlock, !content) = splitMetadata fileContent


--------------------------------------------------------------------------------
-- | Thrown in the IO monad if things go wrong. Provides a nice-ish error
-- message.
data MetadataException = MetadataException FilePath Yaml.ParseException


--------------------------------------------------------------------------------
instance Exception MetadataException


--------------------------------------------------------------------------------
instance Show MetadataException where
    show (MetadataException fp err) =
        fp ++ ": " ++ Yaml.prettyPrintParseException err ++ hint

      where
        hint = case err of
            Yaml.InvalidYaml (Just (Yaml.YamlParseException {..}))
                | yamlProblem == problem -> "\n" ++
                    "Hint: if the metadata value contains characters such\n" ++
                    "as ':' or '-', try enclosing it in quotes."
            _ -> ""

        problem = "mapping values are not allowed in this context"