summaryrefslogtreecommitdiff
path: root/lib/Hakyll/Core/Identifier/Pattern.hs
diff options
context:
space:
mode:
authorJasper Van der Jeugt <m@jaspervdj.be>2017-06-19 11:57:23 +0200
committerJasper Van der Jeugt <m@jaspervdj.be>2017-06-19 11:57:23 +0200
commit67ecff7ad383640bc73d64edc2506c7cc648a134 (patch)
tree6d328e43c3ab86c29a2d775fabaa23618c16fb51 /lib/Hakyll/Core/Identifier/Pattern.hs
parent2df3209bafa08e6b77ee4a8598fc503269513527 (diff)
downloadhakyll-67ecff7ad383640bc73d64edc2506c7cc648a134.tar.gz
Move src/ to lib/, put Init.hs in src/
Diffstat (limited to 'lib/Hakyll/Core/Identifier/Pattern.hs')
-rw-r--r--lib/Hakyll/Core/Identifier/Pattern.hs322
1 files changed, 322 insertions, 0 deletions
diff --git a/lib/Hakyll/Core/Identifier/Pattern.hs b/lib/Hakyll/Core/Identifier/Pattern.hs
new file mode 100644
index 0000000..47ad21b
--- /dev/null
+++ b/lib/Hakyll/Core/Identifier/Pattern.hs
@@ -0,0 +1,322 @@
+--------------------------------------------------------------------------------
+-- | As 'Identifier' is used to specify a single item, a 'Pattern' is used to
+-- specify a list of items.
+--
+-- In most cases, globs are used for patterns.
+--
+-- A very simple pattern of such a pattern is @\"foo\/bar\"@. This pattern will
+-- only match the exact @foo\/bar@ identifier.
+--
+-- To match more than one identifier, there are different captures that one can
+-- use:
+--
+-- * @\"*\"@: matches at most one element of an identifier;
+--
+-- * @\"**\"@: matches one or more elements of an identifier.
+--
+-- Some examples:
+--
+-- * @\"foo\/*\"@ will match @\"foo\/bar\"@ and @\"foo\/foo\"@, but not
+-- @\"foo\/bar\/qux\"@;
+--
+-- * @\"**\"@ will match any identifier;
+--
+-- * @\"foo\/**\"@ will match @\"foo\/bar\"@ and @\"foo\/bar\/qux\"@, but not
+-- @\"bar\/foo\"@;
+--
+-- * @\"foo\/*.html\"@ will match all HTML files in the @\"foo\/\"@ directory.
+--
+-- The 'capture' function allows the user to get access to the elements captured
+-- by the capture elements in the pattern.
+module Hakyll.Core.Identifier.Pattern
+ ( -- * The pattern type
+ Pattern
+
+ -- * Creating patterns
+ , fromGlob
+ , fromList
+ , fromRegex
+ , fromVersion
+ , hasVersion
+ , hasNoVersion
+
+ -- * Composing patterns
+ , (.&&.)
+ , (.||.)
+ , complement
+
+ -- * Applying patterns
+ , matches
+ , filterMatches
+
+ -- * Capturing strings
+ , capture
+ , fromCapture
+ , fromCaptures
+ ) where
+
+
+--------------------------------------------------------------------------------
+import Control.Arrow ((&&&), (>>>))
+import Control.Monad (msum)
+import Data.Binary (Binary (..), getWord8, putWord8)
+import Data.List (inits, isPrefixOf, tails)
+import Data.Maybe (isJust)
+import Data.Set (Set)
+import qualified Data.Set as S
+
+
+--------------------------------------------------------------------------------
+import GHC.Exts (IsString, fromString)
+import Text.Regex.TDFA ((=~))
+
+
+--------------------------------------------------------------------------------
+import Hakyll.Core.Identifier
+
+
+--------------------------------------------------------------------------------
+-- | Elements of a glob pattern
+data GlobComponent
+ = Capture
+ | CaptureMany
+ | Literal String
+ deriving (Eq, Show)
+
+
+--------------------------------------------------------------------------------
+instance Binary GlobComponent where
+ put Capture = putWord8 0
+ put CaptureMany = putWord8 1
+ put (Literal s) = putWord8 2 >> put s
+
+ get = getWord8 >>= \t -> case t of
+ 0 -> pure Capture
+ 1 -> pure CaptureMany
+ 2 -> Literal <$> get
+ _ -> error "Data.Binary.get: Invalid GlobComponent"
+
+
+--------------------------------------------------------------------------------
+-- | Type that allows matching on identifiers
+data Pattern
+ = Everything
+ | Complement Pattern
+ | And Pattern Pattern
+ | Glob [GlobComponent]
+ | List (Set Identifier)
+ | Regex String
+ | Version (Maybe String)
+ deriving (Show)
+
+
+--------------------------------------------------------------------------------
+instance Binary Pattern where
+ put Everything = putWord8 0
+ put (Complement p) = putWord8 1 >> put p
+ put (And x y) = putWord8 2 >> put x >> put y
+ put (Glob g) = putWord8 3 >> put g
+ put (List is) = putWord8 4 >> put is
+ put (Regex r) = putWord8 5 >> put r
+ put (Version v) = putWord8 6 >> put v
+
+ get = getWord8 >>= \t -> case t of
+ 0 -> pure Everything
+ 1 -> Complement <$> get
+ 2 -> And <$> get <*> get
+ 3 -> Glob <$> get
+ 4 -> List <$> get
+ 5 -> Regex <$> get
+ _ -> Version <$> get
+
+
+--------------------------------------------------------------------------------
+instance IsString Pattern where
+ fromString = fromGlob
+
+
+--------------------------------------------------------------------------------
+instance Monoid Pattern where
+ mempty = Everything
+ mappend = (.&&.)
+
+
+--------------------------------------------------------------------------------
+-- | Parse a pattern from a string
+fromGlob :: String -> Pattern
+fromGlob = Glob . parse'
+ where
+ parse' str =
+ let (chunk, rest) = break (`elem` "\\*") str
+ in case rest of
+ ('\\' : x : xs) -> Literal (chunk ++ [x]) : parse' xs
+ ('*' : '*' : xs) -> Literal chunk : CaptureMany : parse' xs
+ ('*' : xs) -> Literal chunk : Capture : parse' xs
+ xs -> Literal chunk : Literal xs : []
+
+
+--------------------------------------------------------------------------------
+-- | Create a 'Pattern' from a list of 'Identifier's it should match.
+--
+-- /Warning/: use this carefully with 'hasNoVersion' and 'hasVersion'. The
+-- 'Identifier's in the list /already/ have versions assigned, and the pattern
+-- will then only match the intersection of both versions.
+--
+-- A more concrete example,
+--
+-- > fromList ["foo.markdown"] .&&. hasVersion "pdf"
+--
+-- will not match anything! The @"foo.markdown"@ 'Identifier' has no version
+-- assigned, so the LHS of '.&&.' will only match this 'Identifier' with no
+-- version. The RHS only matches 'Identifier's with version set to @"pdf"@ --
+-- hence, this pattern matches nothing.
+--
+-- The correct way to use this is:
+--
+-- > fromList $ map (setVersion $ Just "pdf") ["foo.markdown"]
+fromList :: [Identifier] -> Pattern
+fromList = List . S.fromList
+
+
+--------------------------------------------------------------------------------
+-- | Create a 'Pattern' from a regex
+--
+-- Example:
+--
+-- > regex "^foo/[^x]*$
+fromRegex :: String -> Pattern
+fromRegex = Regex
+
+
+--------------------------------------------------------------------------------
+-- | Create a pattern which matches all items with the given version.
+fromVersion :: Maybe String -> Pattern
+fromVersion = Version
+
+
+--------------------------------------------------------------------------------
+-- | Specify a version, e.g.
+--
+-- > "foo/*.markdown" .&&. hasVersion "pdf"
+hasVersion :: String -> Pattern
+hasVersion = fromVersion . Just
+
+
+--------------------------------------------------------------------------------
+-- | Match only if the identifier has no version set, e.g.
+--
+-- > "foo/*.markdown" .&&. hasNoVersion
+hasNoVersion :: Pattern
+hasNoVersion = fromVersion Nothing
+
+
+--------------------------------------------------------------------------------
+-- | '&&' for patterns: the given identifier must match both subterms
+(.&&.) :: Pattern -> Pattern -> Pattern
+x .&&. y = And x y
+infixr 3 .&&.
+
+
+--------------------------------------------------------------------------------
+-- | '||' for patterns: the given identifier must match any subterm
+(.||.) :: Pattern -> Pattern -> Pattern
+x .||. y = complement (complement x `And` complement y) -- De Morgan's law
+infixr 2 .||.
+
+
+--------------------------------------------------------------------------------
+-- | Inverts a pattern, e.g.
+--
+-- > complement "foo/bar.html"
+--
+-- will match /anything/ except @\"foo\/bar.html\"@
+complement :: Pattern -> Pattern
+complement = Complement
+
+
+--------------------------------------------------------------------------------
+-- | Check if an identifier matches a pattern
+matches :: Pattern -> Identifier -> Bool
+matches Everything _ = True
+matches (Complement p) i = not $ matches p i
+matches (And x y) i = matches x i && matches y i
+matches (Glob p) i = isJust $ capture (Glob p) i
+matches (List l) i = i `S.member` l
+matches (Regex r) i = toFilePath i =~ r
+matches (Version v) i = identifierVersion i == v
+
+
+--------------------------------------------------------------------------------
+-- | Given a list of identifiers, retain only those who match the given pattern
+filterMatches :: Pattern -> [Identifier] -> [Identifier]
+filterMatches = filter . matches
+
+
+--------------------------------------------------------------------------------
+-- | Split a list at every possible point, generate a list of (init, tail)
+-- cases. The result is sorted with inits decreasing in length.
+splits :: [a] -> [([a], [a])]
+splits = inits &&& tails >>> uncurry zip >>> reverse
+
+
+--------------------------------------------------------------------------------
+-- | Match a glob against a pattern, generating a list of captures
+capture :: Pattern -> Identifier -> Maybe [String]
+capture (Glob p) i = capture' p (toFilePath i)
+capture _ _ = Nothing
+
+
+--------------------------------------------------------------------------------
+-- | Internal verion of 'capture'
+capture' :: [GlobComponent] -> String -> Maybe [String]
+capture' [] [] = Just [] -- An empty match
+capture' [] _ = Nothing -- No match
+capture' (Literal l : ms) str
+ -- Match the literal against the string
+ | l `isPrefixOf` str = capture' ms $ drop (length l) str
+ | otherwise = Nothing
+capture' (Capture : ms) str =
+ -- Match until the next /
+ let (chunk, rest) = break (== '/') str
+ in msum $ [ fmap (i :) (capture' ms (t ++ rest)) | (i, t) <- splits chunk ]
+capture' (CaptureMany : ms) str =
+ -- Match everything
+ msum $ [ fmap (i :) (capture' ms t) | (i, t) <- splits str ]
+
+
+--------------------------------------------------------------------------------
+-- | Create an identifier from a pattern by filling in the captures with a given
+-- string
+--
+-- Example:
+--
+-- > fromCapture (fromGlob "tags/*") "foo"
+--
+-- Result:
+--
+-- > "tags/foo"
+fromCapture :: Pattern -> String -> Identifier
+fromCapture pattern = fromCaptures pattern . repeat
+
+
+--------------------------------------------------------------------------------
+-- | Create an identifier from a pattern by filling in the captures with the
+-- given list of strings
+fromCaptures :: Pattern -> [String] -> Identifier
+fromCaptures (Glob p) = fromFilePath . fromCaptures' p
+fromCaptures _ = error $
+ "Hakyll.Core.Identifier.Pattern.fromCaptures: fromCaptures only works " ++
+ "on simple globs!"
+
+
+--------------------------------------------------------------------------------
+-- | Internally used version of 'fromCaptures'
+fromCaptures' :: [GlobComponent] -> [String] -> String
+fromCaptures' [] _ = mempty
+fromCaptures' (m : ms) [] = case m of
+ Literal l -> l `mappend` fromCaptures' ms []
+ _ -> error $ "Hakyll.Core.Identifier.Pattern.fromCaptures': "
+ ++ "identifier list exhausted"
+fromCaptures' (m : ms) ids@(i : is) = case m of
+ Literal l -> l `mappend` fromCaptures' ms ids
+ _ -> i `mappend` fromCaptures' ms is