--------------------------------------------------------------------------------
-- | As 'Identifier' is used to specify a single item, a 'Pattern' is used to
-- specify a list of items.
--
-- In most cases, globs are used for patterns.
--
-- A very simple pattern of such a pattern is @\"foo\/bar\"@. This pattern will
-- only match the exact @foo\/bar@ identifier.
--
-- To match more than one identifier, there are different captures that one can
-- use:
--
-- * @\"*\"@: matches at most one element of an identifier;
--
-- * @\"**\"@: matches one or more elements of an identifier.
--
-- Some examples:
--
-- * @\"foo\/*\"@ will match @\"foo\/bar\"@ and @\"foo\/foo\"@, but not
--   @\"foo\/bar\/qux\"@;
--
-- * @\"**\"@ will match any identifier;
--
-- * @\"foo\/**\"@ will match @\"foo\/bar\"@ and @\"foo\/bar\/qux\"@, but not
--   @\"bar\/foo\"@;
--
-- * @\"foo\/*.html\"@ will match all HTML files in the @\"foo\/\"@ directory.
--
-- The 'capture' function allows the user to get access to the elements captured
-- by the capture elements in a glob or regex pattern.
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Hakyll.Core.Identifier.Pattern
    ( -- * The pattern type
      Pattern

      -- * Creating patterns
    , fromGlob
    , fromList
    , fromRegex
    , fromVersion
    , hasVersion
    , hasNoVersion

      -- * Composing patterns
    , (.&&.)
    , (.||.)
    , complement

      -- * Applying patterns
    , matches
    , filterMatches

      -- * Capturing strings
    , capture
    , fromCapture
    , fromCaptures
    ) where


--------------------------------------------------------------------------------
import           Control.Arrow                           ((&&&), (>>>))
import           Control.Monad                           (msum)
import           Data.List                               (inits, isPrefixOf,
                                                          tails)
import           Data.Maybe                              (isJust)
import qualified Data.Set                                as S
import           System.FilePath                         (normalise, pathSeparator)


--------------------------------------------------------------------------------
import           GHC.Exts                                (IsString, fromString)
import           Text.Regex.TDFA                         ((=~))


--------------------------------------------------------------------------------
import           Hakyll.Core.Identifier
import           Hakyll.Core.Identifier.Pattern.Internal
import           Hakyll.Core.Util.String                 (removeWinPathSeparator)


--------------------------------------------------------------------------------
instance IsString Pattern where
    fromString :: String -> Pattern
fromString = String -> Pattern
fromGlob


--------------------------------------------------------------------------------
-- | Parse a pattern from a string
fromGlob :: String -> Pattern
fromGlob :: String -> Pattern
fromGlob = [GlobComponent] -> Pattern
Glob ([GlobComponent] -> Pattern)
-> (String -> [GlobComponent]) -> String -> Pattern
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [GlobComponent]
parse' (String -> [GlobComponent])
-> (String -> String) -> String -> [GlobComponent]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
normalise
  where
    parse' :: String -> [GlobComponent]
parse' String
str = 
        let (String
chunk, String
rest) = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
break (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'*') String
str
        in case String
rest of
            (Char
'*'  : Char
'*' : String
xs) -> String -> GlobComponent
Literal String
chunk GlobComponent -> [GlobComponent] -> [GlobComponent]
forall a. a -> [a] -> [a]
: GlobComponent
CaptureMany GlobComponent -> [GlobComponent] -> [GlobComponent]
forall a. a -> [a] -> [a]
: String -> [GlobComponent]
parse' String
xs
            (Char
'*'  : String
xs)       -> String -> GlobComponent
Literal String
chunk GlobComponent -> [GlobComponent] -> [GlobComponent]
forall a. a -> [a] -> [a]
: GlobComponent
Capture GlobComponent -> [GlobComponent] -> [GlobComponent]
forall a. a -> [a] -> [a]
: String -> [GlobComponent]
parse' String
xs
            String
""                -> String -> GlobComponent
Literal String
chunk GlobComponent -> [GlobComponent] -> [GlobComponent]
forall a. a -> [a] -> [a]
: []
            String
xs                -> String -> GlobComponent
Literal String
chunk GlobComponent -> [GlobComponent] -> [GlobComponent]
forall a. a -> [a] -> [a]
: String -> GlobComponent
Literal String
xs GlobComponent -> [GlobComponent] -> [GlobComponent]
forall a. a -> [a] -> [a]
: []


--------------------------------------------------------------------------------
-- | Create a 'Pattern' from a list of 'Identifier's it should match.
--
-- /Warning/: use this carefully with 'hasNoVersion' and 'hasVersion'. The
-- 'Identifier's in the list /already/ have versions assigned, and the pattern
-- will then only match the intersection of both versions.
--
-- A more concrete example,
--
-- > fromList ["foo.markdown"] .&&. hasVersion "pdf"
--
-- will not match anything! The @"foo.markdown"@ 'Identifier' has no version
-- assigned, so the LHS of '.&&.' will only match this 'Identifier' with no
-- version. The RHS only matches 'Identifier's with version set to @"pdf"@ --
-- hence, this pattern matches nothing.
--
-- The correct way to use this is:
--
-- > fromList $ map (setVersion $ Just "pdf") ["foo.markdown"]
fromList :: [Identifier] -> Pattern
fromList :: [Identifier] -> Pattern
fromList = Set Identifier -> Pattern
List (Set Identifier -> Pattern)
-> ([Identifier] -> Set Identifier) -> [Identifier] -> Pattern
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Identifier] -> Set Identifier
forall a. Ord a => [a] -> Set a
S.fromList


--------------------------------------------------------------------------------
-- | Create a 'Pattern' from a regex
--
-- Example:
--
-- > regex "^foo/[^x]*$
fromRegex :: String -> Pattern
fromRegex :: String -> Pattern
fromRegex = String -> Pattern
Regex


--------------------------------------------------------------------------------
-- | Create a pattern which matches all items with the given version.
fromVersion :: Maybe String -> Pattern
fromVersion :: Maybe String -> Pattern
fromVersion = Maybe String -> Pattern
Version


--------------------------------------------------------------------------------
-- | Specify a version, e.g.
--
-- > "foo/*.markdown" .&&. hasVersion "pdf"
hasVersion :: String -> Pattern
hasVersion :: String -> Pattern
hasVersion = Maybe String -> Pattern
fromVersion (Maybe String -> Pattern)
-> (String -> Maybe String) -> String -> Pattern
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Maybe String
forall a. a -> Maybe a
Just


--------------------------------------------------------------------------------
-- | Match only if the identifier has no version set, e.g.
--
-- > "foo/*.markdown" .&&. hasNoVersion
hasNoVersion :: Pattern
hasNoVersion :: Pattern
hasNoVersion = Maybe String -> Pattern
fromVersion Maybe String
forall a. Maybe a
Nothing


--------------------------------------------------------------------------------
-- | '&&' for patterns: the given identifier must match both subterms
(.&&.) :: Pattern -> Pattern -> Pattern
Pattern
x .&&. :: Pattern -> Pattern -> Pattern
.&&. Pattern
y = Pattern -> Pattern -> Pattern
And Pattern
x Pattern
y
infixr 3 .&&.


--------------------------------------------------------------------------------
-- | '||' for patterns: the given identifier must match any subterm
(.||.) :: Pattern -> Pattern -> Pattern
Pattern
x .||. :: Pattern -> Pattern -> Pattern
.||. Pattern
y = Pattern -> Pattern
complement (Pattern -> Pattern
complement Pattern
x Pattern -> Pattern -> Pattern
`And` Pattern -> Pattern
complement Pattern
y)  -- De Morgan's law
infixr 2 .||.


--------------------------------------------------------------------------------
-- | Inverts a pattern, e.g.
--
-- > complement "foo/bar.html"
--
-- will match /anything/ except @\"foo\/bar.html\"@
complement :: Pattern -> Pattern
complement :: Pattern -> Pattern
complement = Pattern -> Pattern
Complement


--------------------------------------------------------------------------------
-- | Check if an identifier matches a pattern
matches :: Pattern -> Identifier -> Bool
matches :: Pattern -> Identifier -> Bool
matches Pattern
Everything     Identifier
_ = Bool
True
matches (Complement Pattern
p) Identifier
i = Bool -> Bool
not (Bool -> Bool) -> Bool -> Bool
forall a b. (a -> b) -> a -> b
$ Pattern -> Identifier -> Bool
matches Pattern
p Identifier
i
matches (And Pattern
x Pattern
y)      Identifier
i = Pattern -> Identifier -> Bool
matches Pattern
x Identifier
i Bool -> Bool -> Bool
&& Pattern -> Identifier -> Bool
matches Pattern
y Identifier
i
matches (Glob [GlobComponent]
p)       Identifier
i = Maybe [String] -> Bool
forall a. Maybe a -> Bool
isJust (Maybe [String] -> Bool) -> Maybe [String] -> Bool
forall a b. (a -> b) -> a -> b
$ Pattern -> Identifier -> Maybe [String]
capture ([GlobComponent] -> Pattern
Glob [GlobComponent]
p) Identifier
i
matches (List Set Identifier
l)       Identifier
i = Identifier
i Identifier -> Set Identifier -> Bool
forall a. Ord a => a -> Set a -> Bool
`S.member` Set Identifier
l
matches (Regex String
r)      Identifier
i = (String -> String
removeWinPathSeparator (String -> String) -> String -> String
forall a b. (a -> b) -> a -> b
$ Identifier -> String
toFilePath Identifier
i) String -> String -> Bool
forall source source1 target.
(RegexMaker Regex CompOption ExecOption source,
 RegexContext Regex source1 target) =>
source1 -> source -> target
=~ String
r
matches (Version Maybe String
v)    Identifier
i = Identifier -> Maybe String
identifierVersion Identifier
i Maybe String -> Maybe String -> Bool
forall a. Eq a => a -> a -> Bool
== Maybe String
v


--------------------------------------------------------------------------------
-- | Given a list of identifiers, retain only those who match the given pattern
filterMatches :: Pattern -> [Identifier] -> [Identifier]
filterMatches :: Pattern -> [Identifier] -> [Identifier]
filterMatches = (Identifier -> Bool) -> [Identifier] -> [Identifier]
forall a. (a -> Bool) -> [a] -> [a]
filter ((Identifier -> Bool) -> [Identifier] -> [Identifier])
-> (Pattern -> Identifier -> Bool)
-> Pattern
-> [Identifier]
-> [Identifier]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Pattern -> Identifier -> Bool
matches


--------------------------------------------------------------------------------
-- | Split a list at every possible point, generate a list of (init, tail)
-- cases. The result is sorted with inits decreasing in length.
splits :: [a] -> [([a], [a])]
splits :: [a] -> [([a], [a])]
splits = [a] -> [[a]]
forall a. [a] -> [[a]]
inits ([a] -> [[a]]) -> ([a] -> [[a]]) -> [a] -> ([[a]], [[a]])
forall (a :: * -> * -> *) b c c'.
Arrow a =>
a b c -> a b c' -> a b (c, c')
&&& [a] -> [[a]]
forall a. [a] -> [[a]]
tails ([a] -> ([[a]], [[a]]))
-> (([[a]], [[a]]) -> [([a], [a])]) -> [a] -> [([a], [a])]
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> ([[a]] -> [[a]] -> [([a], [a])]) -> ([[a]], [[a]]) -> [([a], [a])]
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry [[a]] -> [[a]] -> [([a], [a])]
forall a b. [a] -> [b] -> [(a, b)]
zip (([[a]], [[a]]) -> [([a], [a])])
-> ([([a], [a])] -> [([a], [a])]) -> ([[a]], [[a]]) -> [([a], [a])]
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> [([a], [a])] -> [([a], [a])]
forall a. [a] -> [a]
reverse


--------------------------------------------------------------------------------
-- | Match a glob or regex pattern against an identifier, generating a list of captures
capture :: Pattern -> Identifier -> Maybe [String]
capture :: Pattern -> Identifier -> Maybe [String]
capture (Glob [GlobComponent]
p) Identifier
i = [GlobComponent] -> String -> Maybe [String]
capture' [GlobComponent]
p (Identifier -> String
toFilePath Identifier
i)
capture (Regex String
pat) Identifier
i = [String] -> Maybe [String]
forall a. a -> Maybe a
Just [String]
groups
  where (String
_, String
_, String
_, [String]
groups) = ((String -> String
removeWinPathSeparator (String -> String) -> String -> String
forall a b. (a -> b) -> a -> b
$ Identifier -> String
toFilePath Identifier
i) String -> String -> (String, String, String, [String])
forall source source1 target.
(RegexMaker Regex CompOption ExecOption source,
 RegexContext Regex source1 target) =>
source1 -> source -> target
=~ String
pat) :: (String, String, String, [String])
capture Pattern
_        Identifier
_ = Maybe [String]
forall a. Maybe a
Nothing


--------------------------------------------------------------------------------
-- | Internal verion of 'capture'
capture' :: [GlobComponent] -> String -> Maybe [String]
capture' :: [GlobComponent] -> String -> Maybe [String]
capture' [] [] = [String] -> Maybe [String]
forall a. a -> Maybe a
Just []  -- An empty match
capture' [] String
_  = Maybe [String]
forall a. Maybe a
Nothing  -- No match
capture' (Literal String
l : [GlobComponent]
ms) String
str
    -- Match the literal against the string
    | String
l String -> String -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`isPrefixOf` String
str = [GlobComponent] -> String -> Maybe [String]
capture' [GlobComponent]
ms (String -> Maybe [String]) -> String -> Maybe [String]
forall a b. (a -> b) -> a -> b
$ Int -> String -> String
forall a. Int -> [a] -> [a]
drop (String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length String
l) String
str
    | Bool
otherwise          = Maybe [String]
forall a. Maybe a
Nothing
capture' (GlobComponent
Capture : [GlobComponent]
ms) String
str =
    -- Match until the next path separator
    let (String
chunk, String
rest) = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
break (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
pathSeparator) String
str
    in [Maybe [String]] -> Maybe [String]
forall (t :: * -> *) (m :: * -> *) a.
(Foldable t, MonadPlus m) =>
t (m a) -> m a
msum ([Maybe [String]] -> Maybe [String])
-> [Maybe [String]] -> Maybe [String]
forall a b. (a -> b) -> a -> b
$ [ ([String] -> [String]) -> Maybe [String] -> Maybe [String]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (String
i String -> [String] -> [String]
forall a. a -> [a] -> [a]
:) ([GlobComponent] -> String -> Maybe [String]
capture' [GlobComponent]
ms (String
t String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
rest)) | (String
i, String
t) <- String -> [(String, String)]
forall a. [a] -> [([a], [a])]
splits String
chunk ]
capture' (GlobComponent
CaptureMany : [GlobComponent]
ms) String
str =
    -- Match everything
    [Maybe [String]] -> Maybe [String]
forall (t :: * -> *) (m :: * -> *) a.
(Foldable t, MonadPlus m) =>
t (m a) -> m a
msum ([Maybe [String]] -> Maybe [String])
-> [Maybe [String]] -> Maybe [String]
forall a b. (a -> b) -> a -> b
$ [ ([String] -> [String]) -> Maybe [String] -> Maybe [String]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (String
i String -> [String] -> [String]
forall a. a -> [a] -> [a]
:) ([GlobComponent] -> String -> Maybe [String]
capture' [GlobComponent]
ms String
t) | (String
i, String
t) <- String -> [(String, String)]
forall a. [a] -> [([a], [a])]
splits String
str ]


--------------------------------------------------------------------------------
-- | Create an identifier from a pattern by filling in the captures with a given
-- string
--
-- Example:
--
-- > fromCapture (fromGlob "tags/*") "foo"
--
-- Result:
--
-- > "tags/foo"
fromCapture :: Pattern -> String -> Identifier
fromCapture :: Pattern -> String -> Identifier
fromCapture Pattern
pattern = Pattern -> [String] -> Identifier
fromCaptures Pattern
pattern ([String] -> Identifier)
-> (String -> [String]) -> String -> Identifier
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [String]
forall a. a -> [a]
repeat


--------------------------------------------------------------------------------
-- | Create an identifier from a pattern by filling in the captures with the
-- given list of strings
fromCaptures :: Pattern -> [String] -> Identifier
fromCaptures :: Pattern -> [String] -> Identifier
fromCaptures (Glob [GlobComponent]
p) = String -> Identifier
fromFilePath (String -> Identifier)
-> ([String] -> String) -> [String] -> Identifier
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [GlobComponent] -> [String] -> String
fromCaptures' [GlobComponent]
p
fromCaptures Pattern
_        = String -> [String] -> Identifier
forall a. HasCallStack => String -> a
error (String -> [String] -> Identifier)
-> String -> [String] -> Identifier
forall a b. (a -> b) -> a -> b
$
    String
"Hakyll.Core.Identifier.Pattern.fromCaptures: fromCaptures only works " String -> String -> String
forall a. [a] -> [a] -> [a]
++
    String
"on simple globs!"


--------------------------------------------------------------------------------
-- | Internally used version of 'fromCaptures'
fromCaptures' :: [GlobComponent] -> [String] -> String
fromCaptures' :: [GlobComponent] -> [String] -> String
fromCaptures' []        [String]
_ = String
forall a. Monoid a => a
mempty
fromCaptures' (GlobComponent
m : [GlobComponent]
ms) [] = case GlobComponent
m of
    Literal String
l -> String
l String -> String -> String
forall a. Monoid a => a -> a -> a
`mappend` [GlobComponent] -> [String] -> String
fromCaptures' [GlobComponent]
ms []
    GlobComponent
_         -> String -> String
forall a. HasCallStack => String -> a
error (String -> String) -> String -> String
forall a b. (a -> b) -> a -> b
$  String
"Hakyll.Core.Identifier.Pattern.fromCaptures': "
                       String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"identifier list exhausted"
fromCaptures' (GlobComponent
m : [GlobComponent]
ms) ids :: [String]
ids@(String
i : [String]
is) = case GlobComponent
m of
    Literal String
l -> String
l String -> String -> String
forall a. Monoid a => a -> a -> a
`mappend` [GlobComponent] -> [String] -> String
fromCaptures' [GlobComponent]
ms [String]
ids
    GlobComponent
_         -> String
i String -> String -> String
forall a. Monoid a => a -> a -> a
`mappend` [GlobComponent] -> [String] -> String
fromCaptures' [GlobComponent]
ms [String]
is