Unverified commit c3820050, authored by Julien Mancuso, committed by GitHub
Browse files

feat: use official Grove 0.1.0-alpha release (#3030)


Signed-off-by: Julien Mancuso <jmancuso@nvidia.com>
parent 2a61e29e
...@@ -35,11 +35,11 @@ dependencies: ...@@ -35,11 +35,11 @@ dependencies:
repository: "https://charts.bitnami.com/bitnami" repository: "https://charts.bitnami.com/bitnami"
condition: etcd.enabled condition: etcd.enabled
- name: kai-scheduler - name: kai-scheduler
version: v0.8.4 version: v0.9.2
repository: oci://ghcr.io/nvidia/kai-scheduler repository: oci://ghcr.io/nvidia/kai-scheduler
condition: kai-scheduler.enabled condition: kai-scheduler.enabled
- name: grove-charts - name: grove-charts
alias: grove alias: grove
version: v0.0.0-6e30275 version: v0.1.0-alpha.1
repository: oci://ghcr.io/nvidia/grove repository: oci://ghcr.io/nvidia/grove
condition: grove.enabled condition: grove.enabled
...@@ -119,7 +119,7 @@ rules: ...@@ -119,7 +119,7 @@ rules:
- apiGroups: - apiGroups:
- grove.io - grove.io
resources: resources:
- podgangsets - podcliquesets
verbs: verbs:
- create - create
- delete - delete
......
...@@ -159,7 +159,7 @@ func main() { ...@@ -159,7 +159,7 @@ func main() {
flag.StringVar(&ingressHostSuffix, "ingress-host-suffix", "", flag.StringVar(&ingressHostSuffix, "ingress-host-suffix", "",
"The suffix to use for the ingress host") "The suffix to use for the ingress host")
flag.DurationVar(&groveTerminationDelay, "grove-termination-delay", consts.DefaultGroveTerminationDelay, flag.DurationVar(&groveTerminationDelay, "grove-termination-delay", consts.DefaultGroveTerminationDelay,
"The termination delay for Grove PodGangSets") "The termination delay for Grove PodCliqueSets")
flag.StringVar(&modelExpressURL, "model-express-url", "", flag.StringVar(&modelExpressURL, "model-express-url", "",
"URL of the Model Express server to inject into all pods") "URL of the Model Express server to inject into all pods")
flag.StringVar(&prometheusEndpoint, "prometheus-endpoint", "", flag.StringVar(&prometheusEndpoint, "prometheus-endpoint", "",
......
...@@ -110,7 +110,7 @@ rules: ...@@ -110,7 +110,7 @@ rules:
- apiGroups: - apiGroups:
- grove.io - grove.io
resources: resources:
- podgangsets - podcliquesets
verbs: verbs:
- create - create
- delete - delete
......
...@@ -6,8 +6,9 @@ toolchain go1.24.3 ...@@ -6,8 +6,9 @@ toolchain go1.24.3
require ( require (
emperror.dev/errors v0.8.1 emperror.dev/errors v0.8.1
github.com/NVIDIA/grove/operator/api v0.0.0-20250825164137-da01400261a6 github.com/NVIDIA/grove/operator/api v0.1.0-alpha.1
github.com/bsm/gomega v1.27.10 github.com/bsm/gomega v1.27.10
github.com/go-logr/logr v1.4.2
github.com/google/go-cmp v0.7.0 github.com/google/go-cmp v0.7.0
github.com/imdario/mergo v0.3.6 github.com/imdario/mergo v0.3.6
github.com/onsi/ginkgo/v2 v2.23.4 github.com/onsi/ginkgo/v2 v2.23.4
...@@ -39,7 +40,6 @@ require ( ...@@ -39,7 +40,6 @@ require (
github.com/evanphx/json-patch/v5 v5.9.11 // indirect github.com/evanphx/json-patch/v5 v5.9.11 // indirect
github.com/fsnotify/fsnotify v1.7.0 // indirect github.com/fsnotify/fsnotify v1.7.0 // indirect
github.com/fxamacker/cbor/v2 v2.7.0 // indirect github.com/fxamacker/cbor/v2 v2.7.0 // indirect
github.com/go-logr/logr v1.4.2 // indirect
github.com/go-logr/zapr v1.3.0 // indirect github.com/go-logr/zapr v1.3.0 // indirect
github.com/go-openapi/jsonpointer v0.21.0 // indirect github.com/go-openapi/jsonpointer v0.21.0 // indirect
github.com/go-openapi/jsonreference v0.21.0 // indirect github.com/go-openapi/jsonreference v0.21.0 // indirect
......
emperror.dev/errors v0.8.1 h1:UavXZ5cSX/4u9iyvH6aDcuGkVjeexUGJ7Ij7G4VfQT0= emperror.dev/errors v0.8.1 h1:UavXZ5cSX/4u9iyvH6aDcuGkVjeexUGJ7Ij7G4VfQT0=
emperror.dev/errors v0.8.1/go.mod h1:YcRvLPh626Ubn2xqtoprejnA5nFha+TJ+2vew48kWuE= emperror.dev/errors v0.8.1/go.mod h1:YcRvLPh626Ubn2xqtoprejnA5nFha+TJ+2vew48kWuE=
github.com/NVIDIA/grove/operator/api v0.0.0-20250825164137-da01400261a6 h1:JkW8LeRVsQH/YkRTz80T/JxlDgfk0URKgTUKyYKxbso= github.com/NVIDIA/grove/operator/api v0.1.0-alpha.1 h1:4DE6ZGa/3muBa5gk1GtJskMVss6GjeCPpn+xTnR1h9w=
github.com/NVIDIA/grove/operator/api v0.0.0-20250825164137-da01400261a6/go.mod h1:QlsR2wQLj9m/zVEqv5SsCPzyjN2ykYZ0r/NEnDf4WB4= github.com/NVIDIA/grove/operator/api v0.1.0-alpha.1/go.mod h1:QlsR2wQLj9m/zVEqv5SsCPzyjN2ykYZ0r/NEnDf4WB4=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM= github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
......
...@@ -71,7 +71,7 @@ type DynamoGraphDeploymentReconciler struct { ...@@ -71,7 +71,7 @@ type DynamoGraphDeploymentReconciler struct {
// +kubebuilder:rbac:groups=nvidia.com,resources=dynamographdeployments,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=nvidia.com,resources=dynamographdeployments,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=nvidia.com,resources=dynamographdeployments/status,verbs=get;update;patch // +kubebuilder:rbac:groups=nvidia.com,resources=dynamographdeployments/status,verbs=get;update;patch
// +kubebuilder:rbac:groups=nvidia.com,resources=dynamographdeployments/finalizers,verbs=update // +kubebuilder:rbac:groups=nvidia.com,resources=dynamographdeployments/finalizers,verbs=update
// +kubebuilder:rbac:groups=grove.io,resources=podgangsets,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=grove.io,resources=podcliquesets,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=grove.io,resources=podcliques/scale,verbs=get;update;patch // +kubebuilder:rbac:groups=grove.io,resources=podcliques/scale,verbs=get;update;patch
// +kubebuilder:rbac:groups=grove.io,resources=podcliquescalinggroups/scale,verbs=get;update;patch // +kubebuilder:rbac:groups=grove.io,resources=podcliquescalinggroups/scale,verbs=get;update;patch
// +kubebuilder:rbac:groups=scheduling.run.ai,resources=queues,verbs=get;list // +kubebuilder:rbac:groups=scheduling.run.ai,resources=queues,verbs=get;list
...@@ -258,12 +258,12 @@ func (r *DynamoGraphDeploymentReconciler) reconcileGroveScaling(ctx context.Cont ...@@ -258,12 +258,12 @@ func (r *DynamoGraphDeploymentReconciler) reconcileGroveScaling(ctx context.Cont
func (r *DynamoGraphDeploymentReconciler) reconcileGroveResources(ctx context.Context, dynamoDeployment *nvidiacomv1alpha1.DynamoGraphDeployment) (State, Reason, Message, error) { func (r *DynamoGraphDeploymentReconciler) reconcileGroveResources(ctx context.Context, dynamoDeployment *nvidiacomv1alpha1.DynamoGraphDeployment) (State, Reason, Message, error) {
logger := log.FromContext(ctx) logger := log.FromContext(ctx)
// generate the dynamoComponentsDeployments from the config // generate the dynamoComponentsDeployments from the config
groveGangSet, err := dynamo.GenerateGrovePodGangSet(ctx, dynamoDeployment, r.Config, r.DockerSecretRetriever) groveGangSet, err := dynamo.GenerateGrovePodCliqueSet(ctx, dynamoDeployment, r.Config, r.DockerSecretRetriever)
if err != nil { if err != nil {
logger.Error(err, "failed to generate the Grove GangSet") logger.Error(err, "failed to generate the Grove GangSet")
return "", "", "", fmt.Errorf("failed to generate the Grove GangSet: %w", err) return "", "", "", fmt.Errorf("failed to generate the Grove GangSet: %w", err)
} }
_, syncedGroveGangSet, err := commonController.SyncResource(ctx, r, dynamoDeployment, func(ctx context.Context) (*grovev1alpha1.PodGangSet, bool, error) { _, syncedGroveGangSet, err := commonController.SyncResource(ctx, r, dynamoDeployment, func(ctx context.Context) (*grovev1alpha1.PodCliqueSet, bool, error) {
return groveGangSet, false, nil return groveGangSet, false, nil
}) })
if err != nil { if err != nil {
...@@ -421,7 +421,7 @@ func (r *DynamoGraphDeploymentReconciler) SetupWithManager(mgr ctrl.Manager) err ...@@ -421,7 +421,7 @@ func (r *DynamoGraphDeploymentReconciler) SetupWithManager(mgr ctrl.Manager) err
})). })).
WithEventFilter(commonController.EphemeralDeploymentEventFilter(r.Config)) WithEventFilter(commonController.EphemeralDeploymentEventFilter(r.Config))
if r.Config.Grove.Enabled { if r.Config.Grove.Enabled {
ctrlBuilder = ctrlBuilder.Owns(&grovev1alpha1.PodGangSet{}, builder.WithPredicates(predicate.Funcs{ ctrlBuilder = ctrlBuilder.Owns(&grovev1alpha1.PodCliqueSet{}, builder.WithPredicates(predicate.Funcs{
// ignore creation cause we don't want to be called again after we create the pod gang set // ignore creation cause we don't want to be called again after we create the pod gang set
CreateFunc: func(ce event.CreateEvent) bool { return false }, CreateFunc: func(ce event.CreateEvent) bool { return false },
DeleteFunc: func(de event.DeleteEvent) bool { return true }, DeleteFunc: func(de event.DeleteEvent) bool { return true },
......
...@@ -6,7 +6,7 @@ import ( ...@@ -6,7 +6,7 @@ import (
grovev1alpha1 "github.com/NVIDIA/grove/operator/api/core/v1alpha1" grovev1alpha1 "github.com/NVIDIA/grove/operator/api/core/v1alpha1"
) )
func CanonicalizePodGangSet(gangSet *grovev1alpha1.PodGangSet) *grovev1alpha1.PodGangSet { func CanonicalizePodCliqueSet(gangSet *grovev1alpha1.PodCliqueSet) *grovev1alpha1.PodCliqueSet {
// sort cliques by name // sort cliques by name
sort.Slice(gangSet.Spec.Template.Cliques, func(i, j int) bool { sort.Slice(gangSet.Spec.Template.Cliques, func(i, j int) bool {
return gangSet.Spec.Template.Cliques[i].Name < gangSet.Spec.Template.Cliques[j].Name return gangSet.Spec.Template.Cliques[i].Name < gangSet.Spec.Template.Cliques[j].Name
......
...@@ -33,7 +33,7 @@ import ( ...@@ -33,7 +33,7 @@ import (
type GroveConfig struct { type GroveConfig struct {
// Enabled is automatically determined by checking if Grove CRDs are installed in the cluster // Enabled is automatically determined by checking if Grove CRDs are installed in the cluster
Enabled bool Enabled bool
// TerminationDelay configures the termination delay for Grove PodGangSets // TerminationDelay configures the termination delay for Grove PodCliqueSets
TerminationDelay time.Duration TerminationDelay time.Duration
} }
......
...@@ -317,16 +317,16 @@ type SecretsRetriever interface { ...@@ -317,16 +317,16 @@ type SecretsRetriever interface {
GetSecrets(namespace, registry string) ([]string, error) GetSecrets(namespace, registry string) ([]string, error)
} }
// applyCliqueStartupDependencies configures StartsAfter dependencies for cliques in a PodGangSet // applyCliqueStartupDependencies configures StartsAfter dependencies for cliques in a PodCliqueSet
// based on the backend framework and multinode deployment patterns. // based on the backend framework and multinode deployment patterns.
// //
// Rules: // Rules:
// - For VLLM and SGLang: worker cliques start after leader clique // - For VLLM and SGLang: worker cliques start after leader clique
// - For TRTLLM: leader clique starts after worker cliques // - For TRTLLM: leader clique starts after worker cliques
// - Only applies to multinode deployments (numberOfNodes > 1) // - Only applies to multinode deployments (numberOfNodes > 1)
// - Sets the PodGangSet StartupType to Explicit if any dependencies are configured // - Sets the PodCliqueSet StartupType to Explicit if any dependencies are configured
func applyCliqueStartupDependencies( func applyCliqueStartupDependencies(
gangSet *grovev1alpha1.PodGangSet, gangSet *grovev1alpha1.PodCliqueSet,
roles []ServiceRole, roles []ServiceRole,
backendFramework BackendFramework, backendFramework BackendFramework,
numberOfNodes int32, numberOfNodes int32,
...@@ -880,14 +880,14 @@ func GeneratePodSpecForComponent( ...@@ -880,14 +880,14 @@ func GeneratePodSpecForComponent(
return podSpec, nil return podSpec, nil
} }
// GenerateGrovePodGangSet generates a Grove PodGangSet for the given deployment, supporting both single-node and multinode cases. // GenerateGrovePodCliqueSet generates a Grove PodCliqueSet for the given deployment, supporting both single-node and multinode cases.
func GenerateGrovePodGangSet( func GenerateGrovePodCliqueSet(
ctx context.Context, ctx context.Context,
dynamoDeployment *v1alpha1.DynamoGraphDeployment, dynamoDeployment *v1alpha1.DynamoGraphDeployment,
controllerConfig controller_common.Config, controllerConfig controller_common.Config,
secretsRetriever SecretsRetriever, secretsRetriever SecretsRetriever,
) (*grovev1alpha1.PodGangSet, error) { ) (*grovev1alpha1.PodCliqueSet, error) {
gangSet := &grovev1alpha1.PodGangSet{} gangSet := &grovev1alpha1.PodCliqueSet{}
gangSet.Name = dynamoDeployment.Name gangSet.Name = dynamoDeployment.Name
gangSet.Namespace = dynamoDeployment.Namespace gangSet.Namespace = dynamoDeployment.Namespace
gangSet.Spec.Replicas = 1 gangSet.Spec.Replicas = 1
...@@ -986,7 +986,7 @@ func GenerateGrovePodGangSet( ...@@ -986,7 +986,7 @@ func GenerateGrovePodGangSet(
gangSet.Spec.Template.PodCliqueScalingGroupConfigs = scalingGroups gangSet.Spec.Template.PodCliqueScalingGroupConfigs = scalingGroups
} }
return controller_common.CanonicalizePodGangSet(gangSet), nil return controller_common.CanonicalizePodCliqueSet(gangSet), nil
} }
func generateLabels(component *v1alpha1.DynamoComponentDeploymentOverridesSpec, dynamoDeployment *v1alpha1.DynamoGraphDeployment, componentName string) (map[string]string, error) { func generateLabels(component *v1alpha1.DynamoComponentDeploymentOverridesSpec, dynamoDeployment *v1alpha1.DynamoGraphDeployment, componentName string) (map[string]string, error) {
......
...@@ -1048,7 +1048,7 @@ func sortEnvVars(envs []corev1.EnvVar) []corev1.EnvVar { ...@@ -1048,7 +1048,7 @@ func sortEnvVars(envs []corev1.EnvVar) []corev1.EnvVar {
return sorted return sorted
} }
func TestGenerateGrovePodGangSet(t *testing.T) { func TestGenerateGrovePodCliqueSet(t *testing.T) {
type args struct { type args struct {
ctx context.Context ctx context.Context
dynamoDeployment *v1alpha1.DynamoGraphDeployment dynamoDeployment *v1alpha1.DynamoGraphDeployment
...@@ -1057,11 +1057,11 @@ func TestGenerateGrovePodGangSet(t *testing.T) { ...@@ -1057,11 +1057,11 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
tests := []struct { tests := []struct {
name string name string
args args args args
want *grovev1alpha1.PodGangSet want *grovev1alpha1.PodCliqueSet
wantErr bool wantErr bool
}{ }{
{ {
name: "test_generate_grove_pod_gang_set_single_node", name: "test_generate_grove_pod_clique_set_single_node",
args: args{ args: args{
ctx: context.Background(), ctx: context.Background(),
controllerConfig: controller_common.Config{ controllerConfig: controller_common.Config{
...@@ -1220,14 +1220,14 @@ func TestGenerateGrovePodGangSet(t *testing.T) { ...@@ -1220,14 +1220,14 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
}, },
}, },
}, },
want: &grovev1alpha1.PodGangSet{ want: &grovev1alpha1.PodCliqueSet{
ObjectMeta: metav1.ObjectMeta{ ObjectMeta: metav1.ObjectMeta{
Name: "test-dynamo-graph-deployment", Name: "test-dynamo-graph-deployment",
Namespace: "test-namespace", Namespace: "test-namespace",
}, },
Spec: grovev1alpha1.PodGangSetSpec{ Spec: grovev1alpha1.PodCliqueSetSpec{
Replicas: 1, Replicas: 1,
Template: grovev1alpha1.PodGangSetTemplateSpec{ Template: grovev1alpha1.PodCliqueSetTemplateSpec{
StartupType: ptr.To(grovev1alpha1.CliqueStartupTypeAnyOrder), StartupType: ptr.To(grovev1alpha1.CliqueStartupTypeAnyOrder),
HeadlessServiceConfig: &grovev1alpha1.HeadlessServiceConfig{ HeadlessServiceConfig: &grovev1alpha1.HeadlessServiceConfig{
PublishNotReadyAddresses: true, PublishNotReadyAddresses: true,
...@@ -1737,14 +1737,14 @@ func TestGenerateGrovePodGangSet(t *testing.T) { ...@@ -1737,14 +1737,14 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
}, },
}, },
}, },
want: &grovev1alpha1.PodGangSet{ want: &grovev1alpha1.PodCliqueSet{
ObjectMeta: metav1.ObjectMeta{ ObjectMeta: metav1.ObjectMeta{
Name: "test-dynamo-graph-deployment", Name: "test-dynamo-graph-deployment",
Namespace: "test-namespace", Namespace: "test-namespace",
}, },
Spec: grovev1alpha1.PodGangSetSpec{ Spec: grovev1alpha1.PodCliqueSetSpec{
Replicas: 1, Replicas: 1,
Template: grovev1alpha1.PodGangSetTemplateSpec{ Template: grovev1alpha1.PodCliqueSetTemplateSpec{
HeadlessServiceConfig: &grovev1alpha1.HeadlessServiceConfig{ HeadlessServiceConfig: &grovev1alpha1.HeadlessServiceConfig{
PublishNotReadyAddresses: true, PublishNotReadyAddresses: true,
}, },
...@@ -2533,14 +2533,14 @@ func TestGenerateGrovePodGangSet(t *testing.T) { ...@@ -2533,14 +2533,14 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
}, },
}, },
}, },
want: &grovev1alpha1.PodGangSet{ want: &grovev1alpha1.PodCliqueSet{
ObjectMeta: metav1.ObjectMeta{ ObjectMeta: metav1.ObjectMeta{
Name: "test-dynamo-graph-deployment", Name: "test-dynamo-graph-deployment",
Namespace: "test-namespace", Namespace: "test-namespace",
}, },
Spec: grovev1alpha1.PodGangSetSpec{ Spec: grovev1alpha1.PodCliqueSetSpec{
Replicas: 1, Replicas: 1,
Template: grovev1alpha1.PodGangSetTemplateSpec{ Template: grovev1alpha1.PodCliqueSetTemplateSpec{
StartupType: ptr.To(grovev1alpha1.CliqueStartupTypeAnyOrder), StartupType: ptr.To(grovev1alpha1.CliqueStartupTypeAnyOrder),
HeadlessServiceConfig: &grovev1alpha1.HeadlessServiceConfig{ HeadlessServiceConfig: &grovev1alpha1.HeadlessServiceConfig{
PublishNotReadyAddresses: true, PublishNotReadyAddresses: true,
...@@ -3099,9 +3099,9 @@ func TestGenerateGrovePodGangSet(t *testing.T) { ...@@ -3099,9 +3099,9 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
} }
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
got, err := GenerateGrovePodGangSet(tt.args.ctx, tt.args.dynamoDeployment, tt.args.controllerConfig, nil) got, err := GenerateGrovePodCliqueSet(tt.args.ctx, tt.args.dynamoDeployment, tt.args.controllerConfig, nil)
if (err != nil) != tt.wantErr { if (err != nil) != tt.wantErr {
t.Errorf("GenerateGrovePodGangSet() error = %v, wantErr %v", err, tt.wantErr) t.Errorf("GenerateGrovePodCliqueSet() error = %v, wantErr %v", err, tt.wantErr)
return return
} }
sort.Slice(got.Spec.Template.Cliques, func(i, j int) bool { sort.Slice(got.Spec.Template.Cliques, func(i, j int) bool {
...@@ -3124,7 +3124,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) { ...@@ -3124,7 +3124,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
} }
if diff := cmp.Diff(got, tt.want); diff != "" { if diff := cmp.Diff(got, tt.want); diff != "" {
t.Errorf("GenerateGrovePodGangSet() mismatch (-want +got):\n%s", diff) t.Errorf("GenerateGrovePodCliqueSet() mismatch (-want +got):\n%s", diff)
} }
}) })
} }
...@@ -4072,10 +4072,10 @@ func XTestApplyCliqueStartupDependencies(t *testing.T) { ...@@ -4072,10 +4072,10 @@ func XTestApplyCliqueStartupDependencies(t *testing.T) {
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
// Create a PodGangSet with cliques matching the roles // Create a PodCliqueSet with cliques matching the roles
gangSet := &grovev1alpha1.PodGangSet{ gangSet := &grovev1alpha1.PodCliqueSet{
Spec: grovev1alpha1.PodGangSetSpec{ Spec: grovev1alpha1.PodCliqueSetSpec{
Template: grovev1alpha1.PodGangSetTemplateSpec{ Template: grovev1alpha1.PodCliqueSetTemplateSpec{
Cliques: []*grovev1alpha1.PodCliqueTemplateSpec{}, Cliques: []*grovev1alpha1.PodCliqueTemplateSpec{},
}, },
}, },
...@@ -4234,7 +4234,7 @@ func XTestGetCliqueStartupDependencies(t *testing.T) { ...@@ -4234,7 +4234,7 @@ func XTestGetCliqueStartupDependencies(t *testing.T) {
// deactivated for now. // deactivated for now.
// TODO: reactivate this when we have a better way to handle the readiness probe for the leader. // TODO: reactivate this when we have a better way to handle the readiness probe for the leader.
func XTestGenerateGrovePodGangSet_StartsAfterDependencies(t *testing.T) { func XTestGenerateGrovePodCliqueSet_StartsAfterDependencies(t *testing.T) {
secretsRetriever := &mockSecretsRetriever{} secretsRetriever := &mockSecretsRetriever{}
tests := []struct { tests := []struct {
...@@ -4301,9 +4301,9 @@ func XTestGenerateGrovePodGangSet_StartsAfterDependencies(t *testing.T) { ...@@ -4301,9 +4301,9 @@ func XTestGenerateGrovePodGangSet_StartsAfterDependencies(t *testing.T) {
NatsAddress: "nats-address", NatsAddress: "nats-address",
} }
got, err := GenerateGrovePodGangSet(context.Background(), dynamoDeployment, controllerConfig, secretsRetriever) got, err := GenerateGrovePodCliqueSet(context.Background(), dynamoDeployment, controllerConfig, secretsRetriever)
if err != nil { if err != nil {
t.Errorf("GenerateGrovePodGangSet() error = %v", err) t.Errorf("GenerateGrovePodCliqueSet() error = %v", err)
return return
} }
......
...@@ -38,7 +38,7 @@ helm upgrade --install dynamo-graph ./deploy/helm/chart -n dynamo-cloud -f ./com ...@@ -38,7 +38,7 @@ helm upgrade --install dynamo-graph ./deploy/helm/chart -n dynamo-cloud -f ./com
### Installation using Grove ### Installation using Grove
Same example as above, but using Grove PodGangSet resources. Same example as above, but using Grove PodCliqueSet resources.
```bash ```bash
helm upgrade --install dynamo-graph ./deploy/helm/chart -n dynamo-cloud -f ./components/backends/vllm/deploy/agg.yaml --set deploymentType=grove helm upgrade --install dynamo-graph ./deploy/helm/chart -n dynamo-cloud -f ./components/backends/vllm/deploy/agg.yaml --set deploymentType=grove
...@@ -72,10 +72,10 @@ The following table shows which deployment features are supported by the **Helm ...@@ -72,10 +72,10 @@ The following table shows which deployment features are supported by the **Helm
| Feature | Helm Chart | Operator | Description | | Feature | Helm Chart | Operator | Description |
|---------|------------|----------|-------------| |---------|------------|----------|-------------|
| **Singlenode** (k8sDeployments) | ✅ Supported | ✅ Supported | Single-node deployments using standard Kubernetes Deployments | | **Singlenode** (k8sDeployments) | ✅ Supported | ✅ Supported | Single-node deployments using standard Kubernetes Deployments |
| **Singlenode** (Grove PodGangSet) | ✅ Supported | ✅ Supported | Single-node deployments using Grove PodGangSet resources | | **Singlenode** (Grove PodCliqueSet) | ✅ Supported | ✅ Supported | Single-node deployments using Grove PodCliqueSet resources |
| **Multinode** (Grove PodGangSet and LWS) | ❌ Not Supported | ✅ Supported | Multi-node deployments requiring Grove PodGangSet and LeaderWorkerSet (LWS) | | **Multinode** (Grove PodCliqueSet and LWS) | ❌ Not Supported | ✅ Supported | Multi-node deployments requiring Grove PodCliqueSet and LeaderWorkerSet (LWS) |
**Key Differences:** **Key Differences:**
- **Helm Chart**: Best for simple single-node deployments and quick testing. Supports both basic Kubernetes deployments and Grove PodGangSet resources. - **Helm Chart**: Best for simple single-node deployments and quick testing. Supports both basic Kubernetes deployments and Grove PodCliqueSet resources.
- **Operator**: Required for advanced multi-node deployments. Provides full feature support including complex distributed inference configurations. - **Operator**: Required for advanced multi-node deployments. Provides full feature support including complex distributed inference configurations.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment