Unverified Commit 55c26654 authored by Julien Mancuso's avatar Julien Mancuso Committed by GitHub
Browse files

fix: prevent services adding/removal in dgd (#5240)


Signed-off-by: default avatarJulien Mancuso <jmancuso@nvidia.com>
parent 92748c93
...@@ -20,6 +20,7 @@ package validation ...@@ -20,6 +20,7 @@ package validation
import ( import (
"errors" "errors"
"fmt" "fmt"
"sort"
nvidiacomv1alpha1 "github.com/ai-dynamo/dynamo/deploy/operator/api/v1alpha1" nvidiacomv1alpha1 "github.com/ai-dynamo/dynamo/deploy/operator/api/v1alpha1"
internalwebhook "github.com/ai-dynamo/dynamo/deploy/operator/internal/webhook" internalwebhook "github.com/ai-dynamo/dynamo/deploy/operator/internal/webhook"
...@@ -79,6 +80,11 @@ func (v *DynamoGraphDeploymentValidator) ValidateUpdate(old *nvidiacomv1alpha1.D ...@@ -79,6 +80,11 @@ func (v *DynamoGraphDeploymentValidator) ValidateUpdate(old *nvidiacomv1alpha1.D
return warnings, err return warnings, err
} }
// Validate service topology is unchanged (service names must remain the same)
if err := v.validateServiceTopology(old); err != nil {
return warnings, err
}
// Validate replicas changes for services with scaling adapter enabled // Validate replicas changes for services with scaling adapter enabled
// Pass userInfo (may be nil - will fail closed for DGDSA-enabled services) // Pass userInfo (may be nil - will fail closed for DGDSA-enabled services)
if err := v.validateReplicasChanges(old, userInfo); err != nil { if err := v.validateReplicasChanges(old, userInfo); err != nil {
...@@ -119,6 +125,48 @@ func (v *DynamoGraphDeploymentValidator) validateImmutableFields(old *nvidiacomv ...@@ -119,6 +125,48 @@ func (v *DynamoGraphDeploymentValidator) validateImmutableFields(old *nvidiacomv
} }
// validateServiceTopology ensures the set of service names remains unchanged.
// Users can modify service specifications, but cannot add or remove services.
// This maintains graph topology immutability while allowing configuration updates.
func (v *DynamoGraphDeploymentValidator) validateServiceTopology(old *nvidiacomv1alpha1.DynamoGraphDeployment) error {
oldServices := getServiceNames(old.Spec.Services)
newServices := getServiceNames(v.deployment.Spec.Services)
added := difference(newServices, oldServices)
removed := difference(oldServices, newServices)
// Fast path: no changes
if len(added) == 0 && len(removed) == 0 {
return nil
}
// Sort for deterministic error messages
sort.Strings(added)
sort.Strings(removed)
// Build descriptive error message
var errMsg string
switch {
case len(added) > 0 && len(removed) > 0:
errMsg = fmt.Sprintf(
"service topology is immutable and cannot be modified after creation: "+
"services added: %v, services removed: %v",
added, removed)
case len(added) > 0:
errMsg = fmt.Sprintf(
"service topology is immutable and cannot be modified after creation: "+
"services added: %v",
added)
case len(removed) > 0:
errMsg = fmt.Sprintf(
"service topology is immutable and cannot be modified after creation: "+
"services removed: %v",
removed)
}
return errors.New(errMsg)
}
// validateReplicasChanges checks if replicas were changed for services with scaling adapter enabled. // validateReplicasChanges checks if replicas were changed for services with scaling adapter enabled.
// Only authorized service accounts (operator controller, planner) can modify these fields. // Only authorized service accounts (operator controller, planner) can modify these fields.
// If userInfo is nil, all replica changes for DGDSA-enabled services are rejected (fail closed). // If userInfo is nil, all replica changes for DGDSA-enabled services are rejected (fail closed).
...@@ -214,3 +262,25 @@ func (v *DynamoGraphDeploymentValidator) validatePVC(index int, pvc *nvidiacomv1 ...@@ -214,3 +262,25 @@ func (v *DynamoGraphDeploymentValidator) validatePVC(index int, pvc *nvidiacomv1
return err return err
} }
// getServiceNames extracts service names from a services map.
// Returns a set-like map for efficient lookup and comparison.
func getServiceNames(services map[string]*nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec) map[string]struct{} {
names := make(map[string]struct{}, len(services))
for name := range services {
names[name] = struct{}{}
}
return names
}
// difference returns elements in set a that are not in set b (a - b).
// This is used to find added or removed services.
func difference(a, b map[string]struct{}) []string {
var result []string
for name := range a {
if _, exists := b[name]; !exists {
result = append(result, name)
}
}
return result
}
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
package validation package validation
import ( import (
"sort"
"strings" "strings"
"testing" "testing"
...@@ -394,12 +395,151 @@ func TestDynamoGraphDeploymentValidator_ValidateUpdate(t *testing.T) { ...@@ -394,12 +395,151 @@ func TestDynamoGraphDeploymentValidator_ValidateUpdate(t *testing.T) {
expectedWarnMsg: "Changing spec.backendFramework may cause unexpected behavior", expectedWarnMsg: "Changing spec.backendFramework may cause unexpected behavior",
}, },
{ {
name: "adding new service is allowed", name: "adding single service is prohibited",
oldDeployment: &nvidiacomv1alpha1.DynamoGraphDeployment{ oldDeployment: &nvidiacomv1alpha1.DynamoGraphDeployment{
Spec: nvidiacomv1alpha1.DynamoGraphDeploymentSpec{ Spec: nvidiacomv1alpha1.DynamoGraphDeploymentSpec{
BackendFramework: "sglang", BackendFramework: "sglang",
Services: map[string]*nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec{ Services: map[string]*nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec{
"main": {}, "backend": {},
},
},
},
newDeployment: &nvidiacomv1alpha1.DynamoGraphDeployment{
Spec: nvidiacomv1alpha1.DynamoGraphDeploymentSpec{
BackendFramework: "sglang",
Services: map[string]*nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec{
"backend": {},
"frontend": {},
},
},
},
wantErr: true,
errMsg: "service topology is immutable and cannot be modified after creation: services added: [frontend]",
},
{
name: "adding multiple services is prohibited",
oldDeployment: &nvidiacomv1alpha1.DynamoGraphDeployment{
Spec: nvidiacomv1alpha1.DynamoGraphDeploymentSpec{
BackendFramework: "sglang",
Services: map[string]*nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec{
"backend": {},
},
},
},
newDeployment: &nvidiacomv1alpha1.DynamoGraphDeployment{
Spec: nvidiacomv1alpha1.DynamoGraphDeploymentSpec{
BackendFramework: "sglang",
Services: map[string]*nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec{
"backend": {},
"cache": {},
"frontend": {},
},
},
},
wantErr: true,
errMsg: "service topology is immutable and cannot be modified after creation: services added: [cache frontend]",
},
{
name: "removing single service is prohibited",
oldDeployment: &nvidiacomv1alpha1.DynamoGraphDeployment{
Spec: nvidiacomv1alpha1.DynamoGraphDeploymentSpec{
BackendFramework: "sglang",
Services: map[string]*nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec{
"backend": {},
"frontend": {},
},
},
},
newDeployment: &nvidiacomv1alpha1.DynamoGraphDeployment{
Spec: nvidiacomv1alpha1.DynamoGraphDeploymentSpec{
BackendFramework: "sglang",
Services: map[string]*nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec{
"backend": {},
},
},
},
wantErr: true,
errMsg: "service topology is immutable and cannot be modified after creation: services removed: [frontend]",
},
{
name: "removing multiple services is prohibited",
oldDeployment: &nvidiacomv1alpha1.DynamoGraphDeployment{
Spec: nvidiacomv1alpha1.DynamoGraphDeploymentSpec{
BackendFramework: "sglang",
Services: map[string]*nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec{
"backend": {},
"cache": {},
"frontend": {},
},
},
},
newDeployment: &nvidiacomv1alpha1.DynamoGraphDeployment{
Spec: nvidiacomv1alpha1.DynamoGraphDeploymentSpec{
BackendFramework: "sglang",
Services: map[string]*nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec{
"backend": {},
},
},
},
wantErr: true,
errMsg: "service topology is immutable and cannot be modified after creation: services removed: [cache frontend]",
},
{
name: "adding and removing services simultaneously is prohibited",
oldDeployment: &nvidiacomv1alpha1.DynamoGraphDeployment{
Spec: nvidiacomv1alpha1.DynamoGraphDeploymentSpec{
BackendFramework: "sglang",
Services: map[string]*nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec{
"backend": {},
"cache": {},
},
},
},
newDeployment: &nvidiacomv1alpha1.DynamoGraphDeployment{
Spec: nvidiacomv1alpha1.DynamoGraphDeploymentSpec{
BackendFramework: "sglang",
Services: map[string]*nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec{
"backend": {},
"frontend": {},
},
},
},
wantErr: true,
errMsg: "service topology is immutable and cannot be modified after creation: services added: [frontend], services removed: [cache]",
},
{
name: "modifying service specifications is allowed",
oldDeployment: &nvidiacomv1alpha1.DynamoGraphDeployment{
Spec: nvidiacomv1alpha1.DynamoGraphDeploymentSpec{
BackendFramework: "sglang",
Services: map[string]*nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec{
"backend": {
Replicas: func() *int32 { r := int32(1); return &r }(),
},
},
},
},
newDeployment: &nvidiacomv1alpha1.DynamoGraphDeployment{
Spec: nvidiacomv1alpha1.DynamoGraphDeploymentSpec{
BackendFramework: "sglang",
Services: map[string]*nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec{
"backend": {
Replicas: func() *int32 { r := int32(3); return &r }(),
},
},
},
},
wantErr: false,
},
{
name: "service topology unchanged with same services",
oldDeployment: &nvidiacomv1alpha1.DynamoGraphDeployment{
Spec: nvidiacomv1alpha1.DynamoGraphDeploymentSpec{
BackendFramework: "sglang",
Services: map[string]*nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec{
"backend": {},
"frontend": {},
"cache": {},
}, },
}, },
}, },
...@@ -407,8 +547,9 @@ func TestDynamoGraphDeploymentValidator_ValidateUpdate(t *testing.T) { ...@@ -407,8 +547,9 @@ func TestDynamoGraphDeploymentValidator_ValidateUpdate(t *testing.T) {
Spec: nvidiacomv1alpha1.DynamoGraphDeploymentSpec{ Spec: nvidiacomv1alpha1.DynamoGraphDeploymentSpec{
BackendFramework: "sglang", BackendFramework: "sglang",
Services: map[string]*nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec{ Services: map[string]*nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec{
"main": {}, "backend": {},
"prefill": {}, "frontend": {},
"cache": {},
}, },
}, },
}, },
...@@ -738,3 +879,156 @@ func TestDynamoGraphDeploymentValidator_ValidateUpdate(t *testing.T) { ...@@ -738,3 +879,156 @@ func TestDynamoGraphDeploymentValidator_ValidateUpdate(t *testing.T) {
}) })
} }
} }
func TestGetServiceNames(t *testing.T) {
tests := []struct {
name string
services map[string]*nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec
want map[string]struct{}
}{
{
name: "empty services",
services: map[string]*nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec{},
want: map[string]struct{}{},
},
{
name: "single service",
services: map[string]*nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec{
"backend": {},
},
want: map[string]struct{}{
"backend": {},
},
},
{
name: "multiple services",
services: map[string]*nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec{
"backend": {},
"frontend": {},
"cache": {},
},
want: map[string]struct{}{
"backend": {},
"frontend": {},
"cache": {},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := getServiceNames(tt.services)
if len(got) != len(tt.want) {
t.Errorf("getServiceNames() length = %v, want %v", len(got), len(tt.want))
return
}
for name := range tt.want {
if _, exists := got[name]; !exists {
t.Errorf("getServiceNames() missing service %v", name)
}
}
})
}
}
func TestDifference(t *testing.T) {
tests := []struct {
name string
a map[string]struct{}
b map[string]struct{}
want []string
}{
{
name: "empty sets",
a: map[string]struct{}{},
b: map[string]struct{}{},
want: nil,
},
{
name: "a is empty",
a: map[string]struct{}{},
b: map[string]struct{}{
"backend": {},
},
want: nil,
},
{
name: "b is empty",
a: map[string]struct{}{
"backend": {},
},
b: map[string]struct{}{},
want: []string{"backend"},
},
{
name: "no difference - identical sets",
a: map[string]struct{}{
"backend": {},
"frontend": {},
},
b: map[string]struct{}{
"backend": {},
"frontend": {},
},
want: nil,
},
{
name: "single element difference",
a: map[string]struct{}{
"backend": {},
"frontend": {},
},
b: map[string]struct{}{
"backend": {},
},
want: []string{"frontend"},
},
{
name: "multiple element difference",
a: map[string]struct{}{
"backend": {},
"frontend": {},
"cache": {},
},
b: map[string]struct{}{
"backend": {},
},
want: []string{"cache", "frontend"},
},
{
name: "completely different sets",
a: map[string]struct{}{
"frontend": {},
"cache": {},
},
b: map[string]struct{}{
"backend": {},
},
want: []string{"cache", "frontend"},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := difference(tt.a, tt.b)
// Sort both slices for comparison (since map iteration order is undefined)
sort.Strings(got)
want := make([]string, len(tt.want))
copy(want, tt.want)
sort.Strings(want)
if len(got) != len(want) {
t.Errorf("difference() length = %v, want %v", len(got), len(want))
return
}
for i := range got {
if got[i] != want[i] {
t.Errorf("difference() = %v, want %v", got, want)
return
}
}
})
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment