Unverified Commit 09f2314d authored by Julien Mancuso's avatar Julien Mancuso Committed by GitHub
Browse files

feat: add scaling adapter (#4699)


Signed-off-by: default avatarJulien Mancuso <jmancuso@nvidia.com>
parent 1f9b69b0
......@@ -1034,7 +1034,7 @@ func GenerateGrovePodCliqueSet(
PodSpec: *podSpec,
},
}
labels, err := generateLabels(component, dynamoDeployment, r.Name)
labels, err := generateLabels(component, dynamoDeployment, serviceName)
if err != nil {
return nil, fmt.Errorf("failed to generate labels: %w", err)
}
......@@ -1075,6 +1075,7 @@ func generateLabels(component *v1alpha1.DynamoComponentDeploymentSharedSpec, dyn
labels := make(map[string]string)
labels[commonconsts.KubeLabelDynamoSelector] = GetDynamoComponentName(dynamoDeployment, componentName)
labels[commonconsts.KubeLabelDynamoGraphDeploymentName] = dynamoDeployment.Name
labels[commonconsts.KubeLabelDynamoComponent] = componentName
if component.DynamoNamespace != nil {
labels[commonconsts.KubeLabelDynamoNamespace] = *component.DynamoNamespace
}
......
......@@ -121,7 +121,6 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) {
commonconsts.KubeLabelDynamoNamespace: "default-test-dynamographdeployment",
commonconsts.KubeLabelDynamoGraphDeploymentName: "test-dynamographdeployment",
},
Autoscaling: nil,
},
},
},
......@@ -153,7 +152,6 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) {
Custom: map[string]string{},
},
},
Autoscaling: nil,
},
},
},
......@@ -229,7 +227,6 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) {
commonconsts.KubeLabelDynamoNamespace: "default-test-dynamographdeployment",
commonconsts.KubeLabelDynamoGraphDeploymentName: "test-dynamographdeployment",
},
Autoscaling: nil,
},
},
},
......@@ -261,7 +258,6 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) {
Custom: map[string]string{},
},
},
Autoscaling: nil,
},
},
},
......@@ -341,7 +337,6 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) {
commonconsts.KubeLabelDynamoNamespace: "default-test-dynamographdeployment",
commonconsts.KubeLabelDynamoGraphDeploymentName: "test-dynamographdeployment",
},
Autoscaling: nil,
Ingress: &v1alpha1.IngressSpec{
Enabled: true,
Host: "test-dynamographdeployment",
......@@ -377,7 +372,6 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) {
Custom: map[string]string{},
},
},
Autoscaling: nil,
},
},
},
......@@ -465,7 +459,6 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) {
commonconsts.KubeLabelDynamoNamespace: "default-test-dynamographdeployment",
commonconsts.KubeLabelDynamoGraphDeploymentName: "test-dynamographdeployment",
},
Autoscaling: nil,
Envs: []corev1.EnvVar{
{
Name: "DYN_DEPLOYMENT_CONFIG",
......@@ -503,7 +496,6 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) {
Custom: map[string]string{},
},
},
Autoscaling: nil,
Envs: []corev1.EnvVar{
{
Name: "DYN_DEPLOYMENT_CONFIG",
......@@ -599,7 +591,6 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) {
commonconsts.KubeLabelDynamoNamespace: "default-test-dynamographdeployment",
commonconsts.KubeLabelDynamoGraphDeploymentName: "test-dynamographdeployment",
},
Autoscaling: nil,
ExtraPodSpec: &v1alpha1.ExtraPodSpec{
MainContainer: &corev1.Container{
Command: []string{"sh", "-c"},
......@@ -644,7 +635,6 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) {
Custom: map[string]string{},
},
},
Autoscaling: nil,
Envs: []corev1.EnvVar{
{
Name: "TEST_ENV",
......@@ -1307,6 +1297,7 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
Name: "frontend",
Labels: map[string]string{
commonconsts.KubeLabelDynamoSelector: "test-dynamo-graph-deployment-frontend",
commonconsts.KubeLabelDynamoComponent: "Frontend",
commonconsts.KubeLabelMetricsEnabled: commonconsts.KubeLabelValueTrue,
commonconsts.KubeLabelDynamoComponentType: commonconsts.ComponentTypeFrontend,
commonconsts.KubeLabelDynamoSubComponentType: "test-sub-component",
......@@ -1483,6 +1474,7 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
Labels: map[string]string{
commonconsts.KubeLabelMetricsEnabled: commonconsts.KubeLabelValueTrue,
commonconsts.KubeLabelDynamoSelector: "test-dynamo-graph-deployment-planner",
commonconsts.KubeLabelDynamoComponent: "Planner",
commonconsts.KubeLabelDynamoGraphDeploymentName: "test-dynamo-graph-deployment",
commonconsts.KubeLabelDynamoComponentType: commonconsts.ComponentTypePlanner,
commonconsts.KubeLabelDynamoNamespace: "test-namespace-test-dynamo-graph-deployment",
......@@ -1884,8 +1876,9 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
commonconsts.KubeLabelDynamoComponentType: commonconsts.ComponentTypeWorker,
commonconsts.KubeLabelDynamoSubComponentType: "test-sub-component",
commonconsts.KubeLabelMetricsEnabled: commonconsts.KubeLabelValueTrue,
commonconsts.KubeLabelDynamoSelector: "test-dynamo-graph-deployment-worker-ldr",
commonconsts.KubeLabelDynamoSelector: "test-dynamo-graph-deployment-worker",
commonconsts.KubeLabelDynamoGraphDeploymentName: "test-dynamo-graph-deployment",
commonconsts.KubeLabelDynamoComponent: "worker",
commonconsts.KubeLabelDynamoNamespace: "test-namespace-test-dynamo-graph-deployment",
"nvidia.com/label1": "label1",
"nvidia.com/label2": "label2",
......@@ -2059,8 +2052,9 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
commonconsts.KubeLabelDynamoComponentType: commonconsts.ComponentTypeWorker,
commonconsts.KubeLabelDynamoSubComponentType: "test-sub-component",
commonconsts.KubeLabelMetricsEnabled: commonconsts.KubeLabelValueTrue,
commonconsts.KubeLabelDynamoSelector: "test-dynamo-graph-deployment-worker-wkr",
commonconsts.KubeLabelDynamoSelector: "test-dynamo-graph-deployment-worker",
commonconsts.KubeLabelDynamoGraphDeploymentName: "test-dynamo-graph-deployment",
commonconsts.KubeLabelDynamoComponent: "worker",
commonconsts.KubeLabelDynamoNamespace: "test-namespace-test-dynamo-graph-deployment",
"nvidia.com/label1": "label1",
"nvidia.com/label2": "label2",
......@@ -2200,6 +2194,7 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
commonconsts.KubeLabelDynamoSelector: "test-dynamo-graph-deployment-frontend",
commonconsts.KubeLabelDynamoComponentType: commonconsts.ComponentTypeFrontend,
commonconsts.KubeLabelDynamoGraphDeploymentName: "test-dynamo-graph-deployment",
commonconsts.KubeLabelDynamoComponent: "Frontend",
commonconsts.KubeLabelDynamoNamespace: "test-namespace-test-dynamo-graph-deployment",
},
Annotations: map[string]string{},
......@@ -2358,6 +2353,7 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
Name: "planner",
Labels: map[string]string{
commonconsts.KubeLabelDynamoSelector: "test-dynamo-graph-deployment-planner",
commonconsts.KubeLabelDynamoComponent: "Planner",
commonconsts.KubeLabelMetricsEnabled: commonconsts.KubeLabelValueTrue,
commonconsts.KubeLabelDynamoGraphDeploymentName: "test-dynamo-graph-deployment",
commonconsts.KubeLabelDynamoComponentType: commonconsts.ComponentTypePlanner,
......@@ -2779,7 +2775,8 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
{
Name: "worker-ldr",
Labels: map[string]string{
commonconsts.KubeLabelDynamoSelector: "test-dynamo-graph-deployment-worker-ldr",
commonconsts.KubeLabelDynamoSelector: "test-dynamo-graph-deployment-worker",
commonconsts.KubeLabelDynamoComponent: "worker",
commonconsts.KubeLabelMetricsEnabled: commonconsts.KubeLabelValueTrue,
commonconsts.KubeLabelDynamoComponentType: commonconsts.ComponentTypeWorker,
commonconsts.KubeLabelDynamoGraphDeploymentName: "test-dynamo-graph-deployment",
......@@ -2943,7 +2940,8 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
Labels: map[string]string{
commonconsts.KubeLabelDynamoComponentType: commonconsts.ComponentTypeWorker,
commonconsts.KubeLabelMetricsEnabled: commonconsts.KubeLabelValueTrue,
commonconsts.KubeLabelDynamoSelector: "test-dynamo-graph-deployment-worker-wkr",
commonconsts.KubeLabelDynamoSelector: "test-dynamo-graph-deployment-worker",
commonconsts.KubeLabelDynamoComponent: "worker",
commonconsts.KubeLabelDynamoGraphDeploymentName: "test-dynamo-graph-deployment",
commonconsts.KubeLabelDynamoNamespace: "test-namespace-test-dynamo-graph-deployment",
"nvidia.com/label1": "label1",
......@@ -3084,6 +3082,7 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
commonconsts.KubeLabelMetricsEnabled: commonconsts.KubeLabelValueTrue,
commonconsts.KubeLabelDynamoSelector: "test-dynamo-graph-deployment-frontend",
commonconsts.KubeLabelDynamoGraphDeploymentName: "test-dynamo-graph-deployment",
commonconsts.KubeLabelDynamoComponent: "Frontend",
commonconsts.KubeLabelDynamoNamespace: "test-namespace-test-dynamo-graph-deployment",
},
Annotations: map[string]string{},
......@@ -3243,6 +3242,7 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
Labels: map[string]string{
commonconsts.KubeLabelMetricsEnabled: commonconsts.KubeLabelValueTrue,
commonconsts.KubeLabelDynamoSelector: "test-dynamo-graph-deployment-planner",
commonconsts.KubeLabelDynamoComponent: "Planner",
commonconsts.KubeLabelDynamoGraphDeploymentName: "test-dynamo-graph-deployment",
commonconsts.KubeLabelDynamoComponentType: commonconsts.ComponentTypePlanner,
commonconsts.KubeLabelDynamoNamespace: "test-namespace-test-dynamo-graph-deployment",
......
......@@ -19,7 +19,9 @@ package webhook
import (
"context"
"strings"
authenticationv1 "k8s.io/api/authentication/v1"
"k8s.io/apimachinery/pkg/runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
logf "sigs.k8s.io/controller-runtime/pkg/log"
......@@ -118,3 +120,54 @@ func (v *LeaseAwareValidator) shouldSkipValidation(obj runtime.Object) bool {
return false
}
// DGDReplicasModifierSuffixes defines suffixes for service accounts that are authorized
// to modify DGD replicas when scaling adapter is enabled.
// Service accounts matching any of these suffixes are allowed regardless of namespace.
var DGDReplicasModifierSuffixes = []string{
// Dynamo operator controller manager (handles DGDSA reconciliation)
// Example: "dynamo-platform-dynamo-operator-controller-manager"
"-dynamo-operator-controller-manager",
// Planner service account (manages DGD replicas for autoscaling)
// Example: "planner-serviceaccount"
"planner-serviceaccount",
}
// CanModifyDGDReplicas checks if the request comes from a service account authorized
// to modify DGD replicas when scaling adapter is enabled.
// Service accounts are identified by username format: system:serviceaccount:<namespace>:<name>
//
// Authorized service accounts (by suffix):
// - *-dynamo-operator-controller-manager (for DGDSA reconciliation)
// - *planner-serviceaccount (for Planner autoscaling)
func CanModifyDGDReplicas(userInfo authenticationv1.UserInfo) bool {
username := userInfo.Username
// Service accounts have username format: system:serviceaccount:<namespace>:<name>
if !strings.HasPrefix(username, "system:serviceaccount:") {
return false
}
// Parse: system:serviceaccount:<namespace>:<name>
parts := strings.Split(username, ":")
if len(parts) != 4 {
return false
}
namespace := parts[2]
saName := parts[3]
// Check against authorized suffixes
for _, suffix := range DGDReplicasModifierSuffixes {
if strings.HasSuffix(saName, suffix) {
webhookCommonLog.V(1).Info("allowing DGD replicas modification",
"serviceAccount", saName,
"namespace", namespace,
"matchedSuffix", suffix)
return true
}
}
return false
}
......@@ -42,13 +42,10 @@ func NewDynamoComponentDeploymentValidator(deployment *nvidiacomv1alpha1.DynamoC
func (v *DynamoComponentDeploymentValidator) Validate() (admission.Warnings, error) {
// Validate shared spec fields using SharedSpecValidator
sharedValidator := NewSharedSpecValidator(&v.deployment.Spec.DynamoComponentDeploymentSharedSpec, "spec")
if err := sharedValidator.Validate(); err != nil {
return nil, err
}
// DCD-specific validation would go here (currently none)
return nil, nil
return sharedValidator.Validate()
}
// ValidateUpdate performs stateful validation comparing old and new DynamoComponentDeployment.
......
......@@ -47,11 +47,6 @@ func TestDynamoComponentDeploymentValidator_Validate(t *testing.T) {
Spec: nvidiacomv1alpha1.DynamoComponentDeploymentSpec{
DynamoComponentDeploymentSharedSpec: nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec{
Replicas: &validReplicas,
Autoscaling: &nvidiacomv1alpha1.Autoscaling{
Enabled: true,
MinReplicas: 1,
MaxReplicas: 10,
},
},
BackendFramework: "sglang",
},
......@@ -74,26 +69,6 @@ func TestDynamoComponentDeploymentValidator_Validate(t *testing.T) {
wantErr: true,
errMsg: "spec.replicas must be non-negative",
},
{
name: "invalid autoscaling",
deployment: &nvidiacomv1alpha1.DynamoComponentDeployment{
ObjectMeta: metav1.ObjectMeta{
Name: "test-deployment",
Namespace: "default",
},
Spec: nvidiacomv1alpha1.DynamoComponentDeploymentSpec{
DynamoComponentDeploymentSharedSpec: nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec{
Autoscaling: &nvidiacomv1alpha1.Autoscaling{
Enabled: true,
MinReplicas: 5,
MaxReplicas: 3,
},
},
},
},
wantErr: true,
errMsg: "spec.autoscaling.maxReplicas must be > minReplicas",
},
{
name: "invalid ingress",
deployment: &nvidiacomv1alpha1.DynamoComponentDeployment{
......
......@@ -22,6 +22,8 @@ import (
"fmt"
nvidiacomv1alpha1 "github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
internalwebhook "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/webhook"
authenticationv1 "k8s.io/api/authentication/v1"
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
)
......@@ -51,30 +53,106 @@ func (v *DynamoGraphDeploymentValidator) Validate() (admission.Warnings, error)
return nil, err
}
var allWarnings admission.Warnings
// Validate each service
for serviceName, service := range v.deployment.Spec.Services {
if err := v.validateService(serviceName, service); err != nil {
warnings, err := v.validateService(serviceName, service)
if err != nil {
return nil, err
}
allWarnings = append(allWarnings, warnings...)
}
return nil, nil
return allWarnings, nil
}
// ValidateUpdate performs stateful validation comparing old and new DynamoGraphDeployment.
// userInfo is used for identity-based validation (replica protection).
// If userInfo is nil, replica changes for DGDSA-enabled services are rejected (fail closed).
// Returns warnings and error.
func (v *DynamoGraphDeploymentValidator) ValidateUpdate(old *nvidiacomv1alpha1.DynamoGraphDeployment) (admission.Warnings, error) {
// Validate that BackendFramework is not changed (immutable)
func (v *DynamoGraphDeploymentValidator) ValidateUpdate(old *nvidiacomv1alpha1.DynamoGraphDeployment, userInfo *authenticationv1.UserInfo) (admission.Warnings, error) {
var warnings admission.Warnings
// Validate immutable fields
if err := v.validateImmutableFields(old, &warnings); err != nil {
return warnings, err
}
// Validate replicas changes for services with scaling adapter enabled
// Pass userInfo (may be nil - will fail closed for DGDSA-enabled services)
if err := v.validateReplicasChanges(old, userInfo); err != nil {
return warnings, err
}
return warnings, nil
}
// validateImmutableFields checks that immutable fields have not been changed.
// Appends warnings to the provided slice.
func (v *DynamoGraphDeploymentValidator) validateImmutableFields(old *nvidiacomv1alpha1.DynamoGraphDeployment, warnings *admission.Warnings) error {
if v.deployment.Spec.BackendFramework != old.Spec.BackendFramework {
warning := "Changing spec.backendFramework may cause unexpected behavior"
return admission.Warnings{warning}, fmt.Errorf("spec.backendFramework is immutable and cannot be changed after creation")
*warnings = append(*warnings, "Changing spec.backendFramework may cause unexpected behavior")
return fmt.Errorf("spec.backendFramework is immutable and cannot be changed after creation")
}
return nil
}
return nil, nil
// validateReplicasChanges checks if replicas were changed for services with scaling adapter enabled.
// Only authorized service accounts (operator controller, planner) can modify these fields.
// If userInfo is nil, all replica changes for DGDSA-enabled services are rejected (fail closed).
func (v *DynamoGraphDeploymentValidator) validateReplicasChanges(old *nvidiacomv1alpha1.DynamoGraphDeployment, userInfo *authenticationv1.UserInfo) error {
// If the request comes from an authorized service account, allow the change
if userInfo != nil && internalwebhook.CanModifyDGDReplicas(*userInfo) {
return nil
}
var errs []error
for serviceName, newService := range v.deployment.Spec.Services {
// Check if scaling adapter is enabled for this service (enabled by default)
scalingAdapterEnabled := true
if newService.ScalingAdapter != nil && newService.ScalingAdapter.Disable {
scalingAdapterEnabled = false
}
if !scalingAdapterEnabled {
// Scaling adapter is disabled, users can modify replicas directly
continue
}
// Get old service (if exists)
oldService, exists := old.Spec.Services[serviceName]
if !exists {
// New service, no comparison needed
continue
}
// Check if replicas changed
oldReplicas := int32(1) // default
if oldService.Replicas != nil {
oldReplicas = *oldService.Replicas
}
newReplicas := int32(1) // default
if newService.Replicas != nil {
newReplicas = *newService.Replicas
}
if oldReplicas != newReplicas {
errs = append(errs, fmt.Errorf(
"spec.services[%s].replicas cannot be modified directly when scaling adapter is enabled; "+
"scale or update the related DynamoGraphDeploymentScalingAdapter instead",
serviceName))
}
}
return errors.Join(errs...)
}
// validateService validates a single service configuration using SharedSpecValidator.
func (v *DynamoGraphDeploymentValidator) validateService(serviceName string, service *nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec) error {
// Returns warnings and error.
func (v *DynamoGraphDeploymentValidator) validateService(serviceName string, service *nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec) (admission.Warnings, error) {
// Use SharedSpecValidator to validate service spec (which is a DynamoComponentDeploymentSharedSpec)
fieldPath := fmt.Sprintf("spec.services[%s]", serviceName)
sharedValidator := NewSharedSpecValidator(service, fieldPath)
......
......@@ -23,6 +23,7 @@ import (
nvidiacomv1alpha1 "github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
internalwebhook "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/webhook"
authenticationv1 "k8s.io/api/authentication/v1"
"k8s.io/apimachinery/pkg/runtime"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/manager"
......@@ -91,9 +92,24 @@ func (h *DynamoGraphDeploymentHandler) ValidateUpdate(ctx context.Context, oldOb
return warnings, err
}
// Validate stateful rules (immutability)
updateWarnings, err := validator.ValidateUpdate(oldDeployment)
// Get user info from admission request context for identity-based validation
var userInfo *authenticationv1.UserInfo
req, err := admission.RequestFromContext(ctx)
if err != nil {
logger.Error(err, "failed to get admission request from context, replica changes for DGDSA-enabled services will be rejected")
// userInfo remains nil - validateReplicasChanges will fail closed
} else {
userInfo = &req.UserInfo
}
// Validate stateful rules (immutability + replicas protection)
updateWarnings, err := validator.ValidateUpdate(oldDeployment, userInfo)
if err != nil {
username := "<unknown>"
if userInfo != nil {
username = userInfo.Username
}
logger.Info("validation failed", "error", err.Error(), "user", username)
return updateWarnings, err
}
......
......@@ -93,28 +93,6 @@ func TestDynamoGraphDeploymentValidator_Validate(t *testing.T) {
wantErr: true,
errMsg: "spec.services[main].replicas must be non-negative",
},
{
name: "service with invalid autoscaling",
deployment: &nvidiacomv1alpha1.DynamoGraphDeployment{
ObjectMeta: metav1.ObjectMeta{
Name: "test-graph",
Namespace: "default",
},
Spec: nvidiacomv1alpha1.DynamoGraphDeploymentSpec{
Services: map[string]*nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec{
"prefill": {
Autoscaling: &nvidiacomv1alpha1.Autoscaling{
Enabled: true,
MinReplicas: 10,
MaxReplicas: 5,
},
},
},
},
},
wantErr: true,
errMsg: "spec.services[prefill].autoscaling.maxReplicas must be > minReplicas",
},
{
name: "service with invalid ingress",
deployment: &nvidiacomv1alpha1.DynamoGraphDeployment{
......@@ -441,7 +419,8 @@ func TestDynamoGraphDeploymentValidator_ValidateUpdate(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
validator := NewDynamoGraphDeploymentValidator(tt.newDeployment)
warnings, err := validator.ValidateUpdate(tt.oldDeployment)
// Pass nil userInfo - these tests don't modify replicas, so it's safe
warnings, err := validator.ValidateUpdate(tt.oldDeployment, nil)
if (err != nil) != tt.wantErr {
t.Errorf("DynamoGraphDeploymentValidator.ValidateUpdate() error = %v, wantErr %v", err, tt.wantErr)
......
......@@ -21,6 +21,7 @@ import (
"fmt"
nvidiacomv1alpha1 "github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
)
// SharedSpecValidator validates DynamoComponentDeploymentSharedSpec fields.
......@@ -41,61 +42,45 @@ func NewSharedSpecValidator(spec *nvidiacomv1alpha1.DynamoComponentDeploymentSha
}
// Validate performs validation on the shared spec fields.
// Returns an error if validation fails.
func (v *SharedSpecValidator) Validate() error {
// Returns warnings (e.g., deprecation notices) and error if validation fails.
func (v *SharedSpecValidator) Validate() (admission.Warnings, error) {
// Validate replicas if specified
if v.spec.Replicas != nil && *v.spec.Replicas < 0 {
return fmt.Errorf("%s.replicas must be non-negative", v.fieldPath)
}
// Validate autoscaling configuration if specified
if v.spec.Autoscaling != nil {
if err := v.validateAutoscaling(); err != nil {
return err
}
return nil, fmt.Errorf("%s.replicas must be non-negative", v.fieldPath)
}
// Validate ingress configuration if enabled
if v.spec.Ingress != nil && v.spec.Ingress.Enabled {
if err := v.validateIngress(); err != nil {
return err
return nil, err
}
}
// Validate volume mounts
if err := v.validateVolumeMounts(); err != nil {
return err
return nil, err
}
// Validate shared memory
if v.spec.SharedMemory != nil {
if err := v.validateSharedMemory(); err != nil {
return err
return nil, err
}
}
return nil
}
// validateAutoscaling validates the autoscaling configuration.
func (v *SharedSpecValidator) validateAutoscaling() error {
autoscaling := v.spec.Autoscaling
if !autoscaling.Enabled {
return nil
}
// Validate minReplicas
if autoscaling.MinReplicas < 1 {
return fmt.Errorf("%s.autoscaling.minReplicas must be >= 1", v.fieldPath)
}
// Collect warnings (e.g., deprecation notices)
var warnings admission.Warnings
// Validate maxReplicas
if autoscaling.MaxReplicas <= autoscaling.MinReplicas {
return fmt.Errorf("%s.autoscaling.maxReplicas must be > minReplicas", v.fieldPath)
// Check for deprecated autoscaling field
//nolint:staticcheck // SA1019: Intentionally checking deprecated field to warn users
if v.spec.Autoscaling != nil {
warnings = append(warnings, fmt.Sprintf(
"%s.autoscaling is deprecated and ignored. Use DynamoGraphDeploymentScalingAdapter "+
"with HPA, KEDA, or Planner for autoscaling instead. See docs/kubernetes/autoscaling.md",
v.fieldPath))
}
return nil
return warnings, nil
}
// validateIngress validates the ingress configuration.
......
......@@ -41,11 +41,6 @@ func TestSharedSpecValidator_Validate(t *testing.T) {
name: "valid spec with all fields",
spec: &nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec{
Replicas: &validReplicas,
Autoscaling: &nvidiacomv1alpha1.Autoscaling{
Enabled: true,
MinReplicas: 1,
MaxReplicas: 10,
},
Ingress: &nvidiacomv1alpha1.IngressSpec{
Enabled: true,
Host: "example.com",
......@@ -77,44 +72,6 @@ func TestSharedSpecValidator_Validate(t *testing.T) {
wantErr: true,
errMsg: "spec.replicas must be non-negative",
},
{
name: "autoscaling minReplicas too low",
spec: &nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec{
Autoscaling: &nvidiacomv1alpha1.Autoscaling{
Enabled: true,
MinReplicas: 0,
MaxReplicas: 10,
},
},
fieldPath: "spec",
wantErr: true,
errMsg: "spec.autoscaling.minReplicas must be >= 1",
},
{
name: "autoscaling maxReplicas less than minReplicas",
spec: &nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec{
Autoscaling: &nvidiacomv1alpha1.Autoscaling{
Enabled: true,
MinReplicas: 5,
MaxReplicas: 3,
},
},
fieldPath: "spec",
wantErr: true,
errMsg: "spec.autoscaling.maxReplicas must be > minReplicas",
},
{
name: "autoscaling disabled - no validation",
spec: &nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec{
Autoscaling: &nvidiacomv1alpha1.Autoscaling{
Enabled: false,
MinReplicas: 0,
MaxReplicas: 0,
},
},
fieldPath: "spec",
wantErr: false,
},
{
name: "ingress enabled without host",
spec: &nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec{
......@@ -227,7 +184,7 @@ func TestSharedSpecValidator_Validate(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
validator := NewSharedSpecValidator(tt.spec, tt.fieldPath)
err := validator.Validate()
_, err := validator.Validate()
if (err != nil) != tt.wantErr {
t.Errorf("SharedSpecValidator.Validate() error = %v, wantErr %v", err, tt.wantErr)
......@@ -240,3 +197,53 @@ func TestSharedSpecValidator_Validate(t *testing.T) {
})
}
}
func TestSharedSpecValidator_Validate_Warnings(t *testing.T) {
validReplicas := int32(3)
tests := []struct {
name string
spec *nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec
fieldPath string
wantWarnings int
}{
{
name: "no warnings for spec without autoscaling",
spec: &nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec{
Replicas: &validReplicas,
},
fieldPath: "spec",
wantWarnings: 0,
},
{
name: "warning for deprecated autoscaling field enabled",
spec: &nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec{
Replicas: &validReplicas,
//nolint:staticcheck // SA1019: Intentionally testing deprecated field
Autoscaling: &nvidiacomv1alpha1.Autoscaling{
Enabled: true,
MinReplicas: 1,
MaxReplicas: 10,
},
},
fieldPath: "spec",
wantWarnings: 1,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
validator := NewSharedSpecValidator(tt.spec, tt.fieldPath)
warnings, err := validator.Validate()
if err != nil {
t.Errorf("SharedSpecValidator.Validate() unexpected error = %v", err)
return
}
if len(warnings) != tt.wantWarnings {
t.Errorf("SharedSpecValidator.Validate() warnings count = %d, want %d", len(warnings), tt.wantWarnings)
}
})
}
}
......@@ -10,3 +10,4 @@ Deployment Guide
Webhooks <../kubernetes/webhooks>
Minikube Setup <../kubernetes/deployment/minikube>
Managing Models with DynamoModel <../kubernetes/deployment/dynamomodel-guide>
Autoscaling <../kubernetes/autoscaling>
......@@ -37,6 +37,7 @@ Package v1alpha1 contains API Schema definitions for the nvidia.com v1alpha1 API
- [DynamoComponentDeployment](#dynamocomponentdeployment)
- [DynamoGraphDeployment](#dynamographdeployment)
- [DynamoGraphDeploymentRequest](#dynamographdeploymentrequest)
- [DynamoGraphDeploymentScalingAdapter](#dynamographdeploymentscalingadapter)
- [DynamoModel](#dynamomodel)
......@@ -45,7 +46,9 @@ Package v1alpha1 contains API Schema definitions for the nvidia.com v1alpha1 API
Deprecated: This field is deprecated and ignored. Use DynamoGraphDeploymentScalingAdapter
with HPA, KEDA, or Planner for autoscaling instead. See docs/kubernetes/autoscaling.md
for migration guidance. This field will be removed in a future API version.
......@@ -55,11 +58,11 @@ _Appears in:_
| Field | Description | Default | Validation |
| --- | --- | --- | --- |
| `enabled` _boolean_ | | | |
| `minReplicas` _integer_ | | | |
| `maxReplicas` _integer_ | | | |
| `behavior` _[HorizontalPodAutoscalerBehavior](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#horizontalpodautoscalerbehavior-v2-autoscaling)_ | | | |
| `metrics` _[MetricSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#metricspec-v2-autoscaling) array_ | | | |
| `enabled` _boolean_ | Deprecated: This field is ignored. | | |
| `minReplicas` _integer_ | Deprecated: This field is ignored. | | |
| `maxReplicas` _integer_ | Deprecated: This field is ignored. | | |
| `behavior` _[HorizontalPodAutoscalerBehavior](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#horizontalpodautoscalerbehavior-v2-autoscaling)_ | Deprecated: This field is ignored. | | |
| `metrics` _[MetricSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#metricspec-v2-autoscaling) array_ | Deprecated: This field is ignored. | | |
......@@ -165,7 +168,7 @@ _Appears in:_
| `dynamoNamespace` _string_ | DynamoNamespace is deprecated and will be removed in a future version.<br />The DGD Kubernetes namespace and DynamoGraphDeployment name are used to construct the Dynamo namespace for each component | | Optional: \{\} <br /> |
| `globalDynamoNamespace` _boolean_ | GlobalDynamoNamespace indicates that the Component will be placed in the global Dynamo namespace | | |
| `resources` _[Resources](#resources)_ | Resources requested and limits for this component, including CPU, memory,<br />GPUs/devices, and any runtime-specific resources. | | |
| `autoscaling` _[Autoscaling](#autoscaling)_ | Autoscaling config for this component (replica range, target utilization, etc.). | | |
| `autoscaling` _[Autoscaling](#autoscaling)_ | Deprecated: This field is deprecated and ignored. Use DynamoGraphDeploymentScalingAdapter<br />with HPA, KEDA, or Planner for autoscaling instead. See docs/kubernetes/autoscaling.md<br />for migration guidance. This field will be removed in a future API version. | | |
| `envs` _[EnvVar](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#envvar-v1-core) array_ | Envs defines additional environment variables to inject into the component containers. | | |
| `envFromSecret` _string_ | EnvFromSecret references a Secret whose key/value pairs will be exposed as<br />environment variables in the component containers. | | |
| `volumeMounts` _[VolumeMount](#volumemount) array_ | VolumeMounts references PVCs defined at the top level for volumes to be mounted by the component. | | |
......@@ -176,8 +179,9 @@ _Appears in:_
| `extraPodSpec` _[ExtraPodSpec](#extrapodspec)_ | ExtraPodSpec allows to override the main pod spec configuration.<br />It is a k8s standard PodSpec. It also contains a MainContainer (standard k8s Container) field<br />that allows overriding the main container configuration. | | |
| `livenessProbe` _[Probe](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#probe-v1-core)_ | LivenessProbe to detect and restart unhealthy containers. | | |
| `readinessProbe` _[Probe](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#probe-v1-core)_ | ReadinessProbe to signal when the container is ready to receive traffic. | | |
| `replicas` _integer_ | Replicas is the desired number of Pods for this component when autoscaling is not used. | | |
| `replicas` _integer_ | Replicas is the desired number of Pods for this component.<br />When scalingAdapter is enabled (default), this field is managed by the<br />DynamoGraphDeploymentScalingAdapter and should not be modified directly. | | Minimum: 0 <br /> |
| `multinode` _[MultinodeSpec](#multinodespec)_ | Multinode is the configuration for multinode components. | | |
| `scalingAdapter` _[ScalingAdapter](#scalingadapter)_ | ScalingAdapter configures whether this service uses the DynamoGraphDeploymentScalingAdapter.<br />When enabled (default), replicas are managed via DGDSA and external autoscalers can scale<br />the service using the Scale subresource. When disabled, replicas can be modified directly. | | |
#### DynamoComponentDeploymentSpec
......@@ -202,7 +206,7 @@ _Appears in:_
| `dynamoNamespace` _string_ | DynamoNamespace is deprecated and will be removed in a future version.<br />The DGD Kubernetes namespace and DynamoGraphDeployment name are used to construct the Dynamo namespace for each component | | Optional: \{\} <br /> |
| `globalDynamoNamespace` _boolean_ | GlobalDynamoNamespace indicates that the Component will be placed in the global Dynamo namespace | | |
| `resources` _[Resources](#resources)_ | Resources requested and limits for this component, including CPU, memory,<br />GPUs/devices, and any runtime-specific resources. | | |
| `autoscaling` _[Autoscaling](#autoscaling)_ | Autoscaling config for this component (replica range, target utilization, etc.). | | |
| `autoscaling` _[Autoscaling](#autoscaling)_ | Deprecated: This field is deprecated and ignored. Use DynamoGraphDeploymentScalingAdapter<br />with HPA, KEDA, or Planner for autoscaling instead. See docs/kubernetes/autoscaling.md<br />for migration guidance. This field will be removed in a future API version. | | |
| `envs` _[EnvVar](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#envvar-v1-core) array_ | Envs defines additional environment variables to inject into the component containers. | | |
| `envFromSecret` _string_ | EnvFromSecret references a Secret whose key/value pairs will be exposed as<br />environment variables in the component containers. | | |
| `volumeMounts` _[VolumeMount](#volumemount) array_ | VolumeMounts references PVCs defined at the top level for volumes to be mounted by the component. | | |
......@@ -213,8 +217,9 @@ _Appears in:_
| `extraPodSpec` _[ExtraPodSpec](#extrapodspec)_ | ExtraPodSpec allows to override the main pod spec configuration.<br />It is a k8s standard PodSpec. It also contains a MainContainer (standard k8s Container) field<br />that allows overriding the main container configuration. | | |
| `livenessProbe` _[Probe](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#probe-v1-core)_ | LivenessProbe to detect and restart unhealthy containers. | | |
| `readinessProbe` _[Probe](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#probe-v1-core)_ | ReadinessProbe to signal when the container is ready to receive traffic. | | |
| `replicas` _integer_ | Replicas is the desired number of Pods for this component when autoscaling is not used. | | |
| `replicas` _integer_ | Replicas is the desired number of Pods for this component.<br />When scalingAdapter is enabled (default), this field is managed by the<br />DynamoGraphDeploymentScalingAdapter and should not be modified directly. | | Minimum: 0 <br /> |
| `multinode` _[MultinodeSpec](#multinodespec)_ | Multinode is the configuration for multinode components. | | |
| `scalingAdapter` _[ScalingAdapter](#scalingadapter)_ | ScalingAdapter configures whether this service uses the DynamoGraphDeploymentScalingAdapter.<br />When enabled (default), replicas are managed via DGDSA and external autoscalers can scale<br />the service using the Scale subresource. When disabled, replicas can be modified directly. | | |
#### DynamoGraphDeployment
......@@ -314,6 +319,83 @@ _Appears in:_
| `deployment` _[DeploymentStatus](#deploymentstatus)_ | Deployment tracks the auto-created DGD when AutoApply is true.<br />Contains name, namespace, state, and creation status of the managed DGD. | | Optional: \{\} <br /> |
#### DynamoGraphDeploymentScalingAdapter
DynamoGraphDeploymentScalingAdapter provides a scaling interface for individual services
within a DynamoGraphDeployment. It implements the Kubernetes scale
subresource, enabling integration with HPA, KEDA, and custom autoscalers.
The adapter acts as an intermediary between autoscalers and the DGD,
ensuring that only the adapter controller modifies the DGD's service replicas.
This prevents conflicts when multiple autoscaling mechanisms are in play.
| Field | Description | Default | Validation |
| --- | --- | --- | --- |
| `apiVersion` _string_ | `nvidia.com/v1alpha1` | | |
| `kind` _string_ | `DynamoGraphDeploymentScalingAdapter` | | |
| `metadata` _[ObjectMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#objectmeta-v1-meta)_ | Refer to Kubernetes API documentation for fields of `metadata`. | | |
| `spec` _[DynamoGraphDeploymentScalingAdapterSpec](#dynamographdeploymentscalingadapterspec)_ | | | |
| `status` _[DynamoGraphDeploymentScalingAdapterStatus](#dynamographdeploymentscalingadapterstatus)_ | | | |
#### DynamoGraphDeploymentScalingAdapterSpec
DynamoGraphDeploymentScalingAdapterSpec defines the desired state of DynamoGraphDeploymentScalingAdapter
_Appears in:_
- [DynamoGraphDeploymentScalingAdapter](#dynamographdeploymentscalingadapter)
| Field | Description | Default | Validation |
| --- | --- | --- | --- |
| `replicas` _integer_ | Replicas is the desired number of replicas for the target service.<br />This field is modified by external autoscalers (HPA/KEDA/Planner) or manually by users. | | Minimum: 0 <br />Required: \{\} <br /> |
| `dgdRef` _[DynamoGraphDeploymentServiceRef](#dynamographdeploymentserviceref)_ | DGDRef references the DynamoGraphDeployment and the specific service to scale. | | Required: \{\} <br /> |
#### DynamoGraphDeploymentScalingAdapterStatus
DynamoGraphDeploymentScalingAdapterStatus defines the observed state of DynamoGraphDeploymentScalingAdapter
_Appears in:_
- [DynamoGraphDeploymentScalingAdapter](#dynamographdeploymentscalingadapter)
| Field | Description | Default | Validation |
| --- | --- | --- | --- |
| `replicas` _integer_ | Replicas is the current number of replicas for the target service.<br />This is synced from the DGD's service replicas and is required for the scale subresource. | | |
| `selector` _string_ | Selector is a label selector string for the pods managed by this adapter.<br />Required for HPA compatibility via the scale subresource. | | |
| `lastScaleTime` _[Time](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#time-v1-meta)_ | LastScaleTime is the last time the adapter scaled the target service. | | |
#### DynamoGraphDeploymentServiceRef
DynamoGraphDeploymentServiceRef identifies a specific service within a DynamoGraphDeployment
_Appears in:_
- [DynamoGraphDeploymentScalingAdapterSpec](#dynamographdeploymentscalingadapterspec)
| Field | Description | Default | Validation |
| --- | --- | --- | --- |
| `name` _string_ | Name of the DynamoGraphDeployment | | MinLength: 1 <br />Required: \{\} <br /> |
| `serviceName` _string_ | ServiceName is the key name of the service within the DGD's spec.services map to scale | | MinLength: 1 <br />Required: \{\} <br /> |
#### DynamoGraphDeploymentSpec
......@@ -638,6 +720,25 @@ _Appears in:_
| `claims` _[ResourceClaim](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#resourceclaim-v1-core) array_ | Claims specifies resource claims for dynamic resource allocation | | |
#### ScalingAdapter
ScalingAdapter configures whether a service uses the DynamoGraphDeploymentScalingAdapter
for replica management. When enabled (default), the DGDSA owns the replicas field and
external autoscalers (HPA, KEDA, Planner) can control scaling via the Scale subresource.
_Appears in:_
- [DynamoComponentDeploymentSharedSpec](#dynamocomponentdeploymentsharedspec)
- [DynamoComponentDeploymentSpec](#dynamocomponentdeploymentspec)
| Field | Description | Default | Validation |
| --- | --- | --- | --- |
| `disable` _boolean_ | Disable indicates whether the ScalingAdapter should be disabled for this service.<br />When false (default), a DGDSA is created and owns the replicas field.<br />When true, no DGDSA is created and replicas can be modified directly in the DGD. | false | |
#### SharedMemorySpec
......
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment