Unverified commit 06fc5d5e, authored by hhzhang16 and committed by GitHub
Browse files

feat: Make the image field optional in DGDRs (#6557)


Signed-off-by: Jont828 <jt572@cornell.edu>
Signed-off-by: Hannah Zhang <hannahz@nvidia.com>
Signed-off-by: hongkuanz <hongkuanz@nvidia.com>
Co-authored-by: Jont828 <jt572@cornell.edu>
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
Co-authored-by: Hongkuan Zhou <tedzhouhk@gmail.com>
parent 03360b84
......@@ -247,13 +247,20 @@ spec:
]"
echo "📝 Patching MutatingWebhookConfiguration..."
# Patch mutating webhook (DynamoGraphDeployment defaulting)
# Patch all mutating webhooks:
# 0: mdynamographdeployment.kb.io (DGD defaulting)
# 1: mdynamographdeploymentrequestv1beta1.kb.io (DGDR defaulting)
kubectl patch mutatingwebhookconfiguration ${MUTATING_WEBHOOK_NAME} \
--type='json' -p="[
{
\"op\": \"add\",
\"path\": \"/webhooks/0/clientConfig/caBundle\",
\"value\": \"${CA_BUNDLE}\"
},
{
\"op\": \"add\",
\"path\": \"/webhooks/1/clientConfig/caBundle\",
\"value\": \"${CA_BUNDLE}\"
}
]"
......
......@@ -221,4 +221,37 @@ webhooks:
- dynamographdeployments
sideEffects: None
timeoutSeconds: {{ .Values.webhook.timeoutSeconds }}
# Mutating (defaulting) webhook entry for nvidia.com/v1beta1
# DynamoGraphDeploymentRequest resources. It matches CREATE operations only and
# is routed to the operator's webhook service at
# /mutate-nvidia-com-v1beta1-dynamographdeploymentrequest.
- admissionReviewVersions:
- v1
clientConfig:
# caBundle is rendered inline only when cert-manager is disabled, the
# certificate secret is external, and a caBundle value is supplied.
{{- if and (not .Values.webhook.certManager.enabled) .Values.webhook.certificateSecret.external }}
{{- if .Values.webhook.caBundle }}
caBundle: {{ .Values.webhook.caBundle }}
{{- end }}
{{- end }}
service:
name: {{ include "dynamo-operator.fullname" . }}-webhook-service
namespace: {{ .Release.Namespace }}
path: /mutate-nvidia-com-v1beta1-dynamographdeploymentrequest
failurePolicy: {{ .Values.webhook.failurePolicy }}
name: mdynamographdeploymentrequestv1beta1.kb.io
{{- if .Values.webhook.namespaceSelector }}
namespaceSelector:
{{- toYaml .Values.webhook.namespaceSelector | nindent 4 }}
{{- else if .Values.namespaceRestriction.enabled }}
namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: {{ .Release.Namespace }}
{{- end }}
rules:
- apiGroups:
- nvidia.com
apiVersions:
- v1beta1
operations:
- CREATE
resources:
- dynamographdeploymentrequests
sideEffects: None
timeoutSeconds: {{ .Values.webhook.timeoutSeconds }}
......@@ -538,6 +538,13 @@ func (d *DynamoGraphDeploymentRequest) GetPhase() DGDRPhase {
return d.Status.Phase
}
// GetState satisfies the observability.StateProvider interface by exposing the
// current status phase in string form. This lets the resource counter count
// v1beta1 DGDRs without registering a v1alpha1 cache informer.
func (d *DynamoGraphDeploymentRequest) GetState() string {
	state := string(d.Status.Phase)
	return state
}
// SetProfilingPhase updates the profiling sub-phase.
func (d *DynamoGraphDeploymentRequest) SetProfilingPhase(phase ProfilingPhase) {
d.Status.ProfilingPhase = phase
......
......@@ -552,6 +552,7 @@ func main() {
if err = (&controller.DynamoGraphDeploymentRequestReconciler{
Client: mgr.GetClient(),
APIReader: mgr.GetAPIReader(),
Recorder: mgr.GetEventRecorderFor("dynamographdeploymentrequest"),
Config: operatorCfg,
RuntimeConfig: runtimeConfig,
......@@ -622,8 +623,9 @@ func main() {
os.Exit(1)
}
// Register the DGDR conversion webhook using the hub version (v1beta1).
if err = ctrl.NewWebhookManagedBy(mgr).
For(&nvidiacomv1alpha1.DynamoGraphDeploymentRequest{}).
For(&nvidiacomv1beta1.DynamoGraphDeploymentRequest{}).
Complete(); err != nil {
setupLog.Error(err, "unable to register conversion webhook", "webhook", "DynamoGraphDeploymentRequest-conversion")
os.Exit(1)
......@@ -640,6 +642,12 @@ func main() {
os.Exit(1)
}
dgdrDefaulter := webhookdefaulting.NewDGDRDefaulter(operatorVersion)
if err = dgdrDefaulter.RegisterWithManager(mgr); err != nil {
setupLog.Error(err, "unable to register webhook", "webhook", "DynamoGraphDeploymentRequest-defaulting")
os.Exit(1)
}
setupLog.Info("Defaulting webhooks registered successfully")
//+kubebuilder:scaffold:builder
......
......@@ -92,8 +92,7 @@ const (
// Volume paths
ProfilingOutputPath = "/data"
ProfilingOutputFile = "config_with_planner.yaml"
ProfilingOutputFileMocker = "mocker_config_with_planner.yaml"
ProfilingOutputFile = "final_config.yaml"
ProfilingConfigMountPath = "/config"
ProfilingConfigDefaultKey = "disagg.yaml"
DefaultModelCacheMountPath = "/opt/model-cache"
......@@ -111,7 +110,7 @@ const (
MessageAICProfilingJobCreated = "AIC profiling job created"
MessageProfilingInProgress = "Profiling is in progress"
MessageSpecGenerated = "DynamoGraphDeployment spec generated successfully"
MessageSpecAvailable = "Generated spec is available in status.generatedDeployment"
MessageSpecAvailable = "Generated spec is available in annotation nvidia.com/generated-dgd-spec"
MessageDeploymentCreated = "DynamoGraphDeployment %s created successfully"
MessageDeploymentReady = "DynamoGraphDeployment %s is ready"
MessageDeploymentDegraded = "DynamoGraphDeployment %s degraded from Ready to %s"
......@@ -222,13 +221,6 @@ data:
EOF
sed 's/^/ /' {{.OutputPath}}/{{.OutputFile}} >> /tmp/cm.yaml
# Add mocker config (profiler always generates both real and mocker configs)
if [ -f {{.OutputPath}}/{{.MockerOutputFile}} ]; then
echo " {{.MockerOutputFile}}: |" >> /tmp/cm.yaml
sed 's/^/ /' {{.OutputPath}}/{{.MockerOutputFile}} >> /tmp/cm.yaml
echo "Added mocker config to ConfigMap"
fi
# Add profiler status file for debugging
if [ -f {{.OutputPath}}/profiler_status.yaml ]; then
echo " profiler_status.yaml: |" >> /tmp/cm.yaml
......@@ -245,6 +237,7 @@ echo "Saved profiling output to ConfigMap {{.ConfigMapName}}"
// DynamoGraphDeploymentRequestReconciler reconciles a DynamoGraphDeploymentRequest object
type DynamoGraphDeploymentRequestReconciler struct {
client.Client
APIReader client.Reader
Recorder record.EventRecorder
Config *configv1alpha1.OperatorConfiguration
RuntimeConfig *commonController.RuntimeConfig
......@@ -794,14 +787,7 @@ func isOnlineProfiling(_ *nvidiacomv1beta1.DynamoGraphDeploymentRequest) bool {
func (r *DynamoGraphDeploymentRequestReconciler) validateSpec(ctx context.Context, dgdr *nvidiacomv1beta1.DynamoGraphDeploymentRequest) error {
var errs []error
// Validate image is specified (required for the profiling job container).
// Mirrors the webhook admission check so controller-side writes cannot bypass it.
if dgdr.Spec.Image == "" {
errs = append(errs, fmt.Errorf("spec.image is required"))
}
// Disallow searchStrategy: thorough with backend: auto.
// Mirrors the webhook admission check so controller-side writes cannot bypass it.
if dgdr.Spec.SearchStrategy == nvidiacomv1beta1.SearchStrategyThorough &&
dgdr.Spec.Backend == nvidiacomv1beta1.BackendTypeAuto {
errs = append(errs, fmt.Errorf(
......@@ -850,7 +836,7 @@ func (r *DynamoGraphDeploymentRequestReconciler) validateGPUHardwareInfo(ctx con
return nil
}
_, err := gpu.DiscoverGPUs(ctx, r.Client)
_, err := gpu.DiscoverGPUs(ctx, r.APIReader)
if err == nil {
// GPU discovery is available, validation passes
return nil
......@@ -996,10 +982,15 @@ func (r *DynamoGraphDeploymentRequestReconciler) createProfilingJob(ctx context.
}
// Profiler args: pass the DGDR spec as JSON via --config
profilerArgs := []string{"--config", specJSON}
// --output-dir must match ProfilingOutputPath so the sidecar can find profiler_status.yaml
profilerArgs := []string{"--config", specJSON, "--output-dir", ProfilingOutputPath}
// Use image from spec
// Use image from spec; the defaulting webhook fills this in for production builds.
// Guard against empty image in case the webhook didn't run (e.g. local dev builds).
imageName := dgdr.Spec.Image
if imageName == "" {
return nil, false, fmt.Errorf("spec.image is required but not set; ensure the defaulting webhook ran or set spec.image explicitly")
}
logger.Info("Using profiler image", "image", imageName)
profilerContainer := corev1.Container{
......@@ -1009,6 +1000,7 @@ func (r *DynamoGraphDeploymentRequestReconciler) createProfilingJob(ctx context.
Args: profilerArgs,
Env: profilerEnv,
VolumeMounts: volumeMounts,
WorkingDir: "/workspace",
}
// Generate sidecar script from template
......@@ -1019,12 +1011,11 @@ func (r *DynamoGraphDeploymentRequestReconciler) createProfilingJob(ctx context.
var scriptBuf bytes.Buffer
err = tmpl.Execute(&scriptBuf, map[string]string{
"OutputPath": ProfilingOutputPath,
"OutputFile": ProfilingOutputFile,
"MockerOutputFile": ProfilingOutputFileMocker,
"ConfigMapName": outputConfigMapName,
"Namespace": dgdr.Namespace,
"DGDRName": dgdr.Name,
"OutputPath": ProfilingOutputPath,
"OutputFile": ProfilingOutputFile,
"ConfigMapName": outputConfigMapName,
"Namespace": dgdr.Namespace,
"DGDRName": dgdr.Name,
})
if err != nil {
return nil, false, fmt.Errorf("failed to execute sidecar script template: %w", err)
......@@ -1222,7 +1213,7 @@ func (r *DynamoGraphDeploymentRequestReconciler) enrichHardwareFromDiscovery(ctx
return nil // all fields already set by user
}
gpuInfo, err := gpu.DiscoverGPUs(ctx, r.Client)
gpuInfo, err := gpu.DiscoverGPUs(ctx, r.APIReader)
if err != nil {
return err
}
......@@ -1392,14 +1383,8 @@ func (r *DynamoGraphDeploymentRequestReconciler) generateDGDSpec(ctx context.Con
}
// Select the right config file based on mocker feature flag
// Profiler always generates both real and mocker configs
var outputFile string
if dgdr.Spec.Features != nil && dgdr.Spec.Features.Mocker != nil && dgdr.Spec.Features.Mocker.Enabled {
outputFile = ProfilingOutputFileMocker
logger.Info("Using mocker deployment config")
} else {
outputFile = ProfilingOutputFile
}
// Profiler writes the selected config (real or mocker) to a single output file
outputFile := ProfilingOutputFile
// Get YAML content from ConfigMap
yamlContent, exists := cm.Data[outputFile]
......
......@@ -94,9 +94,10 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
Namespace: namespace,
},
Spec: nvidiacomv1beta1.DynamoGraphDeploymentRequestSpec{
Model: "test-model",
Backend: "vllm",
Image: "test-profiler:latest",
Model: "test-model",
Backend: "vllm",
Image: "test-profiler:latest",
AutoApply: true,
Hardware: &nvidiacomv1beta1.HardwareSpec{
NumGPUsPerNode: ptr.To[int32](8),
GPUSKU: "H100-SXM5-80GB",
......@@ -1060,8 +1061,9 @@ var _ = Describe("DGDR Error Handling", func() {
BeforeEach(func() {
recorder = record.NewFakeRecorder(100)
reconciler = &DynamoGraphDeploymentRequestReconciler{
Client: k8sClient,
Recorder: recorder,
Client: k8sClient,
APIReader: k8sClient,
Recorder: recorder,
Config: &configv1alpha1.OperatorConfiguration{
Namespace: configv1alpha1.NamespaceConfiguration{
Restricted: "",
......@@ -1859,9 +1861,10 @@ spec:
Namespace: namespace,
},
Spec: nvidiacomv1beta1.DynamoGraphDeploymentRequestSpec{
Model: "test-model",
Backend: "vllm",
Image: "test-profiler:latest",
Model: "test-model",
Backend: "vllm",
Image: "test-profiler:latest",
AutoApply: true,
Hardware: &nvidiacomv1beta1.HardwareSpec{
NumGPUsPerNode: ptr.To[int32](8),
GPUSKU: "H100-SXM5-80GB",
......@@ -1922,7 +1925,7 @@ spec:
Namespace: namespace,
},
Data: map[string]string{
ProfilingOutputFileMocker: dgdYAML,
ProfilingOutputFile: dgdYAML,
},
}
Expect(k8sClient.Create(ctx, cm)).Should(Succeed())
......
......@@ -50,7 +50,7 @@ type GPUInfo struct {
//
// This function requires cluster-wide node read permissions and expects nodes
// to have GFD labels. If no nodes with GPU labels are found, it returns an error.
func DiscoverGPUs(ctx context.Context, k8sClient client.Client) (*GPUInfo, error) {
func DiscoverGPUs(ctx context.Context, k8sClient client.Reader) (*GPUInfo, error) {
logger := log.FromContext(ctx)
logger.Info("Starting GPU discovery from cluster nodes")
......
......@@ -26,6 +26,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/log"
"github.com/ai-dynamo/dynamo/deploy/operator/api/v1alpha1"
"github.com/ai-dynamo/dynamo/deploy/operator/api/v1beta1"
"github.com/ai-dynamo/dynamo/deploy/operator/internal/consts"
)
......@@ -159,7 +160,7 @@ func updateDynamoModelCounts(ctx context.Context, c client.Client, excludedNames
}
func updateDynamoGraphDeploymentRequestCounts(ctx context.Context, c client.Client, excludedNamespaces ExcludedNamespaces, logger logr.Logger) {
dgdrList := &v1alpha1.DynamoGraphDeploymentRequestList{}
dgdrList := &v1beta1.DynamoGraphDeploymentRequestList{}
if err := c.List(ctx, dgdrList); err != nil {
logger.Error(err, "failed to list DynamoGraphDeploymentRequests")
return
......@@ -168,7 +169,7 @@ func updateDynamoGraphDeploymentRequestCounts(ctx context.Context, c client.Clie
dgdrList.Items,
excludedNamespaces,
consts.ResourceTypeDynamoGraphDeploymentRequest,
func(d *v1alpha1.DynamoGraphDeploymentRequest) *v1alpha1.DynamoGraphDeploymentRequest { return d },
func(d *v1beta1.DynamoGraphDeploymentRequest) *v1beta1.DynamoGraphDeploymentRequest { return d },
)
}
......
/*
* SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package defaulting
import (
"context"
"fmt"
nvidiacomv1beta1 "github.com/ai-dynamo/dynamo/deploy/operator/api/v1beta1"
admissionv1 "k8s.io/api/admission/v1"
"k8s.io/apimachinery/pkg/runtime"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
)
// Identifiers and defaults used by the DGDR defaulting webhook.
const (
// dgdrDefaultingWebhookName is the logger name used by the defaulting handler.
dgdrDefaultingWebhookName = "dynamographdeploymentrequest-defaulting-webhook"
// dgdrDefaultingWebhookPath is the path under which the handler is registered
// on the manager's webhook server.
dgdrDefaultingWebhookPath = "/mutate-nvidia-com-v1beta1-dynamographdeploymentrequest"
// defaultImage is the untagged repository of the default profiler image used
// when spec.image is not set; the operator version is appended as the tag.
// Default image derivation is only supported for public release versions (1.0.0+).
defaultImage = "nvcr.io/nvidia/ai-dynamo/dynamo-frontend"
)
// DGDRDefaulter is a mutating webhook handler that fills in default values for
// DynamoGraphDeploymentRequest resources on CREATE.
//
// If spec.image is not set, it is derived as:
//
// nvcr.io/nvidia/ai-dynamo/dynamo-frontend:<operatorVersion>
//
// Defaulting requires a known operator version and is only supported for
// operator versions 1.0.0 and later.
//
// NOTE(review): the handler itself performs no version comparison; any
// OperatorVersion other than "" or "unknown" is used verbatim as the image tag.
type DGDRDefaulter struct {
// OperatorVersion is the version string appended as the tag of the default image.
OperatorVersion string
}
// NewDGDRDefaulter returns a DGDRDefaulter whose default image tag is taken
// from operatorVersion.
func NewDGDRDefaulter(operatorVersion string) *DGDRDefaulter {
	defaulter := new(DGDRDefaulter)
	defaulter.OperatorVersion = operatorVersion
	return defaulter
}
// Default implements admission.CustomDefaulter for DynamoGraphDeploymentRequest.
//
// On a CREATE request with an empty spec.image, the image is set to the value
// returned by defaultImageFor (the default repository tagged with the operator
// version). The object is left untouched when the operation is not CREATE, when
// spec.image is already set, or when no default image can be derived.
//
// It returns an error only when obj is not a *DynamoGraphDeploymentRequest. If
// the admission request cannot be read from ctx, the failure is logged and
// defaulting is skipped (nil is returned) so the request is not rejected.
func (d *DGDRDefaulter) Default(ctx context.Context, obj runtime.Object) error {
	logger := log.FromContext(ctx).WithName(dgdrDefaultingWebhookName)

	dgdr, isDGDR := obj.(*nvidiacomv1beta1.DynamoGraphDeploymentRequest)
	if !isDGDR {
		return fmt.Errorf("expected DynamoGraphDeploymentRequest but got %T", obj)
	}

	req, err := admission.RequestFromContext(ctx)
	if err != nil {
		// Best effort: without the request the operation is unknown, so do nothing.
		logger.Error(err, "failed to get admission request from context, skipping defaulting")
		return nil
	}

	// Only CREATE requests that omit spec.image are candidates for defaulting.
	if req.Operation != admissionv1.Create || dgdr.Spec.Image != "" {
		return nil
	}

	img := d.defaultImageFor()
	if img == "" {
		// No usable operator version; the user must provide spec.image explicitly.
		return nil
	}

	dgdr.Spec.Image = img
	logger.Info("defaulted spec.image from operator version",
		"name", dgdr.Name,
		"namespace", dgdr.Namespace,
		"image", img,
	)
	return nil
}
// defaultImageFor builds "<defaultImage>:<OperatorVersion>". It returns the
// empty string when the operator version is empty or "unknown" (e.g. local dev
// builds); in that case the user has to set spec.image explicitly.
func (d *DGDRDefaulter) defaultImageFor() string {
	switch version := d.OperatorVersion; version {
	case "", "unknown":
		return ""
	default:
		return fmt.Sprintf("%s:%s", defaultImage, version)
	}
}
// RegisterWithManager wires this defaulter into mgr's webhook server under
// dgdrDefaultingWebhookPath, with panic recovery enabled on the handler.
// It always returns nil.
func (d *DGDRDefaulter) RegisterWithManager(mgr manager.Manager) error {
	handler := admission.WithCustomDefaulter(
		mgr.GetScheme(),
		&nvidiacomv1beta1.DynamoGraphDeploymentRequest{},
		d,
	).WithRecoverPanic(true)

	server := mgr.GetWebhookServer()
	server.Register(dgdrDefaultingWebhookPath, handler)
	return nil
}
/*
* SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package defaulting
import (
"context"
"testing"
nvidiacomv1beta1 "github.com/ai-dynamo/dynamo/deploy/operator/api/v1beta1"
admissionv1 "k8s.io/api/admission/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
)
// TestDGDRDefaulter_defaultImageFor verifies the image string derived from the
// operator version, including the versions for which no default is produced.
func TestDGDRDefaulter_defaultImageFor(t *testing.T) {
	type testCase struct {
		name            string
		operatorVersion string
		expectedImage   string
	}

	cases := []testCase{
		{
			name:            "known version produces default image",
			operatorVersion: "1.0.0",
			expectedImage:   "nvcr.io/nvidia/ai-dynamo/dynamo-frontend:1.0.0",
		},
		{
			name:            "pre-release version is valid",
			operatorVersion: "1.0.0-rc1",
			expectedImage:   "nvcr.io/nvidia/ai-dynamo/dynamo-frontend:1.0.0-rc1",
		},
		{
			name:            "unknown operator version cannot be defaulted",
			operatorVersion: "unknown",
			expectedImage:   "",
		},
		{
			name:            "empty operator version cannot be defaulted",
			operatorVersion: "",
			expectedImage:   "",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			defaulter := NewDGDRDefaulter(tc.operatorVersion)
			if got := defaulter.defaultImageFor(); got != tc.expectedImage {
				t.Errorf("defaultImageFor() = %q, want %q", got, tc.expectedImage)
			}
		})
	}
}
// makeAdmissionCtx returns a background context carrying an admission request
// whose only populated field is the given operation.
func makeAdmissionCtx(op admissionv1.Operation) context.Context {
	inner := admissionv1.AdmissionRequest{Operation: op}
	return admission.NewContextWithRequest(
		context.Background(),
		admission.Request{AdmissionRequest: inner},
	)
}
// TestDGDRDefaulter_Default exercises Default end to end: spec.image is filled
// in only for CREATE requests with an empty image and a usable operator version.
func TestDGDRDefaulter_Default(t *testing.T) {
	type testCase struct {
		name          string
		version       string
		operation     admissionv1.Operation
		initialImage  string
		expectedImage string
	}

	cases := []testCase{
		{
			name:          "CREATE with empty image defaults to operator version",
			version:       "1.0.0",
			operation:     admissionv1.Create,
			initialImage:  "",
			expectedImage: "nvcr.io/nvidia/ai-dynamo/dynamo-frontend:1.0.0",
		},
		{
			name:          "CREATE with preset image is not overwritten",
			version:       "1.0.0",
			operation:     admissionv1.Create,
			initialImage:  "my-registry/my-image:custom",
			expectedImage: "my-registry/my-image:custom",
		},
		{
			name:          "CREATE with unknown operator version leaves image empty",
			version:       "unknown",
			operation:     admissionv1.Create,
			initialImage:  "",
			expectedImage: "",
		},
		{
			name:          "UPDATE does not default image",
			version:       "1.0.0",
			operation:     admissionv1.Update,
			initialImage:  "",
			expectedImage: "",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			defaulter := NewDGDRDefaulter(tc.version)

			meta := metav1.ObjectMeta{Name: "test", Namespace: "default"}
			spec := nvidiacomv1beta1.DynamoGraphDeploymentRequestSpec{Image: tc.initialImage}
			dgdr := &nvidiacomv1beta1.DynamoGraphDeploymentRequest{
				ObjectMeta: meta,
				Spec:       spec,
			}

			err := defaulter.Default(makeAdmissionCtx(tc.operation), dgdr)
			if err != nil {
				t.Fatalf("Default() unexpected error: %v", err)
			}

			if got := dgdr.Spec.Image; got != tc.expectedImage {
				t.Errorf("after Default(): spec.image = %q, want %q", got, tc.expectedImage)
			}
		})
	}
}
......@@ -50,11 +50,6 @@ func NewDynamoGraphDeploymentRequestValidator(request *nvidiacomv1beta1.DynamoGr
func (v *DynamoGraphDeploymentRequestValidator) Validate() (admission.Warnings, error) {
var err error
// Validate image is specified (required for the profiling job container).
if v.request.Spec.Image == "" {
err = errors.Join(err, errors.New("spec.image is required"))
}
// Disallow searchStrategy: thorough with backend: auto.
// "thorough" sweeps more configurations and requires a concrete backend to be selected;
// "auto" defers backend selection and is only compatible with the "rapid" search strategy.
......@@ -77,6 +72,7 @@ func (v *DynamoGraphDeploymentRequestValidator) Validate() (admission.Warnings,
// validateGPUHardwareInfo ensures GPU hardware information will be available for profiling.
// Returns an error at admission time if GPU discovery is disabled and no manual hardware config is provided.
// Also validates consistency of GPU range fields.
func (v *DynamoGraphDeploymentRequestValidator) validateGPUHardwareInfo() error {
// Check if manual hardware config is provided via typed spec.hardware fields.
var hasManualHardwareConfig bool
......
......@@ -94,22 +94,7 @@ func (h *DynamoGraphDeploymentRequestHandler) ValidateUpdate(ctx context.Context
// Create validator and perform validation
validator := NewDynamoGraphDeploymentRequestValidator(newRequest, h.isClusterWideOperator, h.gpuDiscoveryEnabled)
// Validate stateless rules
warnings, err := validator.Validate()
if err != nil {
return warnings, err
}
// Validate stateful rules (immutability)
updateWarnings, err := validator.ValidateUpdate(oldRequest)
if err != nil {
return updateWarnings, err
}
// Combine warnings
warnings = append(warnings, updateWarnings...)
return warnings, nil
return validator.ValidateUpdate(oldRequest)
}
// ValidateDelete validates a DynamoGraphDeploymentRequest delete request.
......
......@@ -49,19 +49,7 @@ func TestDynamoGraphDeploymentRequestValidator_Validate(t *testing.T) {
},
isClusterWide: true,
},
{
name: "missing image",
request: &nvidiacomv1beta1.DynamoGraphDeploymentRequest{
ObjectMeta: metav1.ObjectMeta{Name: "test-dgdr", Namespace: "default"},
Spec: nvidiacomv1beta1.DynamoGraphDeploymentRequestSpec{
Model: "llama-3-8b",
Backend: nvidiacomv1beta1.BackendTypeVllm,
Image: "",
},
},
isClusterWide: true,
errMsg: "spec.image is required",
},
{
name: "thorough + auto is invalid",
request: &nvidiacomv1beta1.DynamoGraphDeploymentRequest{
......@@ -174,7 +162,7 @@ func TestDynamoGraphDeploymentRequestValidator_Validate(t *testing.T) {
errMsg: "GPU hardware configuration required: GPU discovery is disabled",
},
{
name: "multiple errors (missing image and thorough+auto)",
name: "thorough+auto is invalid regardless of image",
request: &nvidiacomv1beta1.DynamoGraphDeploymentRequest{
ObjectMeta: metav1.ObjectMeta{Name: "test-dgdr", Namespace: "default"},
Spec: nvidiacomv1beta1.DynamoGraphDeploymentRequestSpec{
......@@ -185,7 +173,7 @@ func TestDynamoGraphDeploymentRequestValidator_Validate(t *testing.T) {
},
},
isClusterWide: true,
errMsg: "spec.image is required\nspec.searchStrategy",
errMsg: "spec.searchStrategy",
},
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment