dynamographdeploymentrequest_handler.go 5.77 KB
Newer Older
/*
 * SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package validation

import (
	"context"
	"fmt"

24
	nvidiacomv1beta1 "github.com/ai-dynamo/dynamo/deploy/operator/api/v1beta1"
25
26
	"github.com/ai-dynamo/dynamo/deploy/operator/internal/consts"
	"github.com/ai-dynamo/dynamo/deploy/operator/internal/observability"
27
	internalwebhook "github.com/ai-dynamo/dynamo/deploy/operator/internal/webhook"
28
29
30
31
32
33
34
35
36
	"k8s.io/apimachinery/pkg/runtime"
	"sigs.k8s.io/controller-runtime/pkg/log"
	"sigs.k8s.io/controller-runtime/pkg/manager"
	"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
)

const (
	// DynamoGraphDeploymentRequestWebhookName is the name of the validating webhook handler for DynamoGraphDeploymentRequest.
	// The handler methods in this file also use it as the logger name.
	DynamoGraphDeploymentRequestWebhookName = "dynamographdeploymentrequest-validating-webhook"
	// dynamoGraphDeploymentRequestWebhookPath is the path under which the validating webhook is
	// registered on the manager's webhook server.
	dynamoGraphDeploymentRequestWebhookPath = "/validate-nvidia-com-v1beta1-dynamographdeploymentrequest"
)

// DynamoGraphDeploymentRequestHandler is a handler for validating DynamoGraphDeploymentRequest resources.
// It is a thin wrapper around DynamoGraphDeploymentRequestValidator: it holds only the two flags
// that are passed to the validator on each create or update request.
type DynamoGraphDeploymentRequestHandler struct {
	// isClusterWideOperator records whether the operator has cluster-wide permissions.
	isClusterWideOperator bool
	// gpuDiscoveryEnabled records whether a ClusterRole for node read access was provisioned by Helm.
	gpuDiscoveryEnabled bool
}

// NewDynamoGraphDeploymentRequestHandler creates a new handler for DynamoGraphDeploymentRequest Webhook.
48
49
50
// isClusterWide indicates whether the operator has cluster-wide permissions.
// gpuDiscoveryEnabled indicates whether a ClusterRole for node read access was provisioned by Helm.
func NewDynamoGraphDeploymentRequestHandler(isClusterWide bool, gpuDiscoveryEnabled bool) *DynamoGraphDeploymentRequestHandler {
51
52
	return &DynamoGraphDeploymentRequestHandler{
		isClusterWideOperator: isClusterWide,
53
		gpuDiscoveryEnabled:   gpuDiscoveryEnabled,
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
	}
}

// ValidateCreate validates a DynamoGraphDeploymentRequest create request.
//
// obj must be a *DynamoGraphDeploymentRequest; any other type is rejected with the error
// produced by castToDynamoGraphDeploymentRequest. Otherwise the warnings and error returned
// by the validator's Validate method are passed through unchanged.
func (h *DynamoGraphDeploymentRequestHandler) ValidateCreate(ctx context.Context, obj runtime.Object) (admission.Warnings, error) {
	logger := log.FromContext(ctx).WithName(DynamoGraphDeploymentRequestWebhookName)

	request, err := castToDynamoGraphDeploymentRequest(obj)
	if err != nil {
		return nil, err
	}

	logger.Info("validate create", "name", request.Name, "namespace", request.Namespace)

	// The validation rules live in the validator; the handler only supplies its stored flags.
	validator := NewDynamoGraphDeploymentRequestValidator(request, h.isClusterWideOperator, h.gpuDiscoveryEnabled)
	return validator.Validate()
}

// ValidateUpdate validates a DynamoGraphDeploymentRequest update request.
//
// Both oldObj and newObj must be *DynamoGraphDeploymentRequest values. When the new object
// carries a deletion timestamp, validation is skipped and (nil, nil) is returned. Otherwise
// the warnings and error returned by the validator's ValidateUpdate method are passed
// through unchanged.
func (h *DynamoGraphDeploymentRequestHandler) ValidateUpdate(ctx context.Context, oldObj, newObj runtime.Object) (admission.Warnings, error) {
	logger := log.FromContext(ctx).WithName(DynamoGraphDeploymentRequestWebhookName)

	newRequest, err := castToDynamoGraphDeploymentRequest(newObj)
	if err != nil {
		return nil, err
	}

	logger.Info("validate update", "name", newRequest.Name, "namespace", newRequest.Namespace)

	// Skip validation if the resource is being deleted (to allow finalizer removal).
	// This check runs before oldObj is cast, so oldObj is not inspected on this path.
	if !newRequest.DeletionTimestamp.IsZero() {
		logger.Info("skipping validation for resource being deleted", "name", newRequest.Name)
		return nil, nil
	}

	oldRequest, err := castToDynamoGraphDeploymentRequest(oldObj)
	if err != nil {
		return nil, err
	}

	// The validator is built from the new object and compares it against the old one.
	validator := NewDynamoGraphDeploymentRequestValidator(newRequest, h.isClusterWideOperator, h.gpuDiscoveryEnabled)
	return validator.ValidateUpdate(oldRequest)
}

// ValidateDelete validates a DynamoGraphDeploymentRequest delete request.
//
// Deletion is never rejected on content grounds: once obj has been confirmed to be a
// *DynamoGraphDeploymentRequest, the request is logged and (nil, nil) is returned.
func (h *DynamoGraphDeploymentRequestHandler) ValidateDelete(ctx context.Context, obj runtime.Object) (admission.Warnings, error) {
	deleted, castErr := castToDynamoGraphDeploymentRequest(obj)
	if castErr != nil {
		return nil, castErr
	}

	log.FromContext(ctx).
		WithName(DynamoGraphDeploymentRequestWebhookName).
		Info("validate delete", "name", deleted.Name, "namespace", deleted.Namespace)

	// Nothing further to check when a resource is removed.
	return nil, nil
}

// RegisterWithManager registers the webhook with the manager.
// The handler is automatically wrapped with LeaseAwareValidator to add namespace exclusion logic.
func (h *DynamoGraphDeploymentRequestHandler) RegisterWithManager(mgr manager.Manager) error {
	// Wrap the handler with lease-aware logic for cluster-wide coordination
119
120
121
122
	leaseAwareValidator := internalwebhook.NewLeaseAwareValidator(h, internalwebhook.GetExcludedNamespaces())

	// Wrap with metrics collection
	observedValidator := observability.NewObservedValidator(leaseAwareValidator, consts.ResourceTypeDynamoGraphDeploymentRequest)
123
124

	webhook := admission.
125
		WithCustomValidator(mgr.GetScheme(), &nvidiacomv1beta1.DynamoGraphDeploymentRequest{}, observedValidator).
126
127
128
129
130
131
		WithRecoverPanic(true)
	mgr.GetWebhookServer().Register(dynamoGraphDeploymentRequestWebhookPath, webhook)
	return nil
}

// castToDynamoGraphDeploymentRequest attempts to cast a runtime.Object to a DynamoGraphDeploymentRequest.
132
133
func castToDynamoGraphDeploymentRequest(obj runtime.Object) (*nvidiacomv1beta1.DynamoGraphDeploymentRequest, error) {
	request, ok := obj.(*nvidiacomv1beta1.DynamoGraphDeploymentRequest)
134
135
136
137
138
	if !ok {
		return nil, fmt.Errorf("expected DynamoGraphDeploymentRequest but got %T", obj)
	}
	return request, nil
}