Unverified Commit b1732a5f authored by Julien Mancuso's avatar Julien Mancuso Committed by GitHub
Browse files

fix: remove duplicates from imagePullSecrets (#3923)


Signed-off-by: default avatarJulien Mancuso <jmancuso@nvidia.com>
parent 927dcbfc
......@@ -87,12 +87,12 @@ spec:
{{- if .Values.natsAddr }}
- --natsAddr={{ .Values.natsAddr }}
{{- else }}
- --natsAddr=nats://{{ .Release.Name }}-nats.{{ .Release.Namespace }}:4222
- --natsAddr=nats://{{ .Release.Name }}-nats.{{ .Release.Namespace }}.svc.cluster.local:4222
{{- end }}
{{- if .Values.etcdAddr }}
- --etcdAddr={{ .Values.etcdAddr }}
{{- else }}
- --etcdAddr={{ .Release.Name }}-etcd.{{ .Release.Namespace }}:2379
- --etcdAddr={{ .Release.Name }}-etcd.{{ .Release.Namespace }}.svc.cluster.local:2379
{{- end }}
{{- if and .Values.dynamo.istio.enabled .Values.dynamo.istio.gateway }}
- --istio-virtual-service-gateway={{ .Values.dynamo.istio.gateway }}
......
......@@ -37,6 +37,10 @@ import (
"sigs.k8s.io/yaml"
)
const (
defaultNamespace = "default"
)
// MockRBACManager implements RBACManager for testing
type MockRBACManager struct {
EnsureServiceAccountWithRBACFunc func(ctx context.Context, targetNamespace, serviceAccountName, clusterRoleName string) error
......@@ -88,7 +92,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
It("Should validate spec and transition to Pending", func() {
ctx := context.Background()
dgdrName := "test-dgdr-initial"
namespace := "default"
namespace := defaultNamespace
dgdr := &nvidiacomv1alpha1.DynamoGraphDeploymentRequest{
ObjectMeta: metav1.ObjectMeta{
......@@ -97,9 +101,9 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
},
Spec: nvidiacomv1alpha1.DynamoGraphDeploymentRequestSpec{
Model: "test-model",
ProfilerImage: "test-profiler:latest",
Backend: "vllm",
ProfilingConfig: nvidiacomv1alpha1.ProfilingConfigSpec{
ProfilerImage: "test-profiler:latest",
Config: createTestConfig(map[string]interface{}{
"engine": map[string]interface{}{
"config": "/tmp/test-config.yaml",
......@@ -120,7 +124,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
}
Expect(k8sClient.Create(ctx, dgdr)).Should(Succeed())
defer k8sClient.Delete(ctx, dgdr)
defer func() { _ = k8sClient.Delete(ctx, dgdr) }()
// First reconcile: Empty -> Pending
_, err := reconciler.Reconcile(ctx, reconcile.Request{
......@@ -134,20 +138,20 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
// Check status
Eventually(func() string {
var updated nvidiacomv1alpha1.DynamoGraphDeploymentRequest
k8sClient.Get(ctx, types.NamespacedName{Name: dgdrName, Namespace: namespace}, &updated)
_ = k8sClient.Get(ctx, types.NamespacedName{Name: dgdrName, Namespace: namespace}, &updated)
return updated.Status.State
}, timeout, interval).Should(Equal(StatePending))
// Verify observedGeneration is set
var updated nvidiacomv1alpha1.DynamoGraphDeploymentRequest
k8sClient.Get(ctx, types.NamespacedName{Name: dgdrName, Namespace: namespace}, &updated)
_ = k8sClient.Get(ctx, types.NamespacedName{Name: dgdrName, Namespace: namespace}, &updated)
Expect(updated.Status.ObservedGeneration).Should(Equal(updated.Generation))
})
It("Should pass validation with minimal config", func() {
ctx := context.Background()
dgdrName := "test-dgdr-minimal"
namespace := "default"
namespace := defaultNamespace
dgdr := &nvidiacomv1alpha1.DynamoGraphDeploymentRequest{
ObjectMeta: metav1.ObjectMeta{
......@@ -156,9 +160,9 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
},
Spec: nvidiacomv1alpha1.DynamoGraphDeploymentRequestSpec{
Model: "test-model",
ProfilerImage: "test-profiler:latest",
Backend: "vllm",
ProfilingConfig: nvidiacomv1alpha1.ProfilingConfigSpec{
ProfilerImage: "test-profiler:latest",
Config: createTestConfig(map[string]interface{}{
"sla": map[string]interface{}{
"ttft": 100.0,
......@@ -170,7 +174,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
}
Expect(k8sClient.Create(ctx, dgdr)).Should(Succeed())
defer k8sClient.Delete(ctx, dgdr)
defer func() { _ = k8sClient.Delete(ctx, dgdr) }()
// Reconcile - should succeed with minimal config
_, err := reconciler.Reconcile(ctx, reconcile.Request{
......@@ -184,7 +188,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
// Check status transitions to Pending (not Failed)
Eventually(func() string {
var updated nvidiacomv1alpha1.DynamoGraphDeploymentRequest
k8sClient.Get(ctx, types.NamespacedName{Name: dgdrName, Namespace: namespace}, &updated)
_ = k8sClient.Get(ctx, types.NamespacedName{Name: dgdrName, Namespace: namespace}, &updated)
return updated.Status.State
}, timeout, interval).Should(Equal(StatePending))
})
......@@ -194,7 +198,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
It("Should create online profiling job", func() {
ctx := context.Background()
dgdrName := "test-dgdr-profiling-online"
namespace := "default"
namespace := defaultNamespace
// Create ConfigMap for DGD base config
configMap := &corev1.ConfigMap{
......@@ -207,7 +211,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
},
}
Expect(k8sClient.Create(ctx, configMap)).Should(Succeed())
defer k8sClient.Delete(ctx, configMap)
defer func() { _ = k8sClient.Delete(ctx, configMap) }()
// Create ServiceAccount
sa := &corev1.ServiceAccount{
......@@ -217,7 +221,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
},
}
Expect(k8sClient.Create(ctx, sa)).Should(Succeed())
defer k8sClient.Delete(ctx, sa)
defer func() { _ = k8sClient.Delete(ctx, sa) }()
dgdr := &nvidiacomv1alpha1.DynamoGraphDeploymentRequest{
ObjectMeta: metav1.ObjectMeta{
......@@ -226,9 +230,9 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
},
Spec: nvidiacomv1alpha1.DynamoGraphDeploymentRequestSpec{
Model: "test-model",
ProfilerImage: "test-profiler:latest",
Backend: "vllm",
ProfilingConfig: nvidiacomv1alpha1.ProfilingConfigSpec{
ProfilerImage: "test-profiler:latest",
Config: createTestConfig(map[string]interface{}{
"engine": map[string]interface{}{
"profiler_image": "test-profiler:latest",
......@@ -253,7 +257,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
}
Expect(k8sClient.Create(ctx, dgdr)).Should(Succeed())
defer k8sClient.Delete(ctx, dgdr)
defer func() { _ = k8sClient.Delete(ctx, dgdr) }()
// Reconcile multiple times to move through states
_, err := reconciler.Reconcile(ctx, reconcile.Request{
......@@ -278,7 +282,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
// Verify job has correct labels
jobName := getProfilingJobName(dgdr)
job := &batchv1.Job{}
k8sClient.Get(ctx, types.NamespacedName{Name: jobName, Namespace: namespace}, job)
_ = k8sClient.Get(ctx, types.NamespacedName{Name: jobName, Namespace: namespace}, job)
Expect(job.Labels[LabelApp]).Should(Equal(LabelValueDynamoProfiler))
Expect(job.Labels[LabelDGDR]).Should(Equal(dgdrName))
......@@ -300,13 +304,13 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
))
// Clean up job
k8sClient.Delete(ctx, job)
_ = k8sClient.Delete(ctx, job)
})
It("Should create offline (AIC) profiling job", func() {
ctx := context.Background()
dgdrName := "test-dgdr-profiling-aic"
namespace := "default"
namespace := defaultNamespace
// Create ServiceAccount
sa := &corev1.ServiceAccount{
......@@ -315,8 +319,8 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
Namespace: namespace,
},
}
_ = k8sClient.Create(ctx, sa)
defer k8sClient.Delete(ctx, sa)
Expect(k8sClient.Create(ctx, sa)).Should(Succeed())
defer func() { _ = k8sClient.Delete(ctx, sa) }()
dgdr := &nvidiacomv1alpha1.DynamoGraphDeploymentRequest{
ObjectMeta: metav1.ObjectMeta{
......@@ -325,9 +329,9 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
},
Spec: nvidiacomv1alpha1.DynamoGraphDeploymentRequestSpec{
Model: "test-model",
ProfilerImage: "test-profiler:latest",
Backend: "trtllm",
ProfilingConfig: nvidiacomv1alpha1.ProfilingConfigSpec{
ProfilerImage: "test-profiler:latest",
Config: createTestConfig(map[string]interface{}{
"engine": map[string]interface{}{
"config": "/tmp/test-config.yaml",
......@@ -355,7 +359,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
}
Expect(k8sClient.Create(ctx, dgdr)).Should(Succeed())
defer k8sClient.Delete(ctx, dgdr)
defer func() { _ = k8sClient.Delete(ctx, dgdr) }()
// Reconcile
_, err := reconciler.Reconcile(ctx, reconcile.Request{
......@@ -382,7 +386,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
jobName := getProfilingJobName(dgdr)
job := &batchv1.Job{}
if err := k8sClient.Get(ctx, types.NamespacedName{Name: jobName, Namespace: namespace}, job); err == nil {
k8sClient.Delete(ctx, job)
_ = k8sClient.Delete(ctx, job)
}
})
})
......@@ -391,7 +395,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
It("Should generate DGD spec from ConfigMap", func() {
ctx := context.Background()
dgdrName := "test-dgdr-profiling-complete"
namespace := "default"
namespace := defaultNamespace
dgdr := &nvidiacomv1alpha1.DynamoGraphDeploymentRequest{
ObjectMeta: metav1.ObjectMeta{
......@@ -400,9 +404,9 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
},
Spec: nvidiacomv1alpha1.DynamoGraphDeploymentRequestSpec{
Model: "test-model",
ProfilerImage: "test-profiler:latest",
Backend: "vllm",
ProfilingConfig: nvidiacomv1alpha1.ProfilingConfigSpec{
ProfilerImage: "test-profiler:latest",
Config: createTestConfig(map[string]interface{}{
"engine": map[string]interface{}{
"config": "/tmp/test-config.yaml",
......@@ -419,7 +423,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
}
Expect(k8sClient.Create(ctx, dgdr)).Should(Succeed())
defer k8sClient.Delete(ctx, dgdr)
defer func() { _ = k8sClient.Delete(ctx, dgdr) }()
// Update status to Profiling using Status subresource
dgdr.Status.State = StateProfiling
......@@ -451,7 +455,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
},
}
Expect(k8sClient.Create(ctx, job)).Should(Succeed())
defer k8sClient.Delete(ctx, job)
defer func() { _ = k8sClient.Delete(ctx, job) }()
// Update job status to completed using Status subresource
job.Status.Conditions = []batchv1.JobCondition{{
......@@ -481,7 +485,7 @@ spec:
},
}
Expect(k8sClient.Create(ctx, cm)).Should(Succeed())
defer k8sClient.Delete(ctx, cm)
defer func() { _ = k8sClient.Delete(ctx, cm) }()
// Reconcile to process the profiling completion
_, err := reconciler.Reconcile(ctx, reconcile.Request{
......@@ -505,7 +509,7 @@ spec:
It("Should create DGD after profiling", func() {
ctx := context.Background()
dgdrName := "test-dgdr-autoapply"
namespace := "default"
namespace := defaultNamespace
dgdr := &nvidiacomv1alpha1.DynamoGraphDeploymentRequest{
ObjectMeta: metav1.ObjectMeta{
......@@ -514,9 +518,9 @@ spec:
},
Spec: nvidiacomv1alpha1.DynamoGraphDeploymentRequestSpec{
Model: "test-model",
ProfilerImage: "test-profiler:latest",
Backend: "vllm",
ProfilingConfig: nvidiacomv1alpha1.ProfilingConfigSpec{
ProfilerImage: "test-profiler:latest",
Config: createTestConfig(map[string]interface{}{
"engine": map[string]interface{}{
"config": "/tmp/test-config.yaml",
......@@ -534,7 +538,7 @@ spec:
}
Expect(k8sClient.Create(ctx, dgdr)).Should(Succeed())
defer k8sClient.Delete(ctx, dgdr)
defer func() { _ = k8sClient.Delete(ctx, dgdr) }()
// Update status to Profiling using Status subresource
dgdr.Status.State = StateProfiling
......@@ -566,7 +570,7 @@ spec:
},
}
Expect(k8sClient.Create(ctx, job)).Should(Succeed())
defer k8sClient.Delete(ctx, job)
defer func() { _ = k8sClient.Delete(ctx, job) }()
// Update job status to completed using Status subresource
job.Status.Conditions = []batchv1.JobCondition{{
......@@ -596,7 +600,7 @@ spec:
},
}
Expect(k8sClient.Create(ctx, cm)).Should(Succeed())
defer k8sClient.Delete(ctx, cm)
defer func() { _ = k8sClient.Delete(ctx, cm) }()
// Reconcile to generate spec (transitions to Deploying because autoApply=true)
_, err := reconciler.Reconcile(ctx, reconcile.Request{
......@@ -620,14 +624,14 @@ spec:
Expect(k8sClient.Get(ctx, types.NamespacedName{Name: "test-dgd-auto", Namespace: namespace}, dgd)).Should(Succeed())
// Get final DGDR status
k8sClient.Get(ctx, types.NamespacedName{Name: dgdrName, Namespace: namespace}, &updated)
Expect(k8sClient.Get(ctx, types.NamespacedName{Name: dgdrName, Namespace: namespace}, &updated)).Should(Succeed())
Expect(updated.Status.Deployment).NotTo(BeNil())
Expect(updated.Status.Deployment.Created).Should(BeTrue())
Expect(updated.Status.Deployment.Name).Should(Equal("test-dgd-auto"))
// Clean up DGD
k8sClient.Get(ctx, types.NamespacedName{Name: "test-dgd-auto", Namespace: namespace}, dgd)
k8sClient.Delete(ctx, dgd)
Expect(k8sClient.Get(ctx, types.NamespacedName{Name: "test-dgd-auto", Namespace: namespace}, dgd)).Should(Succeed())
_ = k8sClient.Delete(ctx, dgd)
})
})
......@@ -635,7 +639,7 @@ spec:
It("Should reject spec changes after profiling starts", func() {
ctx := context.Background()
dgdrName := "test-dgdr-immutable"
namespace := "default"
namespace := defaultNamespace
dgdr := &nvidiacomv1alpha1.DynamoGraphDeploymentRequest{
ObjectMeta: metav1.ObjectMeta{
......@@ -644,9 +648,9 @@ spec:
},
Spec: nvidiacomv1alpha1.DynamoGraphDeploymentRequestSpec{
Model: "test-model",
ProfilerImage: "test-profiler:latest",
Backend: "vllm",
ProfilingConfig: nvidiacomv1alpha1.ProfilingConfigSpec{
ProfilerImage: "test-profiler:latest",
Config: createTestConfig(map[string]interface{}{
"engine": map[string]interface{}{
"config": "/tmp/test-config.yaml",
......@@ -663,7 +667,7 @@ spec:
}
Expect(k8sClient.Create(ctx, dgdr)).Should(Succeed())
defer k8sClient.Delete(ctx, dgdr)
defer func() { _ = k8sClient.Delete(ctx, dgdr) }()
// Reconcile to initialize
_, err := reconciler.Reconcile(ctx, reconcile.Request{
......@@ -673,22 +677,22 @@ spec:
// Get current generation
var current nvidiacomv1alpha1.DynamoGraphDeploymentRequest
k8sClient.Get(ctx, types.NamespacedName{Name: dgdrName, Namespace: namespace}, &current)
Expect(k8sClient.Get(ctx, types.NamespacedName{Name: dgdrName, Namespace: namespace}, &current)).Should(Succeed())
initialGeneration := current.Generation
observedGeneration := current.Status.ObservedGeneration
// Manually set state to Profiling to simulate in-progress profiling
current.Status.State = StateProfiling
k8sClient.Status().Update(ctx, &current)
Expect(k8sClient.Status().Update(ctx, &current)).Should(Succeed())
// Try to modify spec
k8sClient.Get(ctx, types.NamespacedName{Name: dgdrName, Namespace: namespace}, &current)
Expect(k8sClient.Get(ctx, types.NamespacedName{Name: dgdrName, Namespace: namespace}, &current)).Should(Succeed())
// Unmarshal config, modify it, and marshal back
var config map[string]interface{}
yaml.Unmarshal(current.Spec.ProfilingConfig.Config.Raw, &config)
Expect(yaml.Unmarshal(current.Spec.ProfilingConfig.Config.Raw, &config)).Should(Succeed())
config["sla"].(map[string]interface{})["ttft"] = 200.0
current.Spec.ProfilingConfig.Config = createTestConfig(config)
k8sClient.Update(ctx, &current)
Expect(k8sClient.Update(ctx, &current)).Should(Succeed())
// Reconcile
_, err = reconciler.Reconcile(ctx, reconcile.Request{
......@@ -697,7 +701,7 @@ spec:
Expect(err).NotTo(HaveOccurred())
// Verify generation changed but observedGeneration stayed the same
k8sClient.Get(ctx, types.NamespacedName{Name: dgdrName, Namespace: namespace}, &current)
Expect(k8sClient.Get(ctx, types.NamespacedName{Name: dgdrName, Namespace: namespace}, &current)).Should(Succeed())
Expect(current.Generation).Should(BeNumerically(">", initialGeneration))
Expect(current.Status.ObservedGeneration).Should(Equal(observedGeneration))
Expect(current.Status.State).Should(Equal(StateProfiling)) // State unchanged
......@@ -718,7 +722,7 @@ spec:
It("Should transition to DeploymentDeleted state", func() {
ctx := context.Background()
dgdrName := "test-dgdr-dgd-deleted"
namespace := "default"
namespace := defaultNamespace
dgdr := &nvidiacomv1alpha1.DynamoGraphDeploymentRequest{
ObjectMeta: metav1.ObjectMeta{
......@@ -727,9 +731,9 @@ spec:
},
Spec: nvidiacomv1alpha1.DynamoGraphDeploymentRequestSpec{
Model: "test-model",
ProfilerImage: "test-profiler:latest",
Backend: "vllm",
ProfilingConfig: nvidiacomv1alpha1.ProfilingConfigSpec{
ProfilerImage: "test-profiler:latest",
Config: createTestConfig(map[string]interface{}{
"engine": map[string]interface{}{
"config": "/tmp/test-config.yaml",
......@@ -747,7 +751,7 @@ spec:
}
Expect(k8sClient.Create(ctx, dgdr)).Should(Succeed())
defer k8sClient.Delete(ctx, dgdr)
defer func() { _ = k8sClient.Delete(ctx, dgdr) }()
// Update status to Ready with Deployment info using Status subresource
dgdr.Status.State = StateReady
......@@ -874,9 +878,9 @@ var _ = Describe("DGDR Validation", func() {
dgdr := &nvidiacomv1alpha1.DynamoGraphDeploymentRequest{
Spec: nvidiacomv1alpha1.DynamoGraphDeploymentRequestSpec{
Model: "test-model",
ProfilerImage: "test-profiler:latest",
Backend: "vllm",
ProfilingConfig: nvidiacomv1alpha1.ProfilingConfigSpec{
ProfilerImage: "test-profiler:latest",
Config: createTestConfig(map[string]interface{}{
"engine": map[string]interface{}{
"config": "/tmp/test-config.yaml",
......@@ -901,9 +905,9 @@ var _ = Describe("DGDR Validation", func() {
dgdr := &nvidiacomv1alpha1.DynamoGraphDeploymentRequest{
Spec: nvidiacomv1alpha1.DynamoGraphDeploymentRequestSpec{
Model: "test-model",
ProfilerImage: "test-profiler:latest",
Backend: "vllm",
ProfilingConfig: nvidiacomv1alpha1.ProfilingConfigSpec{
ProfilerImage: "test-profiler:latest",
Config: createTestConfig(map[string]interface{}{
"sla": map[string]interface{}{
"ttft": 100.0,
......@@ -948,8 +952,8 @@ var _ = Describe("DGDR Profiler Arguments", func() {
Namespace: namespace,
},
}
_ = k8sClient.Create(ctx, sa)
defer k8sClient.Delete(ctx, sa)
Expect(k8sClient.Create(ctx, sa)).Should(Succeed())
defer func() { _ = k8sClient.Delete(ctx, sa) }()
dgdr := &nvidiacomv1alpha1.DynamoGraphDeploymentRequest{
ObjectMeta: metav1.ObjectMeta{
......@@ -958,9 +962,9 @@ var _ = Describe("DGDR Profiler Arguments", func() {
},
Spec: nvidiacomv1alpha1.DynamoGraphDeploymentRequestSpec{
Model: "test-model",
ProfilerImage: "test-profiler:latest",
Backend: "trtllm",
ProfilingConfig: nvidiacomv1alpha1.ProfilingConfigSpec{
ProfilerImage: "test-profiler:latest",
Config: createTestConfig(map[string]interface{}{
"engine": map[string]interface{}{
"config": "/tmp/test-config.yaml",
......@@ -986,7 +990,7 @@ var _ = Describe("DGDR Profiler Arguments", func() {
}
Expect(k8sClient.Create(ctx, dgdr)).Should(Succeed())
defer k8sClient.Delete(ctx, dgdr)
defer func() { _ = k8sClient.Delete(ctx, dgdr) }()
// Re-fetch DGDR to get proper metadata from API server
var fetchedDGDR nvidiacomv1alpha1.DynamoGraphDeploymentRequest
......@@ -1009,12 +1013,12 @@ var _ = Describe("DGDR Profiler Arguments", func() {
Expect(args).Should(ContainElement("--profile-config"))
// Clean up
k8sClient.Delete(ctx, job)
_ = k8sClient.Delete(ctx, job)
})
It("Should pass config with AI Configurator settings for offline profiling", func() {
ctx := context.Background()
namespace := "default"
namespace := defaultNamespace
dgdrName := "test-args-offline"
// Create ServiceAccount
......@@ -1024,8 +1028,8 @@ var _ = Describe("DGDR Profiler Arguments", func() {
Namespace: namespace,
},
}
_ = k8sClient.Create(ctx, sa)
defer k8sClient.Delete(ctx, sa)
Expect(k8sClient.Create(ctx, sa)).Should(Succeed())
defer func() { _ = k8sClient.Delete(ctx, sa) }()
dgdr := &nvidiacomv1alpha1.DynamoGraphDeploymentRequest{
ObjectMeta: metav1.ObjectMeta{
......@@ -1034,9 +1038,9 @@ var _ = Describe("DGDR Profiler Arguments", func() {
},
Spec: nvidiacomv1alpha1.DynamoGraphDeploymentRequestSpec{
Model: "test-model",
ProfilerImage: "test-profiler:latest",
Backend: "trtllm",
ProfilingConfig: nvidiacomv1alpha1.ProfilingConfigSpec{
ProfilerImage: "test-profiler:latest",
Config: createTestConfig(map[string]interface{}{
"engine": map[string]interface{}{
"config": "/tmp/test-config.yaml",
......@@ -1065,7 +1069,7 @@ var _ = Describe("DGDR Profiler Arguments", func() {
}
Expect(k8sClient.Create(ctx, dgdr)).Should(Succeed())
defer k8sClient.Delete(ctx, dgdr)
defer func() { _ = k8sClient.Delete(ctx, dgdr) }()
// Re-fetch DGDR to get proper metadata from API server
var fetchedDGDR nvidiacomv1alpha1.DynamoGraphDeploymentRequest
......@@ -1088,7 +1092,7 @@ var _ = Describe("DGDR Profiler Arguments", func() {
Expect(args).Should(ContainElement("--profile-config"))
// Clean up
k8sClient.Delete(ctx, job)
_ = k8sClient.Delete(ctx, job)
})
})
})
......@@ -1112,7 +1116,7 @@ var _ = Describe("DGDR Error Handling", func() {
Context("When profiling job fails", func() {
It("Should capture detailed error from pod termination state", func() {
ctx := context.Background()
namespace := "default"
namespace := defaultNamespace
dgdrName := "test-error-capture"
dgdr := &nvidiacomv1alpha1.DynamoGraphDeploymentRequest{
......@@ -1122,9 +1126,9 @@ var _ = Describe("DGDR Error Handling", func() {
},
Spec: nvidiacomv1alpha1.DynamoGraphDeploymentRequestSpec{
Model: "test-model",
ProfilerImage: "test-profiler:latest",
Backend: "vllm",
ProfilingConfig: nvidiacomv1alpha1.ProfilingConfigSpec{
ProfilerImage: "test-profiler:latest",
Config: createTestConfig(map[string]interface{}{
"engine": map[string]interface{}{
"config": "/tmp/test-config.yaml",
......@@ -1145,7 +1149,7 @@ var _ = Describe("DGDR Error Handling", func() {
}
Expect(k8sClient.Create(ctx, dgdr)).Should(Succeed())
defer k8sClient.Delete(ctx, dgdr)
defer func() { _ = k8sClient.Delete(ctx, dgdr) }()
// Set status to Profiling
dgdr.Status.State = StateProfiling
......@@ -1178,7 +1182,7 @@ var _ = Describe("DGDR Error Handling", func() {
},
}
Expect(k8sClient.Create(ctx, job)).Should(Succeed())
defer k8sClient.Delete(ctx, job)
defer func() { _ = k8sClient.Delete(ctx, job) }()
// Update job status
job.Status.Conditions = []batchv1.JobCondition{{
......@@ -1219,7 +1223,7 @@ var _ = Describe("DGDR Error Handling", func() {
},
}
Expect(k8sClient.Create(ctx, pod)).Should(Succeed())
defer k8sClient.Delete(ctx, pod)
defer func() { _ = k8sClient.Delete(ctx, pod) }()
// Reconcile - should capture error details
_, err := reconciler.Reconcile(ctx, reconcile.Request{
......
......@@ -101,9 +101,11 @@ func CanonicalizePodSpec(podSpec *corev1.PodSpec) *corev1.PodSpec {
// Sort image pull secrets
if len(podSpec.ImagePullSecrets) > 1 {
sort.Slice(podSpec.ImagePullSecrets, func(i, j int) bool {
return podSpec.ImagePullSecrets[i].Name < podSpec.ImagePullSecrets[j].Name
uniqueSecrets := ensureUniqueImagePullSecrets(podSpec.ImagePullSecrets)
sort.Slice(uniqueSecrets, func(i, j int) bool {
return uniqueSecrets[i].Name < uniqueSecrets[j].Name
})
podSpec.ImagePullSecrets = uniqueSecrets
}
// Sort volumes and their nested items
......@@ -275,3 +277,18 @@ func CanonicalizePodSpec(podSpec *corev1.PodSpec) *corev1.PodSpec {
return podSpec
}
func ensureUniqueImagePullSecrets(secrets []corev1.LocalObjectReference) []corev1.LocalObjectReference {
if len(secrets) == 0 {
return nil
}
uniqueSecrets := make(map[string]corev1.LocalObjectReference)
for _, secret := range secrets {
uniqueSecrets[secret.Name] = secret
}
uniqueSecretsList := make([]corev1.LocalObjectReference, 0, len(uniqueSecrets))
for secretName := range uniqueSecrets {
uniqueSecretsList = append(uniqueSecretsList, corev1.LocalObjectReference{Name: secretName})
}
return uniqueSecretsList
}
......@@ -208,6 +208,7 @@ func TestCanonicalizePodSpec(t *testing.T) {
{Name: "registry-z"},
{Name: "registry-a"},
{Name: "registry-b"},
{Name: "registry-a"},
},
},
expected: &corev1.PodSpec{
......@@ -218,6 +219,15 @@ func TestCanonicalizePodSpec(t *testing.T) {
},
},
},
{
name: "sorts nil image pull secrets",
input: &corev1.PodSpec{
ImagePullSecrets: nil,
},
expected: &corev1.PodSpec{
ImagePullSecrets: nil,
},
},
{
name: "sorts volumes by name",
input: &corev1.PodSpec{
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment