Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
b1732a5f
Unverified
Commit
b1732a5f
authored
Oct 28, 2025
by
Julien Mancuso
Committed by
GitHub
Oct 28, 2025
Browse files
fix: remove duplicates from imagePullSecrets (#3923)
Signed-off-by:
Julien Mancuso
<
jmancuso@nvidia.com
>
parent
927dcbfc
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
126 additions
and
95 deletions
+126
-95
deploy/cloud/helm/platform/components/operator/templates/deployment.yaml
...lm/platform/components/operator/templates/deployment.yaml
+2
-2
deploy/cloud/operator/internal/controller/dynamographdeploymentrequest_controller_test.go
...ontroller/dynamographdeploymentrequest_controller_test.go
+95
-91
deploy/cloud/operator/internal/controller_common/pod.go
deploy/cloud/operator/internal/controller_common/pod.go
+19
-2
deploy/cloud/operator/internal/controller_common/pod_test.go
deploy/cloud/operator/internal/controller_common/pod_test.go
+10
-0
No files found.
deploy/cloud/helm/platform/components/operator/templates/deployment.yaml
View file @
b1732a5f
...
...
@@ -87,12 +87,12 @@ spec:
{{
- if .Values.natsAddr
}}
-
--natsAddr={{ .Values.natsAddr }}
{{
- else
}}
-
--natsAddr=nats://{{ .Release.Name }}-nats.{{ .Release.Namespace }}:4222
-
--natsAddr=nats://{{ .Release.Name }}-nats.{{ .Release.Namespace }}
.svc.cluster.local
:4222
{{
- end
}}
{{
- if .Values.etcdAddr
}}
-
--etcdAddr={{ .Values.etcdAddr }}
{{
- else
}}
-
--etcdAddr={{ .Release.Name }}-etcd.{{ .Release.Namespace }}:2379
-
--etcdAddr={{ .Release.Name }}-etcd.{{ .Release.Namespace }}
.svc.cluster.local
:2379
{{
- end
}}
{{
- if and .Values.dynamo.istio.enabled .Values.dynamo.istio.gateway
}}
-
--istio-virtual-service-gateway={{ .Values.dynamo.istio.gateway }}
...
...
deploy/cloud/operator/internal/controller/dynamographdeploymentrequest_controller_test.go
View file @
b1732a5f
...
...
@@ -37,6 +37,10 @@ import (
"sigs.k8s.io/yaml"
)
const
(
defaultNamespace
=
"default"
)
// MockRBACManager implements RBACManager for testing
type
MockRBACManager
struct
{
EnsureServiceAccountWithRBACFunc
func
(
ctx
context
.
Context
,
targetNamespace
,
serviceAccountName
,
clusterRoleName
string
)
error
...
...
@@ -88,7 +92,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
It
(
"Should validate spec and transition to Pending"
,
func
()
{
ctx
:=
context
.
Background
()
dgdrName
:=
"test-dgdr-initial"
namespace
:=
"
default
"
namespace
:=
default
Namespace
dgdr
:=
&
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
{
ObjectMeta
:
metav1
.
ObjectMeta
{
...
...
@@ -96,10 +100,10 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
Namespace
:
namespace
,
},
Spec
:
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequestSpec
{
Model
:
"test-model"
,
ProfilerImage
:
"test-profiler:latest"
,
Backend
:
"vllm"
,
Model
:
"test-model"
,
Backend
:
"vllm"
,
ProfilingConfig
:
nvidiacomv1alpha1
.
ProfilingConfigSpec
{
ProfilerImage
:
"test-profiler:latest"
,
Config
:
createTestConfig
(
map
[
string
]
interface
{}{
"engine"
:
map
[
string
]
interface
{}{
"config"
:
"/tmp/test-config.yaml"
,
...
...
@@ -120,7 +124,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
}
Expect
(
k8sClient
.
Create
(
ctx
,
dgdr
))
.
Should
(
Succeed
())
defer
k8sClient
.
Delete
(
ctx
,
dgdr
)
defer
func
()
{
_
=
k8sClient
.
Delete
(
ctx
,
dgdr
)
}()
// First reconcile: Empty -> Pending
_
,
err
:=
reconciler
.
Reconcile
(
ctx
,
reconcile
.
Request
{
...
...
@@ -134,20 +138,20 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
// Check status
Eventually
(
func
()
string
{
var
updated
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
k8sClient
.
Get
(
ctx
,
types
.
NamespacedName
{
Name
:
dgdrName
,
Namespace
:
namespace
},
&
updated
)
_
=
k8sClient
.
Get
(
ctx
,
types
.
NamespacedName
{
Name
:
dgdrName
,
Namespace
:
namespace
},
&
updated
)
return
updated
.
Status
.
State
},
timeout
,
interval
)
.
Should
(
Equal
(
StatePending
))
// Verify observedGeneration is set
var
updated
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
k8sClient
.
Get
(
ctx
,
types
.
NamespacedName
{
Name
:
dgdrName
,
Namespace
:
namespace
},
&
updated
)
_
=
k8sClient
.
Get
(
ctx
,
types
.
NamespacedName
{
Name
:
dgdrName
,
Namespace
:
namespace
},
&
updated
)
Expect
(
updated
.
Status
.
ObservedGeneration
)
.
Should
(
Equal
(
updated
.
Generation
))
})
It
(
"Should pass validation with minimal config"
,
func
()
{
ctx
:=
context
.
Background
()
dgdrName
:=
"test-dgdr-minimal"
namespace
:=
"
default
"
namespace
:=
default
Namespace
dgdr
:=
&
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
{
ObjectMeta
:
metav1
.
ObjectMeta
{
...
...
@@ -155,10 +159,10 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
Namespace
:
namespace
,
},
Spec
:
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequestSpec
{
Model
:
"test-model"
,
ProfilerImage
:
"test-profiler:latest"
,
Backend
:
"vllm"
,
Model
:
"test-model"
,
Backend
:
"vllm"
,
ProfilingConfig
:
nvidiacomv1alpha1
.
ProfilingConfigSpec
{
ProfilerImage
:
"test-profiler:latest"
,
Config
:
createTestConfig
(
map
[
string
]
interface
{}{
"sla"
:
map
[
string
]
interface
{}{
"ttft"
:
100.0
,
...
...
@@ -170,7 +174,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
}
Expect
(
k8sClient
.
Create
(
ctx
,
dgdr
))
.
Should
(
Succeed
())
defer
k8sClient
.
Delete
(
ctx
,
dgdr
)
defer
func
()
{
_
=
k8sClient
.
Delete
(
ctx
,
dgdr
)
}()
// Reconcile - should succeed with minimal config
_
,
err
:=
reconciler
.
Reconcile
(
ctx
,
reconcile
.
Request
{
...
...
@@ -184,7 +188,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
// Check status transitions to Pending (not Failed)
Eventually
(
func
()
string
{
var
updated
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
k8sClient
.
Get
(
ctx
,
types
.
NamespacedName
{
Name
:
dgdrName
,
Namespace
:
namespace
},
&
updated
)
_
=
k8sClient
.
Get
(
ctx
,
types
.
NamespacedName
{
Name
:
dgdrName
,
Namespace
:
namespace
},
&
updated
)
return
updated
.
Status
.
State
},
timeout
,
interval
)
.
Should
(
Equal
(
StatePending
))
})
...
...
@@ -194,7 +198,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
It
(
"Should create online profiling job"
,
func
()
{
ctx
:=
context
.
Background
()
dgdrName
:=
"test-dgdr-profiling-online"
namespace
:=
"
default
"
namespace
:=
default
Namespace
// Create ConfigMap for DGD base config
configMap
:=
&
corev1
.
ConfigMap
{
...
...
@@ -207,7 +211,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
},
}
Expect
(
k8sClient
.
Create
(
ctx
,
configMap
))
.
Should
(
Succeed
())
defer
k8sClient
.
Delete
(
ctx
,
configMap
)
defer
func
()
{
_
=
k8sClient
.
Delete
(
ctx
,
configMap
)
}()
// Create ServiceAccount
sa
:=
&
corev1
.
ServiceAccount
{
...
...
@@ -217,7 +221,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
},
}
Expect
(
k8sClient
.
Create
(
ctx
,
sa
))
.
Should
(
Succeed
())
defer
k8sClient
.
Delete
(
ctx
,
sa
)
defer
func
()
{
_
=
k8sClient
.
Delete
(
ctx
,
sa
)
}()
dgdr
:=
&
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
{
ObjectMeta
:
metav1
.
ObjectMeta
{
...
...
@@ -225,10 +229,10 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
Namespace
:
namespace
,
},
Spec
:
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequestSpec
{
Model
:
"test-model"
,
ProfilerImage
:
"test-profiler:latest"
,
Backend
:
"vllm"
,
Model
:
"test-model"
,
Backend
:
"vllm"
,
ProfilingConfig
:
nvidiacomv1alpha1
.
ProfilingConfigSpec
{
ProfilerImage
:
"test-profiler:latest"
,
Config
:
createTestConfig
(
map
[
string
]
interface
{}{
"engine"
:
map
[
string
]
interface
{}{
"profiler_image"
:
"test-profiler:latest"
,
...
...
@@ -253,7 +257,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
}
Expect
(
k8sClient
.
Create
(
ctx
,
dgdr
))
.
Should
(
Succeed
())
defer
k8sClient
.
Delete
(
ctx
,
dgdr
)
defer
func
()
{
_
=
k8sClient
.
Delete
(
ctx
,
dgdr
)
}()
// Reconcile multiple times to move through states
_
,
err
:=
reconciler
.
Reconcile
(
ctx
,
reconcile
.
Request
{
...
...
@@ -278,7 +282,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
// Verify job has correct labels
jobName
:=
getProfilingJobName
(
dgdr
)
job
:=
&
batchv1
.
Job
{}
k8sClient
.
Get
(
ctx
,
types
.
NamespacedName
{
Name
:
jobName
,
Namespace
:
namespace
},
job
)
_
=
k8sClient
.
Get
(
ctx
,
types
.
NamespacedName
{
Name
:
jobName
,
Namespace
:
namespace
},
job
)
Expect
(
job
.
Labels
[
LabelApp
])
.
Should
(
Equal
(
LabelValueDynamoProfiler
))
Expect
(
job
.
Labels
[
LabelDGDR
])
.
Should
(
Equal
(
dgdrName
))
...
...
@@ -300,13 +304,13 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
))
// Clean up job
k8sClient
.
Delete
(
ctx
,
job
)
_
=
k8sClient
.
Delete
(
ctx
,
job
)
})
It
(
"Should create offline (AIC) profiling job"
,
func
()
{
ctx
:=
context
.
Background
()
dgdrName
:=
"test-dgdr-profiling-aic"
namespace
:=
"
default
"
namespace
:=
default
Namespace
// Create ServiceAccount
sa
:=
&
corev1
.
ServiceAccount
{
...
...
@@ -315,8 +319,8 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
Namespace
:
namespace
,
},
}
_
=
k8sClient
.
Create
(
ctx
,
sa
)
defer
k8sClient
.
Delete
(
ctx
,
sa
)
Expect
(
k8sClient
.
Create
(
ctx
,
sa
)
)
.
Should
(
Succeed
())
defer
func
()
{
_
=
k8sClient
.
Delete
(
ctx
,
sa
)
}()
dgdr
:=
&
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
{
ObjectMeta
:
metav1
.
ObjectMeta
{
...
...
@@ -324,10 +328,10 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
Namespace
:
namespace
,
},
Spec
:
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequestSpec
{
Model
:
"test-model"
,
ProfilerImage
:
"test-profiler:latest"
,
Backend
:
"trtllm"
,
Model
:
"test-model"
,
Backend
:
"trtllm"
,
ProfilingConfig
:
nvidiacomv1alpha1
.
ProfilingConfigSpec
{
ProfilerImage
:
"test-profiler:latest"
,
Config
:
createTestConfig
(
map
[
string
]
interface
{}{
"engine"
:
map
[
string
]
interface
{}{
"config"
:
"/tmp/test-config.yaml"
,
...
...
@@ -355,7 +359,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
}
Expect
(
k8sClient
.
Create
(
ctx
,
dgdr
))
.
Should
(
Succeed
())
defer
k8sClient
.
Delete
(
ctx
,
dgdr
)
defer
func
()
{
_
=
k8sClient
.
Delete
(
ctx
,
dgdr
)
}()
// Reconcile
_
,
err
:=
reconciler
.
Reconcile
(
ctx
,
reconcile
.
Request
{
...
...
@@ -382,7 +386,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
jobName
:=
getProfilingJobName
(
dgdr
)
job
:=
&
batchv1
.
Job
{}
if
err
:=
k8sClient
.
Get
(
ctx
,
types
.
NamespacedName
{
Name
:
jobName
,
Namespace
:
namespace
},
job
);
err
==
nil
{
k8sClient
.
Delete
(
ctx
,
job
)
_
=
k8sClient
.
Delete
(
ctx
,
job
)
}
})
})
...
...
@@ -391,7 +395,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
It
(
"Should generate DGD spec from ConfigMap"
,
func
()
{
ctx
:=
context
.
Background
()
dgdrName
:=
"test-dgdr-profiling-complete"
namespace
:=
"
default
"
namespace
:=
default
Namespace
dgdr
:=
&
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
{
ObjectMeta
:
metav1
.
ObjectMeta
{
...
...
@@ -399,10 +403,10 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
Namespace
:
namespace
,
},
Spec
:
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequestSpec
{
Model
:
"test-model"
,
ProfilerImage
:
"test-profiler:latest"
,
Backend
:
"vllm"
,
Model
:
"test-model"
,
Backend
:
"vllm"
,
ProfilingConfig
:
nvidiacomv1alpha1
.
ProfilingConfigSpec
{
ProfilerImage
:
"test-profiler:latest"
,
Config
:
createTestConfig
(
map
[
string
]
interface
{}{
"engine"
:
map
[
string
]
interface
{}{
"config"
:
"/tmp/test-config.yaml"
,
...
...
@@ -419,7 +423,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
}
Expect
(
k8sClient
.
Create
(
ctx
,
dgdr
))
.
Should
(
Succeed
())
defer
k8sClient
.
Delete
(
ctx
,
dgdr
)
defer
func
()
{
_
=
k8sClient
.
Delete
(
ctx
,
dgdr
)
}()
// Update status to Profiling using Status subresource
dgdr
.
Status
.
State
=
StateProfiling
...
...
@@ -451,7 +455,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
},
}
Expect
(
k8sClient
.
Create
(
ctx
,
job
))
.
Should
(
Succeed
())
defer
k8sClient
.
Delete
(
ctx
,
job
)
defer
func
()
{
_
=
k8sClient
.
Delete
(
ctx
,
job
)
}()
// Update job status to completed using Status subresource
job
.
Status
.
Conditions
=
[]
batchv1
.
JobCondition
{{
...
...
@@ -481,7 +485,7 @@ spec:
},
}
Expect
(
k8sClient
.
Create
(
ctx
,
cm
))
.
Should
(
Succeed
())
defer
k8sClient
.
Delete
(
ctx
,
cm
)
defer
func
()
{
_
=
k8sClient
.
Delete
(
ctx
,
cm
)
}()
// Reconcile to process the profiling completion
_
,
err
:=
reconciler
.
Reconcile
(
ctx
,
reconcile
.
Request
{
...
...
@@ -505,7 +509,7 @@ spec:
It
(
"Should create DGD after profiling"
,
func
()
{
ctx
:=
context
.
Background
()
dgdrName
:=
"test-dgdr-autoapply"
namespace
:=
"
default
"
namespace
:=
default
Namespace
dgdr
:=
&
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
{
ObjectMeta
:
metav1
.
ObjectMeta
{
...
...
@@ -513,10 +517,10 @@ spec:
Namespace
:
namespace
,
},
Spec
:
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequestSpec
{
Model
:
"test-model"
,
ProfilerImage
:
"test-profiler:latest"
,
Backend
:
"vllm"
,
Model
:
"test-model"
,
Backend
:
"vllm"
,
ProfilingConfig
:
nvidiacomv1alpha1
.
ProfilingConfigSpec
{
ProfilerImage
:
"test-profiler:latest"
,
Config
:
createTestConfig
(
map
[
string
]
interface
{}{
"engine"
:
map
[
string
]
interface
{}{
"config"
:
"/tmp/test-config.yaml"
,
...
...
@@ -534,7 +538,7 @@ spec:
}
Expect
(
k8sClient
.
Create
(
ctx
,
dgdr
))
.
Should
(
Succeed
())
defer
k8sClient
.
Delete
(
ctx
,
dgdr
)
defer
func
()
{
_
=
k8sClient
.
Delete
(
ctx
,
dgdr
)
}()
// Update status to Profiling using Status subresource
dgdr
.
Status
.
State
=
StateProfiling
...
...
@@ -566,7 +570,7 @@ spec:
},
}
Expect
(
k8sClient
.
Create
(
ctx
,
job
))
.
Should
(
Succeed
())
defer
k8sClient
.
Delete
(
ctx
,
job
)
defer
func
()
{
_
=
k8sClient
.
Delete
(
ctx
,
job
)
}()
// Update job status to completed using Status subresource
job
.
Status
.
Conditions
=
[]
batchv1
.
JobCondition
{{
...
...
@@ -596,7 +600,7 @@ spec:
},
}
Expect
(
k8sClient
.
Create
(
ctx
,
cm
))
.
Should
(
Succeed
())
defer
k8sClient
.
Delete
(
ctx
,
cm
)
defer
func
()
{
_
=
k8sClient
.
Delete
(
ctx
,
cm
)
}()
// Reconcile to generate spec (transitions to Deploying because autoApply=true)
_
,
err
:=
reconciler
.
Reconcile
(
ctx
,
reconcile
.
Request
{
...
...
@@ -620,14 +624,14 @@ spec:
Expect
(
k8sClient
.
Get
(
ctx
,
types
.
NamespacedName
{
Name
:
"test-dgd-auto"
,
Namespace
:
namespace
},
dgd
))
.
Should
(
Succeed
())
// Get final DGDR status
k8sClient
.
Get
(
ctx
,
types
.
NamespacedName
{
Name
:
dgdrName
,
Namespace
:
namespace
},
&
updated
)
Expect
(
k8sClient
.
Get
(
ctx
,
types
.
NamespacedName
{
Name
:
dgdrName
,
Namespace
:
namespace
},
&
updated
)
)
.
Should
(
Succeed
())
Expect
(
updated
.
Status
.
Deployment
)
.
NotTo
(
BeNil
())
Expect
(
updated
.
Status
.
Deployment
.
Created
)
.
Should
(
BeTrue
())
Expect
(
updated
.
Status
.
Deployment
.
Name
)
.
Should
(
Equal
(
"test-dgd-auto"
))
// Clean up DGD
k8sClient
.
Get
(
ctx
,
types
.
NamespacedName
{
Name
:
"test-dgd-auto"
,
Namespace
:
namespace
},
dgd
)
k8sClient
.
Delete
(
ctx
,
dgd
)
Expect
(
k8sClient
.
Get
(
ctx
,
types
.
NamespacedName
{
Name
:
"test-dgd-auto"
,
Namespace
:
namespace
},
dgd
)
)
.
Should
(
Succeed
())
_
=
k8sClient
.
Delete
(
ctx
,
dgd
)
})
})
...
...
@@ -635,7 +639,7 @@ spec:
It
(
"Should reject spec changes after profiling starts"
,
func
()
{
ctx
:=
context
.
Background
()
dgdrName
:=
"test-dgdr-immutable"
namespace
:=
"
default
"
namespace
:=
default
Namespace
dgdr
:=
&
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
{
ObjectMeta
:
metav1
.
ObjectMeta
{
...
...
@@ -643,10 +647,10 @@ spec:
Namespace
:
namespace
,
},
Spec
:
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequestSpec
{
Model
:
"test-model"
,
ProfilerImage
:
"test-profiler:latest"
,
Backend
:
"vllm"
,
Model
:
"test-model"
,
Backend
:
"vllm"
,
ProfilingConfig
:
nvidiacomv1alpha1
.
ProfilingConfigSpec
{
ProfilerImage
:
"test-profiler:latest"
,
Config
:
createTestConfig
(
map
[
string
]
interface
{}{
"engine"
:
map
[
string
]
interface
{}{
"config"
:
"/tmp/test-config.yaml"
,
...
...
@@ -663,7 +667,7 @@ spec:
}
Expect
(
k8sClient
.
Create
(
ctx
,
dgdr
))
.
Should
(
Succeed
())
defer
k8sClient
.
Delete
(
ctx
,
dgdr
)
defer
func
()
{
_
=
k8sClient
.
Delete
(
ctx
,
dgdr
)
}()
// Reconcile to initialize
_
,
err
:=
reconciler
.
Reconcile
(
ctx
,
reconcile
.
Request
{
...
...
@@ -673,22 +677,22 @@ spec:
// Get current generation
var
current
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
k8sClient
.
Get
(
ctx
,
types
.
NamespacedName
{
Name
:
dgdrName
,
Namespace
:
namespace
},
&
current
)
Expect
(
k8sClient
.
Get
(
ctx
,
types
.
NamespacedName
{
Name
:
dgdrName
,
Namespace
:
namespace
},
&
current
)
)
.
Should
(
Succeed
())
initialGeneration
:=
current
.
Generation
observedGeneration
:=
current
.
Status
.
ObservedGeneration
// Manually set state to Profiling to simulate in-progress profiling
current
.
Status
.
State
=
StateProfiling
k8sClient
.
Status
()
.
Update
(
ctx
,
&
current
)
Expect
(
k8sClient
.
Status
()
.
Update
(
ctx
,
&
current
)
)
.
Should
(
Succeed
())
// Try to modify spec
k8sClient
.
Get
(
ctx
,
types
.
NamespacedName
{
Name
:
dgdrName
,
Namespace
:
namespace
},
&
current
)
Expect
(
k8sClient
.
Get
(
ctx
,
types
.
NamespacedName
{
Name
:
dgdrName
,
Namespace
:
namespace
},
&
current
)
)
.
Should
(
Succeed
())
// Unmarshal config, modify it, and marshal back
var
config
map
[
string
]
interface
{}
yaml
.
Unmarshal
(
current
.
Spec
.
ProfilingConfig
.
Config
.
Raw
,
&
config
)
Expect
(
yaml
.
Unmarshal
(
current
.
Spec
.
ProfilingConfig
.
Config
.
Raw
,
&
config
)
)
.
Should
(
Succeed
())
config
[
"sla"
]
.
(
map
[
string
]
interface
{})[
"ttft"
]
=
200.0
current
.
Spec
.
ProfilingConfig
.
Config
=
createTestConfig
(
config
)
k8sClient
.
Update
(
ctx
,
&
current
)
Expect
(
k8sClient
.
Update
(
ctx
,
&
current
)
)
.
Should
(
Succeed
())
// Reconcile
_
,
err
=
reconciler
.
Reconcile
(
ctx
,
reconcile
.
Request
{
...
...
@@ -697,7 +701,7 @@ spec:
Expect
(
err
)
.
NotTo
(
HaveOccurred
())
// Verify generation changed but observedGeneration stayed the same
k8sClient
.
Get
(
ctx
,
types
.
NamespacedName
{
Name
:
dgdrName
,
Namespace
:
namespace
},
&
current
)
Expect
(
k8sClient
.
Get
(
ctx
,
types
.
NamespacedName
{
Name
:
dgdrName
,
Namespace
:
namespace
},
&
current
)
)
.
Should
(
Succeed
())
Expect
(
current
.
Generation
)
.
Should
(
BeNumerically
(
">"
,
initialGeneration
))
Expect
(
current
.
Status
.
ObservedGeneration
)
.
Should
(
Equal
(
observedGeneration
))
Expect
(
current
.
Status
.
State
)
.
Should
(
Equal
(
StateProfiling
))
// State unchanged
...
...
@@ -718,7 +722,7 @@ spec:
It
(
"Should transition to DeploymentDeleted state"
,
func
()
{
ctx
:=
context
.
Background
()
dgdrName
:=
"test-dgdr-dgd-deleted"
namespace
:=
"
default
"
namespace
:=
default
Namespace
dgdr
:=
&
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
{
ObjectMeta
:
metav1
.
ObjectMeta
{
...
...
@@ -726,10 +730,10 @@ spec:
Namespace
:
namespace
,
},
Spec
:
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequestSpec
{
Model
:
"test-model"
,
ProfilerImage
:
"test-profiler:latest"
,
Backend
:
"vllm"
,
Model
:
"test-model"
,
Backend
:
"vllm"
,
ProfilingConfig
:
nvidiacomv1alpha1
.
ProfilingConfigSpec
{
ProfilerImage
:
"test-profiler:latest"
,
Config
:
createTestConfig
(
map
[
string
]
interface
{}{
"engine"
:
map
[
string
]
interface
{}{
"config"
:
"/tmp/test-config.yaml"
,
...
...
@@ -747,7 +751,7 @@ spec:
}
Expect
(
k8sClient
.
Create
(
ctx
,
dgdr
))
.
Should
(
Succeed
())
defer
k8sClient
.
Delete
(
ctx
,
dgdr
)
defer
func
()
{
_
=
k8sClient
.
Delete
(
ctx
,
dgdr
)
}()
// Update status to Ready with Deployment info using Status subresource
dgdr
.
Status
.
State
=
StateReady
...
...
@@ -873,10 +877,10 @@ var _ = Describe("DGDR Validation", func() {
ctx
:=
context
.
Background
()
dgdr
:=
&
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
{
Spec
:
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequestSpec
{
Model
:
"test-model"
,
ProfilerImage
:
"test-profiler:latest"
,
Backend
:
"vllm"
,
Model
:
"test-model"
,
Backend
:
"vllm"
,
ProfilingConfig
:
nvidiacomv1alpha1
.
ProfilingConfigSpec
{
ProfilerImage
:
"test-profiler:latest"
,
Config
:
createTestConfig
(
map
[
string
]
interface
{}{
"engine"
:
map
[
string
]
interface
{}{
"config"
:
"/tmp/test-config.yaml"
,
...
...
@@ -900,10 +904,10 @@ var _ = Describe("DGDR Validation", func() {
ctx
:=
context
.
Background
()
dgdr
:=
&
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
{
Spec
:
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequestSpec
{
Model
:
"test-model"
,
ProfilerImage
:
"test-profiler:latest"
,
Backend
:
"vllm"
,
Model
:
"test-model"
,
Backend
:
"vllm"
,
ProfilingConfig
:
nvidiacomv1alpha1
.
ProfilingConfigSpec
{
ProfilerImage
:
"test-profiler:latest"
,
Config
:
createTestConfig
(
map
[
string
]
interface
{}{
"sla"
:
map
[
string
]
interface
{}{
"ttft"
:
100.0
,
...
...
@@ -948,8 +952,8 @@ var _ = Describe("DGDR Profiler Arguments", func() {
Namespace
:
namespace
,
},
}
_
=
k8sClient
.
Create
(
ctx
,
sa
)
defer
k8sClient
.
Delete
(
ctx
,
sa
)
Expect
(
k8sClient
.
Create
(
ctx
,
sa
)
)
.
Should
(
Succeed
())
defer
func
()
{
_
=
k8sClient
.
Delete
(
ctx
,
sa
)
}()
dgdr
:=
&
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
{
ObjectMeta
:
metav1
.
ObjectMeta
{
...
...
@@ -957,10 +961,10 @@ var _ = Describe("DGDR Profiler Arguments", func() {
Namespace
:
namespace
,
},
Spec
:
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequestSpec
{
Model
:
"test-model"
,
ProfilerImage
:
"test-profiler:latest"
,
Backend
:
"trtllm"
,
Model
:
"test-model"
,
Backend
:
"trtllm"
,
ProfilingConfig
:
nvidiacomv1alpha1
.
ProfilingConfigSpec
{
ProfilerImage
:
"test-profiler:latest"
,
Config
:
createTestConfig
(
map
[
string
]
interface
{}{
"engine"
:
map
[
string
]
interface
{}{
"config"
:
"/tmp/test-config.yaml"
,
...
...
@@ -986,7 +990,7 @@ var _ = Describe("DGDR Profiler Arguments", func() {
}
Expect
(
k8sClient
.
Create
(
ctx
,
dgdr
))
.
Should
(
Succeed
())
defer
k8sClient
.
Delete
(
ctx
,
dgdr
)
defer
func
()
{
_
=
k8sClient
.
Delete
(
ctx
,
dgdr
)
}()
// Re-fetch DGDR to get proper metadata from API server
var
fetchedDGDR
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
...
...
@@ -1009,12 +1013,12 @@ var _ = Describe("DGDR Profiler Arguments", func() {
Expect
(
args
)
.
Should
(
ContainElement
(
"--profile-config"
))
// Clean up
k8sClient
.
Delete
(
ctx
,
job
)
_
=
k8sClient
.
Delete
(
ctx
,
job
)
})
It
(
"Should pass config with AI Configurator settings for offline profiling"
,
func
()
{
ctx
:=
context
.
Background
()
namespace
:=
"
default
"
namespace
:=
default
Namespace
dgdrName
:=
"test-args-offline"
// Create ServiceAccount
...
...
@@ -1024,8 +1028,8 @@ var _ = Describe("DGDR Profiler Arguments", func() {
Namespace
:
namespace
,
},
}
_
=
k8sClient
.
Create
(
ctx
,
sa
)
defer
k8sClient
.
Delete
(
ctx
,
sa
)
Expect
(
k8sClient
.
Create
(
ctx
,
sa
)
)
.
Should
(
Succeed
())
defer
func
()
{
_
=
k8sClient
.
Delete
(
ctx
,
sa
)
}()
dgdr
:=
&
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
{
ObjectMeta
:
metav1
.
ObjectMeta
{
...
...
@@ -1033,10 +1037,10 @@ var _ = Describe("DGDR Profiler Arguments", func() {
Namespace
:
namespace
,
},
Spec
:
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequestSpec
{
Model
:
"test-model"
,
ProfilerImage
:
"test-profiler:latest"
,
Backend
:
"trtllm"
,
Model
:
"test-model"
,
Backend
:
"trtllm"
,
ProfilingConfig
:
nvidiacomv1alpha1
.
ProfilingConfigSpec
{
ProfilerImage
:
"test-profiler:latest"
,
Config
:
createTestConfig
(
map
[
string
]
interface
{}{
"engine"
:
map
[
string
]
interface
{}{
"config"
:
"/tmp/test-config.yaml"
,
...
...
@@ -1065,7 +1069,7 @@ var _ = Describe("DGDR Profiler Arguments", func() {
}
Expect
(
k8sClient
.
Create
(
ctx
,
dgdr
))
.
Should
(
Succeed
())
defer
k8sClient
.
Delete
(
ctx
,
dgdr
)
defer
func
()
{
_
=
k8sClient
.
Delete
(
ctx
,
dgdr
)
}()
// Re-fetch DGDR to get proper metadata from API server
var
fetchedDGDR
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
...
...
@@ -1088,7 +1092,7 @@ var _ = Describe("DGDR Profiler Arguments", func() {
Expect
(
args
)
.
Should
(
ContainElement
(
"--profile-config"
))
// Clean up
k8sClient
.
Delete
(
ctx
,
job
)
_
=
k8sClient
.
Delete
(
ctx
,
job
)
})
})
})
...
...
@@ -1112,7 +1116,7 @@ var _ = Describe("DGDR Error Handling", func() {
Context
(
"When profiling job fails"
,
func
()
{
It
(
"Should capture detailed error from pod termination state"
,
func
()
{
ctx
:=
context
.
Background
()
namespace
:=
"
default
"
namespace
:=
default
Namespace
dgdrName
:=
"test-error-capture"
dgdr
:=
&
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
{
...
...
@@ -1121,10 +1125,10 @@ var _ = Describe("DGDR Error Handling", func() {
Namespace
:
namespace
,
},
Spec
:
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequestSpec
{
Model
:
"test-model"
,
ProfilerImage
:
"test-profiler:latest"
,
Backend
:
"vllm"
,
Model
:
"test-model"
,
Backend
:
"vllm"
,
ProfilingConfig
:
nvidiacomv1alpha1
.
ProfilingConfigSpec
{
ProfilerImage
:
"test-profiler:latest"
,
Config
:
createTestConfig
(
map
[
string
]
interface
{}{
"engine"
:
map
[
string
]
interface
{}{
"config"
:
"/tmp/test-config.yaml"
,
...
...
@@ -1145,7 +1149,7 @@ var _ = Describe("DGDR Error Handling", func() {
}
Expect
(
k8sClient
.
Create
(
ctx
,
dgdr
))
.
Should
(
Succeed
())
defer
k8sClient
.
Delete
(
ctx
,
dgdr
)
defer
func
()
{
_
=
k8sClient
.
Delete
(
ctx
,
dgdr
)
}()
// Set status to Profiling
dgdr
.
Status
.
State
=
StateProfiling
...
...
@@ -1178,7 +1182,7 @@ var _ = Describe("DGDR Error Handling", func() {
},
}
Expect
(
k8sClient
.
Create
(
ctx
,
job
))
.
Should
(
Succeed
())
defer
k8sClient
.
Delete
(
ctx
,
job
)
defer
func
()
{
_
=
k8sClient
.
Delete
(
ctx
,
job
)
}()
// Update job status
job
.
Status
.
Conditions
=
[]
batchv1
.
JobCondition
{{
...
...
@@ -1219,7 +1223,7 @@ var _ = Describe("DGDR Error Handling", func() {
},
}
Expect
(
k8sClient
.
Create
(
ctx
,
pod
))
.
Should
(
Succeed
())
defer
k8sClient
.
Delete
(
ctx
,
pod
)
defer
func
()
{
_
=
k8sClient
.
Delete
(
ctx
,
pod
)
}()
// Reconcile - should capture error details
_
,
err
:=
reconciler
.
Reconcile
(
ctx
,
reconcile
.
Request
{
...
...
deploy/cloud/operator/internal/controller_common/pod.go
View file @
b1732a5f
...
...
@@ -101,9 +101,11 @@ func CanonicalizePodSpec(podSpec *corev1.PodSpec) *corev1.PodSpec {
// Sort image pull secrets
if
len
(
podSpec
.
ImagePullSecrets
)
>
1
{
sort
.
Slice
(
podSpec
.
ImagePullSecrets
,
func
(
i
,
j
int
)
bool
{
return
podSpec
.
ImagePullSecrets
[
i
]
.
Name
<
podSpec
.
ImagePullSecrets
[
j
]
.
Name
uniqueSecrets
:=
ensureUniqueImagePullSecrets
(
podSpec
.
ImagePullSecrets
)
sort
.
Slice
(
uniqueSecrets
,
func
(
i
,
j
int
)
bool
{
return
uniqueSecrets
[
i
]
.
Name
<
uniqueSecrets
[
j
]
.
Name
})
podSpec
.
ImagePullSecrets
=
uniqueSecrets
}
// Sort volumes and their nested items
...
...
@@ -275,3 +277,18 @@ func CanonicalizePodSpec(podSpec *corev1.PodSpec) *corev1.PodSpec {
return
podSpec
}
func
ensureUniqueImagePullSecrets
(
secrets
[]
corev1
.
LocalObjectReference
)
[]
corev1
.
LocalObjectReference
{
if
len
(
secrets
)
==
0
{
return
nil
}
uniqueSecrets
:=
make
(
map
[
string
]
corev1
.
LocalObjectReference
)
for
_
,
secret
:=
range
secrets
{
uniqueSecrets
[
secret
.
Name
]
=
secret
}
uniqueSecretsList
:=
make
([]
corev1
.
LocalObjectReference
,
0
,
len
(
uniqueSecrets
))
for
secretName
:=
range
uniqueSecrets
{
uniqueSecretsList
=
append
(
uniqueSecretsList
,
corev1
.
LocalObjectReference
{
Name
:
secretName
})
}
return
uniqueSecretsList
}
deploy/cloud/operator/internal/controller_common/pod_test.go
View file @
b1732a5f
...
...
@@ -208,6 +208,7 @@ func TestCanonicalizePodSpec(t *testing.T) {
{
Name
:
"registry-z"
},
{
Name
:
"registry-a"
},
{
Name
:
"registry-b"
},
{
Name
:
"registry-a"
},
},
},
expected
:
&
corev1
.
PodSpec
{
...
...
@@ -218,6 +219,15 @@ func TestCanonicalizePodSpec(t *testing.T) {
},
},
},
{
name
:
"sorts nil image pull secrets"
,
input
:
&
corev1
.
PodSpec
{
ImagePullSecrets
:
nil
,
},
expected
:
&
corev1
.
PodSpec
{
ImagePullSecrets
:
nil
,
},
},
{
name
:
"sorts volumes by name"
,
input
:
&
corev1
.
PodSpec
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment