Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
b1732a5f
Unverified
Commit
b1732a5f
authored
Oct 28, 2025
by
Julien Mancuso
Committed by
GitHub
Oct 28, 2025
Browse files
fix: remove duplicates from imagePullSecrets (#3923)
Signed-off-by:
Julien Mancuso
<
jmancuso@nvidia.com
>
parent
927dcbfc
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
126 additions
and
95 deletions
+126
-95
deploy/cloud/helm/platform/components/operator/templates/deployment.yaml
...lm/platform/components/operator/templates/deployment.yaml
+2
-2
deploy/cloud/operator/internal/controller/dynamographdeploymentrequest_controller_test.go
...ontroller/dynamographdeploymentrequest_controller_test.go
+95
-91
deploy/cloud/operator/internal/controller_common/pod.go
deploy/cloud/operator/internal/controller_common/pod.go
+19
-2
deploy/cloud/operator/internal/controller_common/pod_test.go
deploy/cloud/operator/internal/controller_common/pod_test.go
+10
-0
No files found.
deploy/cloud/helm/platform/components/operator/templates/deployment.yaml
View file @
b1732a5f
...
@@ -87,12 +87,12 @@ spec:
...
@@ -87,12 +87,12 @@ spec:
{{
- if .Values.natsAddr
}}
{{
- if .Values.natsAddr
}}
-
--natsAddr={{ .Values.natsAddr }}
-
--natsAddr={{ .Values.natsAddr }}
{{
- else
}}
{{
- else
}}
-
--natsAddr=nats://{{ .Release.Name }}-nats.{{ .Release.Namespace }}:4222
-
--natsAddr=nats://{{ .Release.Name }}-nats.{{ .Release.Namespace }}
.svc.cluster.local
:4222
{{
- end
}}
{{
- end
}}
{{
- if .Values.etcdAddr
}}
{{
- if .Values.etcdAddr
}}
-
--etcdAddr={{ .Values.etcdAddr }}
-
--etcdAddr={{ .Values.etcdAddr }}
{{
- else
}}
{{
- else
}}
-
--etcdAddr={{ .Release.Name }}-etcd.{{ .Release.Namespace }}:2379
-
--etcdAddr={{ .Release.Name }}-etcd.{{ .Release.Namespace }}
.svc.cluster.local
:2379
{{
- end
}}
{{
- end
}}
{{
- if and .Values.dynamo.istio.enabled .Values.dynamo.istio.gateway
}}
{{
- if and .Values.dynamo.istio.enabled .Values.dynamo.istio.gateway
}}
-
--istio-virtual-service-gateway={{ .Values.dynamo.istio.gateway }}
-
--istio-virtual-service-gateway={{ .Values.dynamo.istio.gateway }}
...
...
deploy/cloud/operator/internal/controller/dynamographdeploymentrequest_controller_test.go
View file @
b1732a5f
...
@@ -37,6 +37,10 @@ import (
...
@@ -37,6 +37,10 @@ import (
"sigs.k8s.io/yaml"
"sigs.k8s.io/yaml"
)
)
const
(
defaultNamespace
=
"default"
)
// MockRBACManager implements RBACManager for testing
// MockRBACManager implements RBACManager for testing
type
MockRBACManager
struct
{
type
MockRBACManager
struct
{
EnsureServiceAccountWithRBACFunc
func
(
ctx
context
.
Context
,
targetNamespace
,
serviceAccountName
,
clusterRoleName
string
)
error
EnsureServiceAccountWithRBACFunc
func
(
ctx
context
.
Context
,
targetNamespace
,
serviceAccountName
,
clusterRoleName
string
)
error
...
@@ -88,7 +92,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
...
@@ -88,7 +92,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
It
(
"Should validate spec and transition to Pending"
,
func
()
{
It
(
"Should validate spec and transition to Pending"
,
func
()
{
ctx
:=
context
.
Background
()
ctx
:=
context
.
Background
()
dgdrName
:=
"test-dgdr-initial"
dgdrName
:=
"test-dgdr-initial"
namespace
:=
"
default
"
namespace
:=
default
Namespace
dgdr
:=
&
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
{
dgdr
:=
&
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
{
ObjectMeta
:
metav1
.
ObjectMeta
{
ObjectMeta
:
metav1
.
ObjectMeta
{
...
@@ -97,9 +101,9 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
...
@@ -97,9 +101,9 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
},
},
Spec
:
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequestSpec
{
Spec
:
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequestSpec
{
Model
:
"test-model"
,
Model
:
"test-model"
,
ProfilerImage
:
"test-profiler:latest"
,
Backend
:
"vllm"
,
Backend
:
"vllm"
,
ProfilingConfig
:
nvidiacomv1alpha1
.
ProfilingConfigSpec
{
ProfilingConfig
:
nvidiacomv1alpha1
.
ProfilingConfigSpec
{
ProfilerImage
:
"test-profiler:latest"
,
Config
:
createTestConfig
(
map
[
string
]
interface
{}{
Config
:
createTestConfig
(
map
[
string
]
interface
{}{
"engine"
:
map
[
string
]
interface
{}{
"engine"
:
map
[
string
]
interface
{}{
"config"
:
"/tmp/test-config.yaml"
,
"config"
:
"/tmp/test-config.yaml"
,
...
@@ -120,7 +124,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
...
@@ -120,7 +124,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
}
}
Expect
(
k8sClient
.
Create
(
ctx
,
dgdr
))
.
Should
(
Succeed
())
Expect
(
k8sClient
.
Create
(
ctx
,
dgdr
))
.
Should
(
Succeed
())
defer
k8sClient
.
Delete
(
ctx
,
dgdr
)
defer
func
()
{
_
=
k8sClient
.
Delete
(
ctx
,
dgdr
)
}()
// First reconcile: Empty -> Pending
// First reconcile: Empty -> Pending
_
,
err
:=
reconciler
.
Reconcile
(
ctx
,
reconcile
.
Request
{
_
,
err
:=
reconciler
.
Reconcile
(
ctx
,
reconcile
.
Request
{
...
@@ -134,20 +138,20 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
...
@@ -134,20 +138,20 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
// Check status
// Check status
Eventually
(
func
()
string
{
Eventually
(
func
()
string
{
var
updated
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
var
updated
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
k8sClient
.
Get
(
ctx
,
types
.
NamespacedName
{
Name
:
dgdrName
,
Namespace
:
namespace
},
&
updated
)
_
=
k8sClient
.
Get
(
ctx
,
types
.
NamespacedName
{
Name
:
dgdrName
,
Namespace
:
namespace
},
&
updated
)
return
updated
.
Status
.
State
return
updated
.
Status
.
State
},
timeout
,
interval
)
.
Should
(
Equal
(
StatePending
))
},
timeout
,
interval
)
.
Should
(
Equal
(
StatePending
))
// Verify observedGeneration is set
// Verify observedGeneration is set
var
updated
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
var
updated
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
k8sClient
.
Get
(
ctx
,
types
.
NamespacedName
{
Name
:
dgdrName
,
Namespace
:
namespace
},
&
updated
)
_
=
k8sClient
.
Get
(
ctx
,
types
.
NamespacedName
{
Name
:
dgdrName
,
Namespace
:
namespace
},
&
updated
)
Expect
(
updated
.
Status
.
ObservedGeneration
)
.
Should
(
Equal
(
updated
.
Generation
))
Expect
(
updated
.
Status
.
ObservedGeneration
)
.
Should
(
Equal
(
updated
.
Generation
))
})
})
It
(
"Should pass validation with minimal config"
,
func
()
{
It
(
"Should pass validation with minimal config"
,
func
()
{
ctx
:=
context
.
Background
()
ctx
:=
context
.
Background
()
dgdrName
:=
"test-dgdr-minimal"
dgdrName
:=
"test-dgdr-minimal"
namespace
:=
"
default
"
namespace
:=
default
Namespace
dgdr
:=
&
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
{
dgdr
:=
&
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
{
ObjectMeta
:
metav1
.
ObjectMeta
{
ObjectMeta
:
metav1
.
ObjectMeta
{
...
@@ -156,9 +160,9 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
...
@@ -156,9 +160,9 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
},
},
Spec
:
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequestSpec
{
Spec
:
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequestSpec
{
Model
:
"test-model"
,
Model
:
"test-model"
,
ProfilerImage
:
"test-profiler:latest"
,
Backend
:
"vllm"
,
Backend
:
"vllm"
,
ProfilingConfig
:
nvidiacomv1alpha1
.
ProfilingConfigSpec
{
ProfilingConfig
:
nvidiacomv1alpha1
.
ProfilingConfigSpec
{
ProfilerImage
:
"test-profiler:latest"
,
Config
:
createTestConfig
(
map
[
string
]
interface
{}{
Config
:
createTestConfig
(
map
[
string
]
interface
{}{
"sla"
:
map
[
string
]
interface
{}{
"sla"
:
map
[
string
]
interface
{}{
"ttft"
:
100.0
,
"ttft"
:
100.0
,
...
@@ -170,7 +174,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
...
@@ -170,7 +174,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
}
}
Expect
(
k8sClient
.
Create
(
ctx
,
dgdr
))
.
Should
(
Succeed
())
Expect
(
k8sClient
.
Create
(
ctx
,
dgdr
))
.
Should
(
Succeed
())
defer
k8sClient
.
Delete
(
ctx
,
dgdr
)
defer
func
()
{
_
=
k8sClient
.
Delete
(
ctx
,
dgdr
)
}()
// Reconcile - should succeed with minimal config
// Reconcile - should succeed with minimal config
_
,
err
:=
reconciler
.
Reconcile
(
ctx
,
reconcile
.
Request
{
_
,
err
:=
reconciler
.
Reconcile
(
ctx
,
reconcile
.
Request
{
...
@@ -184,7 +188,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
...
@@ -184,7 +188,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
// Check status transitions to Pending (not Failed)
// Check status transitions to Pending (not Failed)
Eventually
(
func
()
string
{
Eventually
(
func
()
string
{
var
updated
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
var
updated
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
k8sClient
.
Get
(
ctx
,
types
.
NamespacedName
{
Name
:
dgdrName
,
Namespace
:
namespace
},
&
updated
)
_
=
k8sClient
.
Get
(
ctx
,
types
.
NamespacedName
{
Name
:
dgdrName
,
Namespace
:
namespace
},
&
updated
)
return
updated
.
Status
.
State
return
updated
.
Status
.
State
},
timeout
,
interval
)
.
Should
(
Equal
(
StatePending
))
},
timeout
,
interval
)
.
Should
(
Equal
(
StatePending
))
})
})
...
@@ -194,7 +198,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
...
@@ -194,7 +198,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
It
(
"Should create online profiling job"
,
func
()
{
It
(
"Should create online profiling job"
,
func
()
{
ctx
:=
context
.
Background
()
ctx
:=
context
.
Background
()
dgdrName
:=
"test-dgdr-profiling-online"
dgdrName
:=
"test-dgdr-profiling-online"
namespace
:=
"
default
"
namespace
:=
default
Namespace
// Create ConfigMap for DGD base config
// Create ConfigMap for DGD base config
configMap
:=
&
corev1
.
ConfigMap
{
configMap
:=
&
corev1
.
ConfigMap
{
...
@@ -207,7 +211,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
...
@@ -207,7 +211,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
},
},
}
}
Expect
(
k8sClient
.
Create
(
ctx
,
configMap
))
.
Should
(
Succeed
())
Expect
(
k8sClient
.
Create
(
ctx
,
configMap
))
.
Should
(
Succeed
())
defer
k8sClient
.
Delete
(
ctx
,
configMap
)
defer
func
()
{
_
=
k8sClient
.
Delete
(
ctx
,
configMap
)
}()
// Create ServiceAccount
// Create ServiceAccount
sa
:=
&
corev1
.
ServiceAccount
{
sa
:=
&
corev1
.
ServiceAccount
{
...
@@ -217,7 +221,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
...
@@ -217,7 +221,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
},
},
}
}
Expect
(
k8sClient
.
Create
(
ctx
,
sa
))
.
Should
(
Succeed
())
Expect
(
k8sClient
.
Create
(
ctx
,
sa
))
.
Should
(
Succeed
())
defer
k8sClient
.
Delete
(
ctx
,
sa
)
defer
func
()
{
_
=
k8sClient
.
Delete
(
ctx
,
sa
)
}()
dgdr
:=
&
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
{
dgdr
:=
&
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
{
ObjectMeta
:
metav1
.
ObjectMeta
{
ObjectMeta
:
metav1
.
ObjectMeta
{
...
@@ -226,9 +230,9 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
...
@@ -226,9 +230,9 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
},
},
Spec
:
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequestSpec
{
Spec
:
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequestSpec
{
Model
:
"test-model"
,
Model
:
"test-model"
,
ProfilerImage
:
"test-profiler:latest"
,
Backend
:
"vllm"
,
Backend
:
"vllm"
,
ProfilingConfig
:
nvidiacomv1alpha1
.
ProfilingConfigSpec
{
ProfilingConfig
:
nvidiacomv1alpha1
.
ProfilingConfigSpec
{
ProfilerImage
:
"test-profiler:latest"
,
Config
:
createTestConfig
(
map
[
string
]
interface
{}{
Config
:
createTestConfig
(
map
[
string
]
interface
{}{
"engine"
:
map
[
string
]
interface
{}{
"engine"
:
map
[
string
]
interface
{}{
"profiler_image"
:
"test-profiler:latest"
,
"profiler_image"
:
"test-profiler:latest"
,
...
@@ -253,7 +257,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
...
@@ -253,7 +257,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
}
}
Expect
(
k8sClient
.
Create
(
ctx
,
dgdr
))
.
Should
(
Succeed
())
Expect
(
k8sClient
.
Create
(
ctx
,
dgdr
))
.
Should
(
Succeed
())
defer
k8sClient
.
Delete
(
ctx
,
dgdr
)
defer
func
()
{
_
=
k8sClient
.
Delete
(
ctx
,
dgdr
)
}()
// Reconcile multiple times to move through states
// Reconcile multiple times to move through states
_
,
err
:=
reconciler
.
Reconcile
(
ctx
,
reconcile
.
Request
{
_
,
err
:=
reconciler
.
Reconcile
(
ctx
,
reconcile
.
Request
{
...
@@ -278,7 +282,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
...
@@ -278,7 +282,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
// Verify job has correct labels
// Verify job has correct labels
jobName
:=
getProfilingJobName
(
dgdr
)
jobName
:=
getProfilingJobName
(
dgdr
)
job
:=
&
batchv1
.
Job
{}
job
:=
&
batchv1
.
Job
{}
k8sClient
.
Get
(
ctx
,
types
.
NamespacedName
{
Name
:
jobName
,
Namespace
:
namespace
},
job
)
_
=
k8sClient
.
Get
(
ctx
,
types
.
NamespacedName
{
Name
:
jobName
,
Namespace
:
namespace
},
job
)
Expect
(
job
.
Labels
[
LabelApp
])
.
Should
(
Equal
(
LabelValueDynamoProfiler
))
Expect
(
job
.
Labels
[
LabelApp
])
.
Should
(
Equal
(
LabelValueDynamoProfiler
))
Expect
(
job
.
Labels
[
LabelDGDR
])
.
Should
(
Equal
(
dgdrName
))
Expect
(
job
.
Labels
[
LabelDGDR
])
.
Should
(
Equal
(
dgdrName
))
...
@@ -300,13 +304,13 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
...
@@ -300,13 +304,13 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
))
))
// Clean up job
// Clean up job
k8sClient
.
Delete
(
ctx
,
job
)
_
=
k8sClient
.
Delete
(
ctx
,
job
)
})
})
It
(
"Should create offline (AIC) profiling job"
,
func
()
{
It
(
"Should create offline (AIC) profiling job"
,
func
()
{
ctx
:=
context
.
Background
()
ctx
:=
context
.
Background
()
dgdrName
:=
"test-dgdr-profiling-aic"
dgdrName
:=
"test-dgdr-profiling-aic"
namespace
:=
"
default
"
namespace
:=
default
Namespace
// Create ServiceAccount
// Create ServiceAccount
sa
:=
&
corev1
.
ServiceAccount
{
sa
:=
&
corev1
.
ServiceAccount
{
...
@@ -315,8 +319,8 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
...
@@ -315,8 +319,8 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
Namespace
:
namespace
,
Namespace
:
namespace
,
},
},
}
}
_
=
k8sClient
.
Create
(
ctx
,
sa
)
Expect
(
k8sClient
.
Create
(
ctx
,
sa
)
)
.
Should
(
Succeed
())
defer
k8sClient
.
Delete
(
ctx
,
sa
)
defer
func
()
{
_
=
k8sClient
.
Delete
(
ctx
,
sa
)
}()
dgdr
:=
&
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
{
dgdr
:=
&
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
{
ObjectMeta
:
metav1
.
ObjectMeta
{
ObjectMeta
:
metav1
.
ObjectMeta
{
...
@@ -325,9 +329,9 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
...
@@ -325,9 +329,9 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
},
},
Spec
:
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequestSpec
{
Spec
:
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequestSpec
{
Model
:
"test-model"
,
Model
:
"test-model"
,
ProfilerImage
:
"test-profiler:latest"
,
Backend
:
"trtllm"
,
Backend
:
"trtllm"
,
ProfilingConfig
:
nvidiacomv1alpha1
.
ProfilingConfigSpec
{
ProfilingConfig
:
nvidiacomv1alpha1
.
ProfilingConfigSpec
{
ProfilerImage
:
"test-profiler:latest"
,
Config
:
createTestConfig
(
map
[
string
]
interface
{}{
Config
:
createTestConfig
(
map
[
string
]
interface
{}{
"engine"
:
map
[
string
]
interface
{}{
"engine"
:
map
[
string
]
interface
{}{
"config"
:
"/tmp/test-config.yaml"
,
"config"
:
"/tmp/test-config.yaml"
,
...
@@ -355,7 +359,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
...
@@ -355,7 +359,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
}
}
Expect
(
k8sClient
.
Create
(
ctx
,
dgdr
))
.
Should
(
Succeed
())
Expect
(
k8sClient
.
Create
(
ctx
,
dgdr
))
.
Should
(
Succeed
())
defer
k8sClient
.
Delete
(
ctx
,
dgdr
)
defer
func
()
{
_
=
k8sClient
.
Delete
(
ctx
,
dgdr
)
}()
// Reconcile
// Reconcile
_
,
err
:=
reconciler
.
Reconcile
(
ctx
,
reconcile
.
Request
{
_
,
err
:=
reconciler
.
Reconcile
(
ctx
,
reconcile
.
Request
{
...
@@ -382,7 +386,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
...
@@ -382,7 +386,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
jobName
:=
getProfilingJobName
(
dgdr
)
jobName
:=
getProfilingJobName
(
dgdr
)
job
:=
&
batchv1
.
Job
{}
job
:=
&
batchv1
.
Job
{}
if
err
:=
k8sClient
.
Get
(
ctx
,
types
.
NamespacedName
{
Name
:
jobName
,
Namespace
:
namespace
},
job
);
err
==
nil
{
if
err
:=
k8sClient
.
Get
(
ctx
,
types
.
NamespacedName
{
Name
:
jobName
,
Namespace
:
namespace
},
job
);
err
==
nil
{
k8sClient
.
Delete
(
ctx
,
job
)
_
=
k8sClient
.
Delete
(
ctx
,
job
)
}
}
})
})
})
})
...
@@ -391,7 +395,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
...
@@ -391,7 +395,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
It
(
"Should generate DGD spec from ConfigMap"
,
func
()
{
It
(
"Should generate DGD spec from ConfigMap"
,
func
()
{
ctx
:=
context
.
Background
()
ctx
:=
context
.
Background
()
dgdrName
:=
"test-dgdr-profiling-complete"
dgdrName
:=
"test-dgdr-profiling-complete"
namespace
:=
"
default
"
namespace
:=
default
Namespace
dgdr
:=
&
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
{
dgdr
:=
&
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
{
ObjectMeta
:
metav1
.
ObjectMeta
{
ObjectMeta
:
metav1
.
ObjectMeta
{
...
@@ -400,9 +404,9 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
...
@@ -400,9 +404,9 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
},
},
Spec
:
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequestSpec
{
Spec
:
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequestSpec
{
Model
:
"test-model"
,
Model
:
"test-model"
,
ProfilerImage
:
"test-profiler:latest"
,
Backend
:
"vllm"
,
Backend
:
"vllm"
,
ProfilingConfig
:
nvidiacomv1alpha1
.
ProfilingConfigSpec
{
ProfilingConfig
:
nvidiacomv1alpha1
.
ProfilingConfigSpec
{
ProfilerImage
:
"test-profiler:latest"
,
Config
:
createTestConfig
(
map
[
string
]
interface
{}{
Config
:
createTestConfig
(
map
[
string
]
interface
{}{
"engine"
:
map
[
string
]
interface
{}{
"engine"
:
map
[
string
]
interface
{}{
"config"
:
"/tmp/test-config.yaml"
,
"config"
:
"/tmp/test-config.yaml"
,
...
@@ -419,7 +423,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
...
@@ -419,7 +423,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
}
}
Expect
(
k8sClient
.
Create
(
ctx
,
dgdr
))
.
Should
(
Succeed
())
Expect
(
k8sClient
.
Create
(
ctx
,
dgdr
))
.
Should
(
Succeed
())
defer
k8sClient
.
Delete
(
ctx
,
dgdr
)
defer
func
()
{
_
=
k8sClient
.
Delete
(
ctx
,
dgdr
)
}()
// Update status to Profiling using Status subresource
// Update status to Profiling using Status subresource
dgdr
.
Status
.
State
=
StateProfiling
dgdr
.
Status
.
State
=
StateProfiling
...
@@ -451,7 +455,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
...
@@ -451,7 +455,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
},
},
}
}
Expect
(
k8sClient
.
Create
(
ctx
,
job
))
.
Should
(
Succeed
())
Expect
(
k8sClient
.
Create
(
ctx
,
job
))
.
Should
(
Succeed
())
defer
k8sClient
.
Delete
(
ctx
,
job
)
defer
func
()
{
_
=
k8sClient
.
Delete
(
ctx
,
job
)
}()
// Update job status to completed using Status subresource
// Update job status to completed using Status subresource
job
.
Status
.
Conditions
=
[]
batchv1
.
JobCondition
{{
job
.
Status
.
Conditions
=
[]
batchv1
.
JobCondition
{{
...
@@ -481,7 +485,7 @@ spec:
...
@@ -481,7 +485,7 @@ spec:
},
},
}
}
Expect
(
k8sClient
.
Create
(
ctx
,
cm
))
.
Should
(
Succeed
())
Expect
(
k8sClient
.
Create
(
ctx
,
cm
))
.
Should
(
Succeed
())
defer
k8sClient
.
Delete
(
ctx
,
cm
)
defer
func
()
{
_
=
k8sClient
.
Delete
(
ctx
,
cm
)
}()
// Reconcile to process the profiling completion
// Reconcile to process the profiling completion
_
,
err
:=
reconciler
.
Reconcile
(
ctx
,
reconcile
.
Request
{
_
,
err
:=
reconciler
.
Reconcile
(
ctx
,
reconcile
.
Request
{
...
@@ -505,7 +509,7 @@ spec:
...
@@ -505,7 +509,7 @@ spec:
It
(
"Should create DGD after profiling"
,
func
()
{
It
(
"Should create DGD after profiling"
,
func
()
{
ctx
:=
context
.
Background
()
ctx
:=
context
.
Background
()
dgdrName
:=
"test-dgdr-autoapply"
dgdrName
:=
"test-dgdr-autoapply"
namespace
:=
"
default
"
namespace
:=
default
Namespace
dgdr
:=
&
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
{
dgdr
:=
&
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
{
ObjectMeta
:
metav1
.
ObjectMeta
{
ObjectMeta
:
metav1
.
ObjectMeta
{
...
@@ -514,9 +518,9 @@ spec:
...
@@ -514,9 +518,9 @@ spec:
},
},
Spec
:
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequestSpec
{
Spec
:
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequestSpec
{
Model
:
"test-model"
,
Model
:
"test-model"
,
ProfilerImage
:
"test-profiler:latest"
,
Backend
:
"vllm"
,
Backend
:
"vllm"
,
ProfilingConfig
:
nvidiacomv1alpha1
.
ProfilingConfigSpec
{
ProfilingConfig
:
nvidiacomv1alpha1
.
ProfilingConfigSpec
{
ProfilerImage
:
"test-profiler:latest"
,
Config
:
createTestConfig
(
map
[
string
]
interface
{}{
Config
:
createTestConfig
(
map
[
string
]
interface
{}{
"engine"
:
map
[
string
]
interface
{}{
"engine"
:
map
[
string
]
interface
{}{
"config"
:
"/tmp/test-config.yaml"
,
"config"
:
"/tmp/test-config.yaml"
,
...
@@ -534,7 +538,7 @@ spec:
...
@@ -534,7 +538,7 @@ spec:
}
}
Expect
(
k8sClient
.
Create
(
ctx
,
dgdr
))
.
Should
(
Succeed
())
Expect
(
k8sClient
.
Create
(
ctx
,
dgdr
))
.
Should
(
Succeed
())
defer
k8sClient
.
Delete
(
ctx
,
dgdr
)
defer
func
()
{
_
=
k8sClient
.
Delete
(
ctx
,
dgdr
)
}()
// Update status to Profiling using Status subresource
// Update status to Profiling using Status subresource
dgdr
.
Status
.
State
=
StateProfiling
dgdr
.
Status
.
State
=
StateProfiling
...
@@ -566,7 +570,7 @@ spec:
...
@@ -566,7 +570,7 @@ spec:
},
},
}
}
Expect
(
k8sClient
.
Create
(
ctx
,
job
))
.
Should
(
Succeed
())
Expect
(
k8sClient
.
Create
(
ctx
,
job
))
.
Should
(
Succeed
())
defer
k8sClient
.
Delete
(
ctx
,
job
)
defer
func
()
{
_
=
k8sClient
.
Delete
(
ctx
,
job
)
}()
// Update job status to completed using Status subresource
// Update job status to completed using Status subresource
job
.
Status
.
Conditions
=
[]
batchv1
.
JobCondition
{{
job
.
Status
.
Conditions
=
[]
batchv1
.
JobCondition
{{
...
@@ -596,7 +600,7 @@ spec:
...
@@ -596,7 +600,7 @@ spec:
},
},
}
}
Expect
(
k8sClient
.
Create
(
ctx
,
cm
))
.
Should
(
Succeed
())
Expect
(
k8sClient
.
Create
(
ctx
,
cm
))
.
Should
(
Succeed
())
defer
k8sClient
.
Delete
(
ctx
,
cm
)
defer
func
()
{
_
=
k8sClient
.
Delete
(
ctx
,
cm
)
}()
// Reconcile to generate spec (transitions to Deploying because autoApply=true)
// Reconcile to generate spec (transitions to Deploying because autoApply=true)
_
,
err
:=
reconciler
.
Reconcile
(
ctx
,
reconcile
.
Request
{
_
,
err
:=
reconciler
.
Reconcile
(
ctx
,
reconcile
.
Request
{
...
@@ -620,14 +624,14 @@ spec:
...
@@ -620,14 +624,14 @@ spec:
Expect
(
k8sClient
.
Get
(
ctx
,
types
.
NamespacedName
{
Name
:
"test-dgd-auto"
,
Namespace
:
namespace
},
dgd
))
.
Should
(
Succeed
())
Expect
(
k8sClient
.
Get
(
ctx
,
types
.
NamespacedName
{
Name
:
"test-dgd-auto"
,
Namespace
:
namespace
},
dgd
))
.
Should
(
Succeed
())
// Get final DGDR status
// Get final DGDR status
k8sClient
.
Get
(
ctx
,
types
.
NamespacedName
{
Name
:
dgdrName
,
Namespace
:
namespace
},
&
updated
)
Expect
(
k8sClient
.
Get
(
ctx
,
types
.
NamespacedName
{
Name
:
dgdrName
,
Namespace
:
namespace
},
&
updated
)
)
.
Should
(
Succeed
())
Expect
(
updated
.
Status
.
Deployment
)
.
NotTo
(
BeNil
())
Expect
(
updated
.
Status
.
Deployment
)
.
NotTo
(
BeNil
())
Expect
(
updated
.
Status
.
Deployment
.
Created
)
.
Should
(
BeTrue
())
Expect
(
updated
.
Status
.
Deployment
.
Created
)
.
Should
(
BeTrue
())
Expect
(
updated
.
Status
.
Deployment
.
Name
)
.
Should
(
Equal
(
"test-dgd-auto"
))
Expect
(
updated
.
Status
.
Deployment
.
Name
)
.
Should
(
Equal
(
"test-dgd-auto"
))
// Clean up DGD
// Clean up DGD
k8sClient
.
Get
(
ctx
,
types
.
NamespacedName
{
Name
:
"test-dgd-auto"
,
Namespace
:
namespace
},
dgd
)
Expect
(
k8sClient
.
Get
(
ctx
,
types
.
NamespacedName
{
Name
:
"test-dgd-auto"
,
Namespace
:
namespace
},
dgd
)
)
.
Should
(
Succeed
())
k8sClient
.
Delete
(
ctx
,
dgd
)
_
=
k8sClient
.
Delete
(
ctx
,
dgd
)
})
})
})
})
...
@@ -635,7 +639,7 @@ spec:
...
@@ -635,7 +639,7 @@ spec:
It
(
"Should reject spec changes after profiling starts"
,
func
()
{
It
(
"Should reject spec changes after profiling starts"
,
func
()
{
ctx
:=
context
.
Background
()
ctx
:=
context
.
Background
()
dgdrName
:=
"test-dgdr-immutable"
dgdrName
:=
"test-dgdr-immutable"
namespace
:=
"
default
"
namespace
:=
default
Namespace
dgdr
:=
&
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
{
dgdr
:=
&
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
{
ObjectMeta
:
metav1
.
ObjectMeta
{
ObjectMeta
:
metav1
.
ObjectMeta
{
...
@@ -644,9 +648,9 @@ spec:
...
@@ -644,9 +648,9 @@ spec:
},
},
Spec
:
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequestSpec
{
Spec
:
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequestSpec
{
Model
:
"test-model"
,
Model
:
"test-model"
,
ProfilerImage
:
"test-profiler:latest"
,
Backend
:
"vllm"
,
Backend
:
"vllm"
,
ProfilingConfig
:
nvidiacomv1alpha1
.
ProfilingConfigSpec
{
ProfilingConfig
:
nvidiacomv1alpha1
.
ProfilingConfigSpec
{
ProfilerImage
:
"test-profiler:latest"
,
Config
:
createTestConfig
(
map
[
string
]
interface
{}{
Config
:
createTestConfig
(
map
[
string
]
interface
{}{
"engine"
:
map
[
string
]
interface
{}{
"engine"
:
map
[
string
]
interface
{}{
"config"
:
"/tmp/test-config.yaml"
,
"config"
:
"/tmp/test-config.yaml"
,
...
@@ -663,7 +667,7 @@ spec:
...
@@ -663,7 +667,7 @@ spec:
}
}
Expect
(
k8sClient
.
Create
(
ctx
,
dgdr
))
.
Should
(
Succeed
())
Expect
(
k8sClient
.
Create
(
ctx
,
dgdr
))
.
Should
(
Succeed
())
defer
k8sClient
.
Delete
(
ctx
,
dgdr
)
defer
func
()
{
_
=
k8sClient
.
Delete
(
ctx
,
dgdr
)
}()
// Reconcile to initialize
// Reconcile to initialize
_
,
err
:=
reconciler
.
Reconcile
(
ctx
,
reconcile
.
Request
{
_
,
err
:=
reconciler
.
Reconcile
(
ctx
,
reconcile
.
Request
{
...
@@ -673,22 +677,22 @@ spec:
...
@@ -673,22 +677,22 @@ spec:
// Get current generation
// Get current generation
var
current
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
var
current
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
k8sClient
.
Get
(
ctx
,
types
.
NamespacedName
{
Name
:
dgdrName
,
Namespace
:
namespace
},
&
current
)
Expect
(
k8sClient
.
Get
(
ctx
,
types
.
NamespacedName
{
Name
:
dgdrName
,
Namespace
:
namespace
},
&
current
)
)
.
Should
(
Succeed
())
initialGeneration
:=
current
.
Generation
initialGeneration
:=
current
.
Generation
observedGeneration
:=
current
.
Status
.
ObservedGeneration
observedGeneration
:=
current
.
Status
.
ObservedGeneration
// Manually set state to Profiling to simulate in-progress profiling
// Manually set state to Profiling to simulate in-progress profiling
current
.
Status
.
State
=
StateProfiling
current
.
Status
.
State
=
StateProfiling
k8sClient
.
Status
()
.
Update
(
ctx
,
&
current
)
Expect
(
k8sClient
.
Status
()
.
Update
(
ctx
,
&
current
)
)
.
Should
(
Succeed
())
// Try to modify spec
// Try to modify spec
k8sClient
.
Get
(
ctx
,
types
.
NamespacedName
{
Name
:
dgdrName
,
Namespace
:
namespace
},
&
current
)
Expect
(
k8sClient
.
Get
(
ctx
,
types
.
NamespacedName
{
Name
:
dgdrName
,
Namespace
:
namespace
},
&
current
)
)
.
Should
(
Succeed
())
// Unmarshal config, modify it, and marshal back
// Unmarshal config, modify it, and marshal back
var
config
map
[
string
]
interface
{}
var
config
map
[
string
]
interface
{}
yaml
.
Unmarshal
(
current
.
Spec
.
ProfilingConfig
.
Config
.
Raw
,
&
config
)
Expect
(
yaml
.
Unmarshal
(
current
.
Spec
.
ProfilingConfig
.
Config
.
Raw
,
&
config
)
)
.
Should
(
Succeed
())
config
[
"sla"
]
.
(
map
[
string
]
interface
{})[
"ttft"
]
=
200.0
config
[
"sla"
]
.
(
map
[
string
]
interface
{})[
"ttft"
]
=
200.0
current
.
Spec
.
ProfilingConfig
.
Config
=
createTestConfig
(
config
)
current
.
Spec
.
ProfilingConfig
.
Config
=
createTestConfig
(
config
)
k8sClient
.
Update
(
ctx
,
&
current
)
Expect
(
k8sClient
.
Update
(
ctx
,
&
current
)
)
.
Should
(
Succeed
())
// Reconcile
// Reconcile
_
,
err
=
reconciler
.
Reconcile
(
ctx
,
reconcile
.
Request
{
_
,
err
=
reconciler
.
Reconcile
(
ctx
,
reconcile
.
Request
{
...
@@ -697,7 +701,7 @@ spec:
...
@@ -697,7 +701,7 @@ spec:
Expect
(
err
)
.
NotTo
(
HaveOccurred
())
Expect
(
err
)
.
NotTo
(
HaveOccurred
())
// Verify generation changed but observedGeneration stayed the same
// Verify generation changed but observedGeneration stayed the same
k8sClient
.
Get
(
ctx
,
types
.
NamespacedName
{
Name
:
dgdrName
,
Namespace
:
namespace
},
&
current
)
Expect
(
k8sClient
.
Get
(
ctx
,
types
.
NamespacedName
{
Name
:
dgdrName
,
Namespace
:
namespace
},
&
current
)
)
.
Should
(
Succeed
())
Expect
(
current
.
Generation
)
.
Should
(
BeNumerically
(
">"
,
initialGeneration
))
Expect
(
current
.
Generation
)
.
Should
(
BeNumerically
(
">"
,
initialGeneration
))
Expect
(
current
.
Status
.
ObservedGeneration
)
.
Should
(
Equal
(
observedGeneration
))
Expect
(
current
.
Status
.
ObservedGeneration
)
.
Should
(
Equal
(
observedGeneration
))
Expect
(
current
.
Status
.
State
)
.
Should
(
Equal
(
StateProfiling
))
// State unchanged
Expect
(
current
.
Status
.
State
)
.
Should
(
Equal
(
StateProfiling
))
// State unchanged
...
@@ -718,7 +722,7 @@ spec:
...
@@ -718,7 +722,7 @@ spec:
It
(
"Should transition to DeploymentDeleted state"
,
func
()
{
It
(
"Should transition to DeploymentDeleted state"
,
func
()
{
ctx
:=
context
.
Background
()
ctx
:=
context
.
Background
()
dgdrName
:=
"test-dgdr-dgd-deleted"
dgdrName
:=
"test-dgdr-dgd-deleted"
namespace
:=
"
default
"
namespace
:=
default
Namespace
dgdr
:=
&
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
{
dgdr
:=
&
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
{
ObjectMeta
:
metav1
.
ObjectMeta
{
ObjectMeta
:
metav1
.
ObjectMeta
{
...
@@ -727,9 +731,9 @@ spec:
...
@@ -727,9 +731,9 @@ spec:
},
},
Spec
:
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequestSpec
{
Spec
:
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequestSpec
{
Model
:
"test-model"
,
Model
:
"test-model"
,
ProfilerImage
:
"test-profiler:latest"
,
Backend
:
"vllm"
,
Backend
:
"vllm"
,
ProfilingConfig
:
nvidiacomv1alpha1
.
ProfilingConfigSpec
{
ProfilingConfig
:
nvidiacomv1alpha1
.
ProfilingConfigSpec
{
ProfilerImage
:
"test-profiler:latest"
,
Config
:
createTestConfig
(
map
[
string
]
interface
{}{
Config
:
createTestConfig
(
map
[
string
]
interface
{}{
"engine"
:
map
[
string
]
interface
{}{
"engine"
:
map
[
string
]
interface
{}{
"config"
:
"/tmp/test-config.yaml"
,
"config"
:
"/tmp/test-config.yaml"
,
...
@@ -747,7 +751,7 @@ spec:
...
@@ -747,7 +751,7 @@ spec:
}
}
Expect
(
k8sClient
.
Create
(
ctx
,
dgdr
))
.
Should
(
Succeed
())
Expect
(
k8sClient
.
Create
(
ctx
,
dgdr
))
.
Should
(
Succeed
())
defer
k8sClient
.
Delete
(
ctx
,
dgdr
)
defer
func
()
{
_
=
k8sClient
.
Delete
(
ctx
,
dgdr
)
}()
// Update status to Ready with Deployment info using Status subresource
// Update status to Ready with Deployment info using Status subresource
dgdr
.
Status
.
State
=
StateReady
dgdr
.
Status
.
State
=
StateReady
...
@@ -874,9 +878,9 @@ var _ = Describe("DGDR Validation", func() {
...
@@ -874,9 +878,9 @@ var _ = Describe("DGDR Validation", func() {
dgdr
:=
&
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
{
dgdr
:=
&
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
{
Spec
:
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequestSpec
{
Spec
:
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequestSpec
{
Model
:
"test-model"
,
Model
:
"test-model"
,
ProfilerImage
:
"test-profiler:latest"
,
Backend
:
"vllm"
,
Backend
:
"vllm"
,
ProfilingConfig
:
nvidiacomv1alpha1
.
ProfilingConfigSpec
{
ProfilingConfig
:
nvidiacomv1alpha1
.
ProfilingConfigSpec
{
ProfilerImage
:
"test-profiler:latest"
,
Config
:
createTestConfig
(
map
[
string
]
interface
{}{
Config
:
createTestConfig
(
map
[
string
]
interface
{}{
"engine"
:
map
[
string
]
interface
{}{
"engine"
:
map
[
string
]
interface
{}{
"config"
:
"/tmp/test-config.yaml"
,
"config"
:
"/tmp/test-config.yaml"
,
...
@@ -901,9 +905,9 @@ var _ = Describe("DGDR Validation", func() {
...
@@ -901,9 +905,9 @@ var _ = Describe("DGDR Validation", func() {
dgdr
:=
&
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
{
dgdr
:=
&
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
{
Spec
:
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequestSpec
{
Spec
:
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequestSpec
{
Model
:
"test-model"
,
Model
:
"test-model"
,
ProfilerImage
:
"test-profiler:latest"
,
Backend
:
"vllm"
,
Backend
:
"vllm"
,
ProfilingConfig
:
nvidiacomv1alpha1
.
ProfilingConfigSpec
{
ProfilingConfig
:
nvidiacomv1alpha1
.
ProfilingConfigSpec
{
ProfilerImage
:
"test-profiler:latest"
,
Config
:
createTestConfig
(
map
[
string
]
interface
{}{
Config
:
createTestConfig
(
map
[
string
]
interface
{}{
"sla"
:
map
[
string
]
interface
{}{
"sla"
:
map
[
string
]
interface
{}{
"ttft"
:
100.0
,
"ttft"
:
100.0
,
...
@@ -948,8 +952,8 @@ var _ = Describe("DGDR Profiler Arguments", func() {
...
@@ -948,8 +952,8 @@ var _ = Describe("DGDR Profiler Arguments", func() {
Namespace
:
namespace
,
Namespace
:
namespace
,
},
},
}
}
_
=
k8sClient
.
Create
(
ctx
,
sa
)
Expect
(
k8sClient
.
Create
(
ctx
,
sa
)
)
.
Should
(
Succeed
())
defer
k8sClient
.
Delete
(
ctx
,
sa
)
defer
func
()
{
_
=
k8sClient
.
Delete
(
ctx
,
sa
)
}()
dgdr
:=
&
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
{
dgdr
:=
&
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
{
ObjectMeta
:
metav1
.
ObjectMeta
{
ObjectMeta
:
metav1
.
ObjectMeta
{
...
@@ -958,9 +962,9 @@ var _ = Describe("DGDR Profiler Arguments", func() {
...
@@ -958,9 +962,9 @@ var _ = Describe("DGDR Profiler Arguments", func() {
},
},
Spec
:
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequestSpec
{
Spec
:
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequestSpec
{
Model
:
"test-model"
,
Model
:
"test-model"
,
ProfilerImage
:
"test-profiler:latest"
,
Backend
:
"trtllm"
,
Backend
:
"trtllm"
,
ProfilingConfig
:
nvidiacomv1alpha1
.
ProfilingConfigSpec
{
ProfilingConfig
:
nvidiacomv1alpha1
.
ProfilingConfigSpec
{
ProfilerImage
:
"test-profiler:latest"
,
Config
:
createTestConfig
(
map
[
string
]
interface
{}{
Config
:
createTestConfig
(
map
[
string
]
interface
{}{
"engine"
:
map
[
string
]
interface
{}{
"engine"
:
map
[
string
]
interface
{}{
"config"
:
"/tmp/test-config.yaml"
,
"config"
:
"/tmp/test-config.yaml"
,
...
@@ -986,7 +990,7 @@ var _ = Describe("DGDR Profiler Arguments", func() {
...
@@ -986,7 +990,7 @@ var _ = Describe("DGDR Profiler Arguments", func() {
}
}
Expect
(
k8sClient
.
Create
(
ctx
,
dgdr
))
.
Should
(
Succeed
())
Expect
(
k8sClient
.
Create
(
ctx
,
dgdr
))
.
Should
(
Succeed
())
defer
k8sClient
.
Delete
(
ctx
,
dgdr
)
defer
func
()
{
_
=
k8sClient
.
Delete
(
ctx
,
dgdr
)
}()
// Re-fetch DGDR to get proper metadata from API server
// Re-fetch DGDR to get proper metadata from API server
var
fetchedDGDR
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
var
fetchedDGDR
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
...
@@ -1009,12 +1013,12 @@ var _ = Describe("DGDR Profiler Arguments", func() {
...
@@ -1009,12 +1013,12 @@ var _ = Describe("DGDR Profiler Arguments", func() {
Expect
(
args
)
.
Should
(
ContainElement
(
"--profile-config"
))
Expect
(
args
)
.
Should
(
ContainElement
(
"--profile-config"
))
// Clean up
// Clean up
k8sClient
.
Delete
(
ctx
,
job
)
_
=
k8sClient
.
Delete
(
ctx
,
job
)
})
})
It
(
"Should pass config with AI Configurator settings for offline profiling"
,
func
()
{
It
(
"Should pass config with AI Configurator settings for offline profiling"
,
func
()
{
ctx
:=
context
.
Background
()
ctx
:=
context
.
Background
()
namespace
:=
"
default
"
namespace
:=
default
Namespace
dgdrName
:=
"test-args-offline"
dgdrName
:=
"test-args-offline"
// Create ServiceAccount
// Create ServiceAccount
...
@@ -1024,8 +1028,8 @@ var _ = Describe("DGDR Profiler Arguments", func() {
...
@@ -1024,8 +1028,8 @@ var _ = Describe("DGDR Profiler Arguments", func() {
Namespace
:
namespace
,
Namespace
:
namespace
,
},
},
}
}
_
=
k8sClient
.
Create
(
ctx
,
sa
)
Expect
(
k8sClient
.
Create
(
ctx
,
sa
)
)
.
Should
(
Succeed
())
defer
k8sClient
.
Delete
(
ctx
,
sa
)
defer
func
()
{
_
=
k8sClient
.
Delete
(
ctx
,
sa
)
}()
dgdr
:=
&
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
{
dgdr
:=
&
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
{
ObjectMeta
:
metav1
.
ObjectMeta
{
ObjectMeta
:
metav1
.
ObjectMeta
{
...
@@ -1034,9 +1038,9 @@ var _ = Describe("DGDR Profiler Arguments", func() {
...
@@ -1034,9 +1038,9 @@ var _ = Describe("DGDR Profiler Arguments", func() {
},
},
Spec
:
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequestSpec
{
Spec
:
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequestSpec
{
Model
:
"test-model"
,
Model
:
"test-model"
,
ProfilerImage
:
"test-profiler:latest"
,
Backend
:
"trtllm"
,
Backend
:
"trtllm"
,
ProfilingConfig
:
nvidiacomv1alpha1
.
ProfilingConfigSpec
{
ProfilingConfig
:
nvidiacomv1alpha1
.
ProfilingConfigSpec
{
ProfilerImage
:
"test-profiler:latest"
,
Config
:
createTestConfig
(
map
[
string
]
interface
{}{
Config
:
createTestConfig
(
map
[
string
]
interface
{}{
"engine"
:
map
[
string
]
interface
{}{
"engine"
:
map
[
string
]
interface
{}{
"config"
:
"/tmp/test-config.yaml"
,
"config"
:
"/tmp/test-config.yaml"
,
...
@@ -1065,7 +1069,7 @@ var _ = Describe("DGDR Profiler Arguments", func() {
...
@@ -1065,7 +1069,7 @@ var _ = Describe("DGDR Profiler Arguments", func() {
}
}
Expect
(
k8sClient
.
Create
(
ctx
,
dgdr
))
.
Should
(
Succeed
())
Expect
(
k8sClient
.
Create
(
ctx
,
dgdr
))
.
Should
(
Succeed
())
defer
k8sClient
.
Delete
(
ctx
,
dgdr
)
defer
func
()
{
_
=
k8sClient
.
Delete
(
ctx
,
dgdr
)
}()
// Re-fetch DGDR to get proper metadata from API server
// Re-fetch DGDR to get proper metadata from API server
var
fetchedDGDR
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
var
fetchedDGDR
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
...
@@ -1088,7 +1092,7 @@ var _ = Describe("DGDR Profiler Arguments", func() {
...
@@ -1088,7 +1092,7 @@ var _ = Describe("DGDR Profiler Arguments", func() {
Expect
(
args
)
.
Should
(
ContainElement
(
"--profile-config"
))
Expect
(
args
)
.
Should
(
ContainElement
(
"--profile-config"
))
// Clean up
// Clean up
k8sClient
.
Delete
(
ctx
,
job
)
_
=
k8sClient
.
Delete
(
ctx
,
job
)
})
})
})
})
})
})
...
@@ -1112,7 +1116,7 @@ var _ = Describe("DGDR Error Handling", func() {
...
@@ -1112,7 +1116,7 @@ var _ = Describe("DGDR Error Handling", func() {
Context
(
"When profiling job fails"
,
func
()
{
Context
(
"When profiling job fails"
,
func
()
{
It
(
"Should capture detailed error from pod termination state"
,
func
()
{
It
(
"Should capture detailed error from pod termination state"
,
func
()
{
ctx
:=
context
.
Background
()
ctx
:=
context
.
Background
()
namespace
:=
"
default
"
namespace
:=
default
Namespace
dgdrName
:=
"test-error-capture"
dgdrName
:=
"test-error-capture"
dgdr
:=
&
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
{
dgdr
:=
&
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequest
{
...
@@ -1122,9 +1126,9 @@ var _ = Describe("DGDR Error Handling", func() {
...
@@ -1122,9 +1126,9 @@ var _ = Describe("DGDR Error Handling", func() {
},
},
Spec
:
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequestSpec
{
Spec
:
nvidiacomv1alpha1
.
DynamoGraphDeploymentRequestSpec
{
Model
:
"test-model"
,
Model
:
"test-model"
,
ProfilerImage
:
"test-profiler:latest"
,
Backend
:
"vllm"
,
Backend
:
"vllm"
,
ProfilingConfig
:
nvidiacomv1alpha1
.
ProfilingConfigSpec
{
ProfilingConfig
:
nvidiacomv1alpha1
.
ProfilingConfigSpec
{
ProfilerImage
:
"test-profiler:latest"
,
Config
:
createTestConfig
(
map
[
string
]
interface
{}{
Config
:
createTestConfig
(
map
[
string
]
interface
{}{
"engine"
:
map
[
string
]
interface
{}{
"engine"
:
map
[
string
]
interface
{}{
"config"
:
"/tmp/test-config.yaml"
,
"config"
:
"/tmp/test-config.yaml"
,
...
@@ -1145,7 +1149,7 @@ var _ = Describe("DGDR Error Handling", func() {
...
@@ -1145,7 +1149,7 @@ var _ = Describe("DGDR Error Handling", func() {
}
}
Expect
(
k8sClient
.
Create
(
ctx
,
dgdr
))
.
Should
(
Succeed
())
Expect
(
k8sClient
.
Create
(
ctx
,
dgdr
))
.
Should
(
Succeed
())
defer
k8sClient
.
Delete
(
ctx
,
dgdr
)
defer
func
()
{
_
=
k8sClient
.
Delete
(
ctx
,
dgdr
)
}()
// Set status to Profiling
// Set status to Profiling
dgdr
.
Status
.
State
=
StateProfiling
dgdr
.
Status
.
State
=
StateProfiling
...
@@ -1178,7 +1182,7 @@ var _ = Describe("DGDR Error Handling", func() {
...
@@ -1178,7 +1182,7 @@ var _ = Describe("DGDR Error Handling", func() {
},
},
}
}
Expect
(
k8sClient
.
Create
(
ctx
,
job
))
.
Should
(
Succeed
())
Expect
(
k8sClient
.
Create
(
ctx
,
job
))
.
Should
(
Succeed
())
defer
k8sClient
.
Delete
(
ctx
,
job
)
defer
func
()
{
_
=
k8sClient
.
Delete
(
ctx
,
job
)
}()
// Update job status
// Update job status
job
.
Status
.
Conditions
=
[]
batchv1
.
JobCondition
{{
job
.
Status
.
Conditions
=
[]
batchv1
.
JobCondition
{{
...
@@ -1219,7 +1223,7 @@ var _ = Describe("DGDR Error Handling", func() {
...
@@ -1219,7 +1223,7 @@ var _ = Describe("DGDR Error Handling", func() {
},
},
}
}
Expect
(
k8sClient
.
Create
(
ctx
,
pod
))
.
Should
(
Succeed
())
Expect
(
k8sClient
.
Create
(
ctx
,
pod
))
.
Should
(
Succeed
())
defer
k8sClient
.
Delete
(
ctx
,
pod
)
defer
func
()
{
_
=
k8sClient
.
Delete
(
ctx
,
pod
)
}()
// Reconcile - should capture error details
// Reconcile - should capture error details
_
,
err
:=
reconciler
.
Reconcile
(
ctx
,
reconcile
.
Request
{
_
,
err
:=
reconciler
.
Reconcile
(
ctx
,
reconcile
.
Request
{
...
...
deploy/cloud/operator/internal/controller_common/pod.go
View file @
b1732a5f
...
@@ -101,9 +101,11 @@ func CanonicalizePodSpec(podSpec *corev1.PodSpec) *corev1.PodSpec {
...
@@ -101,9 +101,11 @@ func CanonicalizePodSpec(podSpec *corev1.PodSpec) *corev1.PodSpec {
// Sort image pull secrets
// Sort image pull secrets
if
len
(
podSpec
.
ImagePullSecrets
)
>
1
{
if
len
(
podSpec
.
ImagePullSecrets
)
>
1
{
sort
.
Slice
(
podSpec
.
ImagePullSecrets
,
func
(
i
,
j
int
)
bool
{
uniqueSecrets
:=
ensureUniqueImagePullSecrets
(
podSpec
.
ImagePullSecrets
)
return
podSpec
.
ImagePullSecrets
[
i
]
.
Name
<
podSpec
.
ImagePullSecrets
[
j
]
.
Name
sort
.
Slice
(
uniqueSecrets
,
func
(
i
,
j
int
)
bool
{
return
uniqueSecrets
[
i
]
.
Name
<
uniqueSecrets
[
j
]
.
Name
})
})
podSpec
.
ImagePullSecrets
=
uniqueSecrets
}
}
// Sort volumes and their nested items
// Sort volumes and their nested items
...
@@ -275,3 +277,18 @@ func CanonicalizePodSpec(podSpec *corev1.PodSpec) *corev1.PodSpec {
...
@@ -275,3 +277,18 @@ func CanonicalizePodSpec(podSpec *corev1.PodSpec) *corev1.PodSpec {
return
podSpec
return
podSpec
}
}
func
ensureUniqueImagePullSecrets
(
secrets
[]
corev1
.
LocalObjectReference
)
[]
corev1
.
LocalObjectReference
{
if
len
(
secrets
)
==
0
{
return
nil
}
uniqueSecrets
:=
make
(
map
[
string
]
corev1
.
LocalObjectReference
)
for
_
,
secret
:=
range
secrets
{
uniqueSecrets
[
secret
.
Name
]
=
secret
}
uniqueSecretsList
:=
make
([]
corev1
.
LocalObjectReference
,
0
,
len
(
uniqueSecrets
))
for
secretName
:=
range
uniqueSecrets
{
uniqueSecretsList
=
append
(
uniqueSecretsList
,
corev1
.
LocalObjectReference
{
Name
:
secretName
})
}
return
uniqueSecretsList
}
deploy/cloud/operator/internal/controller_common/pod_test.go
View file @
b1732a5f
...
@@ -208,6 +208,7 @@ func TestCanonicalizePodSpec(t *testing.T) {
...
@@ -208,6 +208,7 @@ func TestCanonicalizePodSpec(t *testing.T) {
{
Name
:
"registry-z"
},
{
Name
:
"registry-z"
},
{
Name
:
"registry-a"
},
{
Name
:
"registry-a"
},
{
Name
:
"registry-b"
},
{
Name
:
"registry-b"
},
{
Name
:
"registry-a"
},
},
},
},
},
expected
:
&
corev1
.
PodSpec
{
expected
:
&
corev1
.
PodSpec
{
...
@@ -218,6 +219,15 @@ func TestCanonicalizePodSpec(t *testing.T) {
...
@@ -218,6 +219,15 @@ func TestCanonicalizePodSpec(t *testing.T) {
},
},
},
},
},
},
{
name
:
"sorts nil image pull secrets"
,
input
:
&
corev1
.
PodSpec
{
ImagePullSecrets
:
nil
,
},
expected
:
&
corev1
.
PodSpec
{
ImagePullSecrets
:
nil
,
},
},
{
{
name
:
"sorts volumes by name"
,
name
:
"sorts volumes by name"
,
input
:
&
corev1
.
PodSpec
{
input
:
&
corev1
.
PodSpec
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment