Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
6e213d90
Unverified
Commit
6e213d90
authored
Oct 27, 2025
by
Julien Mancuso
Committed by
GitHub
Oct 27, 2025
Browse files
fix: fix operator tests (#3904)
Signed-off-by:
Julien Mancuso
<
jmancuso@nvidia.com
>
parent
59535682
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
99 additions
and
89 deletions
+99
-89
deploy/cloud/operator/internal/controller/dynamocomponentdeployment_controller_test.go
...l/controller/dynamocomponentdeployment_controller_test.go
+10
-6
deploy/cloud/operator/internal/dynamo/backend_vllm.go
deploy/cloud/operator/internal/dynamo/backend_vllm.go
+1
-1
deploy/cloud/operator/internal/dynamo/backend_vllm_test.go
deploy/cloud/operator/internal/dynamo/backend_vllm_test.go
+1
-1
deploy/cloud/operator/internal/dynamo/graph_test.go
deploy/cloud/operator/internal/dynamo/graph_test.go
+85
-80
docs/kubernetes/dynamo_operator.md
docs/kubernetes/dynamo_operator.md
+2
-1
No files found.
deploy/cloud/operator/internal/controller/dynamocomponentdeployment_controller_test.go
View file @
6e213d90
...
...
@@ -734,11 +734,15 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
MainContainer
:
&
corev1
.
Container
{
Image
:
"test-image:latest"
,
Command
:
[]
string
{
"sh"
,
"-c"
,
"some"
,
"dynamo"
,
"command"
,
},
Args
:
[]
string
{
"some dynamo command"
,
"--tensor-parallel-size"
,
"4"
,
"--pipeline-parallel-size"
,
"1"
,
},
Env
:
[]
corev1
.
EnvVar
{
{
...
...
@@ -817,8 +821,8 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
{
Name
:
commonconsts
.
MainContainerName
,
Image
:
"test-image:latest"
,
Command
:
[]
string
{
"sh"
,
"-c"
},
Args
:
[]
string
{
"ray start --head --port=6379 && some dynamo command"
},
Command
:
[]
string
{
"
/bin/
sh"
,
"-c"
},
Args
:
[]
string
{
"ray start --head --port=6379 && some dynamo command
--tensor-parallel-size 4 --pipeline-parallel-size 1
"
},
Env
:
[]
corev1
.
EnvVar
{
{
Name
:
"DYN_NAMESPACE"
,
Value
:
"default"
},
{
Name
:
"DYN_PARENT_DGD_K8S_NAME"
,
Value
:
"test-lws-deploy"
},
...
...
@@ -931,7 +935,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
{
Name
:
commonconsts
.
MainContainerName
,
Image
:
"test-image:latest"
,
Command
:
[]
string
{
"sh"
,
"-c"
},
Command
:
[]
string
{
"
/bin/
sh"
,
"-c"
},
Args
:
[]
string
{
"ray start --address=$(LWS_LEADER_ADDRESS):6379 --block"
},
Env
:
[]
corev1
.
EnvVar
{
{
Name
:
"DYN_NAMESPACE"
,
Value
:
"default"
},
...
...
deploy/cloud/operator/internal/dynamo/backend_vllm.go
View file @
6e213d90
...
...
@@ -106,7 +106,7 @@ func injectRayDistributedLaunchFlags(container *corev1.Container, role Role, ser
leaderHostname
:=
multinodeDeployer
.
GetLeaderHostname
(
serviceName
)
container
.
Args
=
[]
string
{
fmt
.
Sprintf
(
"ray start --address=%s:%s --block"
,
leaderHostname
,
VLLMPort
)}
}
container
.
Command
=
[]
string
{
"sh"
,
"-c"
}
// ensure cmd is a shell
container
.
Command
=
[]
string
{
"
/bin/
sh"
,
"-c"
}
// ensure cmd is a shell
}
func
injectDataParallelLaunchFlags
(
container
*
corev1
.
Container
,
role
Role
,
serviceName
string
,
multinodeDeployer
MultinodeDeployer
)
{
...
...
deploy/cloud/operator/internal/dynamo/backend_vllm_test.go
View file @
6e213d90
...
...
@@ -303,7 +303,7 @@ func TestUpdateVLLMMultinodeArgs(t *testing.T) {
name
:
"leader prepends ray start --head"
,
role
:
RoleLeader
,
multinodeDeployer
:
&
GroveMultinodeDeployer
{},
initialContainer
:
&
corev1
.
Container
{
Args
:
[]
string
{
"python3"
,
"-m"
,
"dynamo.vllm"
,
tensorParallelSizeFlag
,
"16"
},
Resources
:
corev1
.
ResourceRequirements
{
Limits
:
corev1
.
ResourceList
{
corev1
.
ResourceName
(
"nvidia.com/gpu"
)
:
resource
.
MustParse
(
"8"
)}}},
initialContainer
:
&
corev1
.
Container
{
Command
:
[]
string
{
"python3"
}
,
Args
:
[]
string
{
"-m"
,
"dynamo.vllm"
,
tensorParallelSizeFlag
,
"16"
},
Resources
:
corev1
.
ResourceRequirements
{
Limits
:
corev1
.
ResourceList
{
corev1
.
ResourceName
(
"nvidia.com/gpu"
)
:
resource
.
MustParse
(
"8"
)}}},
expectedArgs
:
[]
string
{
fmt
.
Sprintf
(
"ray start --head --port=%s && python3 -m dynamo.vllm %s 16"
,
VLLMPort
,
tensorParallelSizeFlag
)},
},
{
...
...
deploy/cloud/operator/internal/dynamo/graph_test.go
View file @
6e213d90
...
...
@@ -2417,11 +2417,17 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
MainContainer
:
&
corev1
.
Container
{
Image
:
"worker-image"
,
Command
:
[]
string
{
"/bin/sh"
,
"-c"
,
"python3"
,
"-m"
,
"dynamo.vllm"
,
},
Args
:
[]
string
{
"python3 -m dynamo.vllm --custom-flag custom-value"
,
"--custom-flag"
,
"custom-value"
,
"--tensor-parallel-size"
,
"4"
,
"--pipeline-parallel-size"
,
"1"
,
},
StartupProbe
:
&
corev1
.
Probe
{
ProbeHandler
:
corev1
.
ProbeHandler
{
...
...
@@ -2598,7 +2604,7 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
"-c"
,
},
Args
:
[]
string
{
"ray start --head --port=6379 && python3 -m dynamo.vllm --custom-flag custom-value"
,
"ray start --head --port=6379 && python3 -m dynamo.vllm --custom-flag custom-value
--tensor-parallel-size 4 --pipeline-parallel-size 1
"
,
},
Ports
:
[]
corev1
.
ContainerPort
{
{
...
...
@@ -3345,7 +3351,7 @@ func TestGeneratePodSpecForComponent_VLLM(t *testing.T) {
ComponentType
:
commonconsts
.
ComponentTypeWorker
,
ExtraPodSpec
:
&
common
.
ExtraPodSpec
{
MainContainer
:
&
corev1
.
Container
{
Args
:
[]
string
{
"python3"
,
"-m"
,
"dynamo.vllm"
},
Args
:
[]
string
{
"python3"
,
"-m"
,
"dynamo.vllm"
,
"--tensor-parallel-size"
,
"4"
,
"--pipeline-parallel-size"
,
"1"
},
},
},
},
...
...
@@ -3359,6 +3365,11 @@ func TestGeneratePodSpecForComponent_VLLM(t *testing.T) {
name
:
"VLLM multinode worker"
,
component
:
&
v1alpha1
.
DynamoComponentDeploymentSharedSpec
{
ComponentType
:
commonconsts
.
ComponentTypeWorker
,
ExtraPodSpec
:
&
common
.
ExtraPodSpec
{
MainContainer
:
&
corev1
.
Container
{
Args
:
[]
string
{
"python3"
,
"-m"
,
"dynamo.vllm"
,
"--tensor-parallel-size"
,
"4"
,
"--pipeline-parallel-size"
,
"1"
},
},
},
},
backendFramework
:
BackendFrameworkVLLM
,
role
:
RoleWorker
,
...
...
@@ -4757,7 +4768,7 @@ func TestGenerateBasePodSpec_ResourceClaims(t *testing.T) {
tests
:=
[]
struct
{
name
string
component
*
v1alpha1
.
DynamoComponentDeployment
Overrides
Spec
component
*
v1alpha1
.
DynamoComponentDeployment
Shared
Spec
expectError
bool
expectedResourceClaims
[]
corev1
.
ResourceClaim
expectedPodClaims
[]
corev1
.
PodResourceClaim
...
...
@@ -4765,55 +4776,53 @@ func TestGenerateBasePodSpec_ResourceClaims(t *testing.T) {
}{
{
name
:
"component with resource claims"
,
component
:
&
v1alpha1
.
DynamoComponentDeploymentOverridesSpec
{
DynamoComponentDeploymentSharedSpec
:
v1alpha1
.
DynamoComponentDeploymentSharedSpec
{
ComponentType
:
commonconsts
.
ComponentTypeWorker
,
Resources
:
&
common
.
Resources
{
Requests
:
&
common
.
ResourceItem
{
CPU
:
"130"
,
Memory
:
"800Gi"
,
},
Limits
:
&
common
.
ResourceItem
{
CPU
:
"130"
,
Memory
:
"800Gi"
,
GPU
:
"4"
,
component
:
&
v1alpha1
.
DynamoComponentDeploymentSharedSpec
{
ComponentType
:
commonconsts
.
ComponentTypeWorker
,
Resources
:
&
common
.
Resources
{
Requests
:
&
common
.
ResourceItem
{
CPU
:
"130"
,
Memory
:
"800Gi"
,
},
Limits
:
&
common
.
ResourceItem
{
CPU
:
"130"
,
Memory
:
"800Gi"
,
GPU
:
"4"
,
},
Claims
:
[]
corev1
.
ResourceClaim
{
{
Name
:
"compute-domain-channel"
,
},
Claims
:
[]
corev1
.
ResourceClaim
{
},
},
ExtraPodSpec
:
&
common
.
ExtraPodSpec
{
PodSpec
:
&
corev1
.
PodSpec
{
ResourceClaims
:
[]
corev1
.
PodResourceClaim
{
{
Name
:
"compute-domain-channel"
,
Name
:
"compute-domain-channel"
,
ResourceClaimTemplateName
:
ptr
.
To
(
"trtllm-test-compute-domain-channel"
),
},
},
},
ExtraPodSpec
:
&
common
.
ExtraPodSpec
{
PodSpec
:
&
corev1
.
PodSpec
{
ResourceClaims
:
[]
corev1
.
PodResourceClaim
{
{
Name
:
"compute-domain-channel"
,
ResourceClaimTemplateName
:
ptr
.
To
(
"trtllm-test-compute-domain-channel"
),
},
},
Volumes
:
[]
corev1
.
Volume
{
{
Name
:
"model-storage"
,
VolumeSource
:
corev1
.
VolumeSource
{
PersistentVolumeClaim
:
&
corev1
.
PersistentVolumeClaimVolumeSource
{
ClaimName
:
"dynamo-pvc"
,
},
Volumes
:
[]
corev1
.
Volume
{
{
Name
:
"model-storage"
,
VolumeSource
:
corev1
.
VolumeSource
{
PersistentVolumeClaim
:
&
corev1
.
PersistentVolumeClaimVolumeSource
{
ClaimName
:
"dynamo-pvc"
,
},
},
},
},
MainContainer
:
&
corev1
.
Container
{
Image
:
"rohanv672/dynamo:v0.5.1-trtllm"
,
Args
:
[]
string
{
"python3 -m dynamo.trtllm --model-path /data/deepseek-r1 --served-model-name deepseek-ai/DeepSeek-R1 --extra-engine-args /data/engine_configs/wide_ep_agg.yaml"
,
}
,
Command
:
[]
string
{
"/bin/sh"
,
"-c"
},
VolumeMounts
:
[]
corev1
.
VolumeMount
{
{
Name
:
"model-storage"
,
MountPath
:
"/data
"
,
}
,
},
MainContainer
:
&
corev1
.
Container
{
Image
:
"rohanv672/dynamo:v0.5.1-trtllm"
,
Args
:
[]
string
{
"python3 -m dynamo.trtllm --model-path /data/deepseek-r1 --served-model-name deepseek-ai/DeepSeek-R1 --extra-engine-args /data/engine_configs/wide_ep_agg.yaml"
,
},
Command
:
[]
string
{
"/bin/sh"
,
"-c"
},
VolumeMounts
:
[]
corev1
.
VolumeMount
{
{
Name
:
"model-storage
"
,
MountPath
:
"/data"
,
},
},
},
...
...
@@ -4853,37 +4862,35 @@ func TestGenerateBasePodSpec_ResourceClaims(t *testing.T) {
},
{
name
:
"component with multiple resource claims"
,
component
:
&
v1alpha1
.
DynamoComponentDeploymentOverridesSpec
{
DynamoComponentDeploymentSharedSpec
:
v1alpha1
.
DynamoComponentDeploymentSharedSpec
{
ComponentType
:
commonconsts
.
ComponentTypeWorker
,
Resources
:
&
common
.
Resources
{
Claims
:
[]
corev1
.
ResourceClaim
{
component
:
&
v1alpha1
.
DynamoComponentDeploymentSharedSpec
{
ComponentType
:
commonconsts
.
ComponentTypeWorker
,
Resources
:
&
common
.
Resources
{
Claims
:
[]
corev1
.
ResourceClaim
{
{
Name
:
"compute-domain-channel"
,
},
{
Name
:
"network-domain-channel"
,
},
},
},
ExtraPodSpec
:
&
common
.
ExtraPodSpec
{
PodSpec
:
&
corev1
.
PodSpec
{
ResourceClaims
:
[]
corev1
.
PodResourceClaim
{
{
Name
:
"compute-domain-channel"
,
Name
:
"compute-domain-channel"
,
ResourceClaimTemplateName
:
ptr
.
To
(
"compute-template"
),
},
{
Name
:
"network-domain-channel"
,
Name
:
"network-domain-channel"
,
ResourceClaimTemplateName
:
ptr
.
To
(
"network-template"
),
},
},
},
ExtraPodSpec
:
&
common
.
ExtraPodSpec
{
PodSpec
:
&
corev1
.
PodSpec
{
ResourceClaims
:
[]
corev1
.
PodResourceClaim
{
{
Name
:
"compute-domain-channel"
,
ResourceClaimTemplateName
:
ptr
.
To
(
"compute-template"
),
},
{
Name
:
"network-domain-channel"
,
ResourceClaimTemplateName
:
ptr
.
To
(
"network-template"
),
},
},
},
MainContainer
:
&
corev1
.
Container
{
Image
:
"test-image"
,
Command
:
[]
string
{
"python3"
},
Args
:
[]
string
{
"-m"
,
"dynamo.worker"
},
},
MainContainer
:
&
corev1
.
Container
{
Image
:
"test-image"
,
Command
:
[]
string
{
"python3"
},
Args
:
[]
string
{
"-m"
,
"dynamo.worker"
},
},
},
},
...
...
@@ -4909,14 +4916,12 @@ func TestGenerateBasePodSpec_ResourceClaims(t *testing.T) {
},
{
name
:
"component without resource claims"
,
component
:
&
v1alpha1
.
DynamoComponentDeploymentOverridesSpec
{
DynamoComponentDeploymentSharedSpec
:
v1alpha1
.
DynamoComponentDeploymentSharedSpec
{
ComponentType
:
commonconsts
.
ComponentTypeFrontend
,
Resources
:
&
common
.
Resources
{
Requests
:
&
common
.
ResourceItem
{
CPU
:
"1"
,
Memory
:
"1Gi"
,
},
component
:
&
v1alpha1
.
DynamoComponentDeploymentSharedSpec
{
ComponentType
:
commonconsts
.
ComponentTypeFrontend
,
Resources
:
&
common
.
Resources
{
Requests
:
&
common
.
ResourceItem
{
CPU
:
"1"
,
Memory
:
"1Gi"
,
},
},
},
...
...
docs/kubernetes/dynamo_operator.md
View file @
6e213d90
...
...
@@ -39,7 +39,8 @@ helm fetch https://helm.ngc.nvidia.com/nvidia/ai-dynamo/charts/dynamo-platform-$
helm
install
dynamo-platform dynamo-platform-
${
RELEASE_VERSION
}
.tgz
--namespace
${
NAMESPACE
}
--create-namespace
```
For namespace-restricted installations (shared clusters):
For namespace-restricted installations (shared clusters), you'll need to install the Dynamo platform in each namespace you want to deploy to.
Namespace restriction is enabled by setting the
`dynamo-operator.namespaceRestriction.enabled`
flag to
`true`
.
```
bash
helm
install
dynamo-platform dynamo-platform-
${
RELEASE_VERSION
}
.tgz
\
--namespace
${
NAMESPACE
}
\
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment