Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
9caafa55
Unverified
Commit
9caafa55
authored
Nov 18, 2025
by
Thomas Montfort
Committed by
GitHub
Nov 18, 2025
Browse files
feat: add custom gpu type to CRD (#4408)
parent
33d9ae78
Changes
25
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
599 additions
and
398 deletions
+599
-398
deploy/cloud/helm/crds/templates/nvidia.com_dynamocomponentdeployments.yaml
...crds/templates/nvidia.com_dynamocomponentdeployments.yaml
+23
-4
deploy/cloud/helm/crds/templates/nvidia.com_dynamographdeployments.yaml
...elm/crds/templates/nvidia.com_dynamographdeployments.yaml
+23
-4
deploy/cloud/operator/api/dynamo/common/common.go
deploy/cloud/operator/api/dynamo/common/common.go
+0
-62
deploy/cloud/operator/api/dynamo/common/zz_generated.deepcopy.go
...cloud/operator/api/dynamo/common/zz_generated.deepcopy.go
+0
-208
deploy/cloud/operator/api/v1alpha1/common.go
deploy/cloud/operator/api/v1alpha1/common.go
+50
-0
deploy/cloud/operator/api/v1alpha1/dynamocomponentdeployment_types.go
.../operator/api/v1alpha1/dynamocomponentdeployment_types.go
+3
-4
deploy/cloud/operator/api/v1alpha1/zz_generated.deepcopy.go
deploy/cloud/operator/api/v1alpha1/zz_generated.deepcopy.go
+180
-15
deploy/cloud/operator/config/crd/bases/nvidia.com_dynamocomponentdeployments.yaml
...nfig/crd/bases/nvidia.com_dynamocomponentdeployments.yaml
+23
-4
deploy/cloud/operator/config/crd/bases/nvidia.com_dynamographdeployments.yaml
...r/config/crd/bases/nvidia.com_dynamographdeployments.yaml
+23
-4
deploy/cloud/operator/internal/common/schemas.go
deploy/cloud/operator/internal/common/schemas.go
+27
-0
deploy/cloud/operator/internal/common/url.go
deploy/cloud/operator/internal/common/url.go
+16
-0
deploy/cloud/operator/internal/common/url_test.go
deploy/cloud/operator/internal/common/url_test.go
+16
-0
deploy/cloud/operator/internal/controller/dynamocomponentdeployment_controller.go
...ternal/controller/dynamocomponentdeployment_controller.go
+6
-6
deploy/cloud/operator/internal/controller/dynamocomponentdeployment_controller_test.go
...l/controller/dynamocomponentdeployment_controller_test.go
+12
-13
deploy/cloud/operator/internal/controller_common/resource.go
deploy/cloud/operator/internal/controller_common/resource.go
+13
-3
deploy/cloud/operator/internal/controller_common/resource_test.go
...loud/operator/internal/controller_common/resource_test.go
+58
-0
deploy/cloud/operator/internal/dynamo/backend_trtllm.go
deploy/cloud/operator/internal/dynamo/backend_trtllm.go
+1
-2
deploy/cloud/operator/internal/dynamo/backend_trtllm_test.go
deploy/cloud/operator/internal/dynamo/backend_trtllm_test.go
+31
-32
deploy/cloud/operator/internal/dynamo/backend_vllm.go
deploy/cloud/operator/internal/dynamo/backend_vllm.go
+26
-21
deploy/cloud/operator/internal/dynamo/backend_vllm_test.go
deploy/cloud/operator/internal/dynamo/backend_vllm_test.go
+68
-16
No files found.
deploy/cloud/helm/crds/templates/nvidia.com_dynamocomponentdeployments.yaml
View file @
9caafa55
...
...
@@ -10193,6 +10193,7 @@ spec:
GPUs/devices, and any runtime-specific resources.
properties:
claims:
description: Claims specifies resource claims for dynamic resource allocation
items:
description: ResourceClaim references one entry in PodSpec.ResourceClaims.
properties:
...
...
@@ -10213,35 +10214,53 @@ spec:
type: object
type: array
limits:
description: Limits specifies the maximum resources allowed for the component
properties:
cpu:
description: CPU specifies the CPU resource request/limit (e.g., "1000m", "2")
type: string
custom:
additionalProperties:
type: string
description: Custom specifies additional custom resource requests/limits
type: object
gpu:
description: |-
Indicates the number of GPUs to request.
total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
GPU indicates the number of GPUs to request.
Total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
type: string
gpuType:
description: |-
GPUType can specify a custom GPU type, e.g. "gpu.intel.com/xe"
By default if not specified, the GPU type is "nvidia.com/gpu"
type: string
memory:
description: Memory specifies the memory resource request/limit (e.g., "4Gi", "8Gi")
type: string
type: object
requests:
description: Requests specifies the minimum resources required by the component
properties:
cpu:
description: CPU specifies the CPU resource request/limit (e.g., "1000m", "2")
type: string
custom:
additionalProperties:
type: string
description: Custom specifies additional custom resource requests/limits
type: object
gpu:
description: |-
Indicates the number of GPUs to request.
total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
GPU indicates the number of GPUs to request.
Total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
type: string
gpuType:
description: |-
GPUType can specify a custom GPU type, e.g. "gpu.intel.com/xe"
By default if not specified, the GPU type is "nvidia.com/gpu"
type: string
memory:
description: Memory specifies the memory resource request/limit (e.g., "4Gi", "8Gi")
type: string
type: object
type: object
...
...
deploy/cloud/helm/crds/templates/nvidia.com_dynamographdeployments.yaml
View file @
9caafa55
...
...
@@ -10328,6 +10328,7 @@ spec:
GPUs/devices, and any runtime-specific resources.
properties:
claims:
description: Claims specifies resource claims for dynamic resource allocation
items:
description: ResourceClaim references one entry in PodSpec.ResourceClaims.
properties:
...
...
@@ -10348,35 +10349,53 @@ spec:
type: object
type: array
limits:
description: Limits specifies the maximum resources allowed for the component
properties:
cpu:
description: CPU specifies the CPU resource request/limit (e.g., "1000m", "2")
type: string
custom:
additionalProperties:
type: string
description: Custom specifies additional custom resource requests/limits
type: object
gpu:
description: |-
Indicates the number of GPUs to request.
total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
GPU indicates the number of GPUs to request.
Total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
type: string
gpuType:
description: |-
GPUType can specify a custom GPU type, e.g. "gpu.intel.com/xe"
By default if not specified, the GPU type is "nvidia.com/gpu"
type: string
memory:
description: Memory specifies the memory resource request/limit (e.g., "4Gi", "8Gi")
type: string
type: object
requests:
description: Requests specifies the minimum resources required by the component
properties:
cpu:
description: CPU specifies the CPU resource request/limit (e.g., "1000m", "2")
type: string
custom:
additionalProperties:
type: string
description: Custom specifies additional custom resource requests/limits
type: object
gpu:
description: |-
Indicates the number of GPUs to request.
total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
GPU indicates the number of GPUs to request.
Total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
type: string
gpuType:
description: |-
GPUType can specify a custom GPU type, e.g. "gpu.intel.com/xe"
By default if not specified, the GPU type is "nvidia.com/gpu"
type: string
memory:
description: Memory specifies the memory resource request/limit (e.g., "4Gi", "8Gi")
type: string
type: object
type: object
...
...
deploy/cloud/operator/api/dynamo/common/common.go
deleted
100644 → 0
View file @
33d9ae78
/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// +k8s:deepcopy-gen=package
package
common
import
(
corev1
"k8s.io/api/core/v1"
)
type
ResourceItem
struct
{
CPU
string
`json:"cpu,omitempty"`
Memory
string
`json:"memory,omitempty"`
// Indicates the number of GPUs to request.
// total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
GPU
string
`json:"gpu,omitempty"`
Custom
map
[
string
]
string
`json:"custom,omitempty"`
}
type
Resources
struct
{
Requests
*
ResourceItem
`json:"requests,omitempty"`
Limits
*
ResourceItem
`json:"limits,omitempty"`
Claims
[]
corev1
.
ResourceClaim
`json:"claims,omitempty"`
}
type
DeploymentTargetHPAConf
struct
{
CPU
*
int32
`json:"cpu,omitempty"`
GPU
*
int32
`json:"gpu,omitempty"`
Memory
*
string
`json:"memory,omitempty"`
QPS
*
int64
`json:"qps,omitempty"`
MinReplicas
*
int32
`json:"min_replicas,omitempty"`
MaxReplicas
*
int32
`json:"max_replicas,omitempty"`
}
type
LabelItemSchema
struct
{
Key
string
`json:"key"`
Value
string
`json:"value"`
}
type
ExtraPodMetadata
struct
{
Annotations
map
[
string
]
string
`json:"annotations,omitempty"`
Labels
map
[
string
]
string
`json:"labels,omitempty"`
}
type
ExtraPodSpec
struct
{
*
corev1
.
PodSpec
`json:",inline"`
MainContainer
*
corev1
.
Container
`json:"mainContainer,omitempty"`
}
deploy/cloud/operator/api/dynamo/common/zz_generated.deepcopy.go
deleted
100644 → 0
View file @
33d9ae78
//go:build !ignore_autogenerated
/*
SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
SPDX-License-Identifier: Apache-2.0
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
Copyright 2024.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Code generated by controller-gen. DO NOT EDIT.
package
common
import
(
"k8s.io/api/core/v1"
)
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func
(
in
*
DeploymentTargetHPAConf
)
DeepCopyInto
(
out
*
DeploymentTargetHPAConf
)
{
*
out
=
*
in
if
in
.
CPU
!=
nil
{
in
,
out
:=
&
in
.
CPU
,
&
out
.
CPU
*
out
=
new
(
int32
)
**
out
=
**
in
}
if
in
.
GPU
!=
nil
{
in
,
out
:=
&
in
.
GPU
,
&
out
.
GPU
*
out
=
new
(
int32
)
**
out
=
**
in
}
if
in
.
Memory
!=
nil
{
in
,
out
:=
&
in
.
Memory
,
&
out
.
Memory
*
out
=
new
(
string
)
**
out
=
**
in
}
if
in
.
QPS
!=
nil
{
in
,
out
:=
&
in
.
QPS
,
&
out
.
QPS
*
out
=
new
(
int64
)
**
out
=
**
in
}
if
in
.
MinReplicas
!=
nil
{
in
,
out
:=
&
in
.
MinReplicas
,
&
out
.
MinReplicas
*
out
=
new
(
int32
)
**
out
=
**
in
}
if
in
.
MaxReplicas
!=
nil
{
in
,
out
:=
&
in
.
MaxReplicas
,
&
out
.
MaxReplicas
*
out
=
new
(
int32
)
**
out
=
**
in
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DeploymentTargetHPAConf.
func
(
in
*
DeploymentTargetHPAConf
)
DeepCopy
()
*
DeploymentTargetHPAConf
{
if
in
==
nil
{
return
nil
}
out
:=
new
(
DeploymentTargetHPAConf
)
in
.
DeepCopyInto
(
out
)
return
out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func
(
in
*
ExtraPodMetadata
)
DeepCopyInto
(
out
*
ExtraPodMetadata
)
{
*
out
=
*
in
if
in
.
Annotations
!=
nil
{
in
,
out
:=
&
in
.
Annotations
,
&
out
.
Annotations
*
out
=
make
(
map
[
string
]
string
,
len
(
*
in
))
for
key
,
val
:=
range
*
in
{
(
*
out
)[
key
]
=
val
}
}
if
in
.
Labels
!=
nil
{
in
,
out
:=
&
in
.
Labels
,
&
out
.
Labels
*
out
=
make
(
map
[
string
]
string
,
len
(
*
in
))
for
key
,
val
:=
range
*
in
{
(
*
out
)[
key
]
=
val
}
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtraPodMetadata.
func
(
in
*
ExtraPodMetadata
)
DeepCopy
()
*
ExtraPodMetadata
{
if
in
==
nil
{
return
nil
}
out
:=
new
(
ExtraPodMetadata
)
in
.
DeepCopyInto
(
out
)
return
out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func
(
in
*
ExtraPodSpec
)
DeepCopyInto
(
out
*
ExtraPodSpec
)
{
*
out
=
*
in
if
in
.
PodSpec
!=
nil
{
in
,
out
:=
&
in
.
PodSpec
,
&
out
.
PodSpec
*
out
=
new
(
v1
.
PodSpec
)
(
*
in
)
.
DeepCopyInto
(
*
out
)
}
if
in
.
MainContainer
!=
nil
{
in
,
out
:=
&
in
.
MainContainer
,
&
out
.
MainContainer
*
out
=
new
(
v1
.
Container
)
(
*
in
)
.
DeepCopyInto
(
*
out
)
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtraPodSpec.
func
(
in
*
ExtraPodSpec
)
DeepCopy
()
*
ExtraPodSpec
{
if
in
==
nil
{
return
nil
}
out
:=
new
(
ExtraPodSpec
)
in
.
DeepCopyInto
(
out
)
return
out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func
(
in
*
LabelItemSchema
)
DeepCopyInto
(
out
*
LabelItemSchema
)
{
*
out
=
*
in
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LabelItemSchema.
func
(
in
*
LabelItemSchema
)
DeepCopy
()
*
LabelItemSchema
{
if
in
==
nil
{
return
nil
}
out
:=
new
(
LabelItemSchema
)
in
.
DeepCopyInto
(
out
)
return
out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func
(
in
*
ResourceItem
)
DeepCopyInto
(
out
*
ResourceItem
)
{
*
out
=
*
in
if
in
.
Custom
!=
nil
{
in
,
out
:=
&
in
.
Custom
,
&
out
.
Custom
*
out
=
make
(
map
[
string
]
string
,
len
(
*
in
))
for
key
,
val
:=
range
*
in
{
(
*
out
)[
key
]
=
val
}
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ResourceItem.
func
(
in
*
ResourceItem
)
DeepCopy
()
*
ResourceItem
{
if
in
==
nil
{
return
nil
}
out
:=
new
(
ResourceItem
)
in
.
DeepCopyInto
(
out
)
return
out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func
(
in
*
Resources
)
DeepCopyInto
(
out
*
Resources
)
{
*
out
=
*
in
if
in
.
Requests
!=
nil
{
in
,
out
:=
&
in
.
Requests
,
&
out
.
Requests
*
out
=
new
(
ResourceItem
)
(
*
in
)
.
DeepCopyInto
(
*
out
)
}
if
in
.
Limits
!=
nil
{
in
,
out
:=
&
in
.
Limits
,
&
out
.
Limits
*
out
=
new
(
ResourceItem
)
(
*
in
)
.
DeepCopyInto
(
*
out
)
}
if
in
.
Claims
!=
nil
{
in
,
out
:=
&
in
.
Claims
,
&
out
.
Claims
*
out
=
make
([]
v1
.
ResourceClaim
,
len
(
*
in
))
copy
(
*
out
,
*
in
)
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Resources.
func
(
in
*
Resources
)
DeepCopy
()
*
Resources
{
if
in
==
nil
{
return
nil
}
out
:=
new
(
Resources
)
in
.
DeepCopyInto
(
out
)
return
out
}
deploy/cloud/operator/api/v1alpha1/common.go
View file @
9caafa55
...
...
@@ -65,3 +65,53 @@ type SharedMemorySpec struct {
Disabled
bool
`json:"disabled,omitempty"`
Size
resource
.
Quantity
`json:"size,omitempty"`
}
type
ResourceItem
struct
{
// CPU specifies the CPU resource request/limit (e.g., "1000m", "2")
CPU
string
`json:"cpu,omitempty"`
// Memory specifies the memory resource request/limit (e.g., "4Gi", "8Gi")
Memory
string
`json:"memory,omitempty"`
// GPU indicates the number of GPUs to request.
// Total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
GPU
string
`json:"gpu,omitempty"`
// GPUType can specify a custom GPU type, e.g. "gpu.intel.com/xe"
// By default if not specified, the GPU type is "nvidia.com/gpu"
GPUType
string
`json:"gpuType,omitempty"`
// Custom specifies additional custom resource requests/limits
Custom
map
[
string
]
string
`json:"custom,omitempty"`
}
// Resources defines requested and limits for a component, including CPU, memory,
// GPUs/devices, and any runtime-specific resources.
type
Resources
struct
{
// Requests specifies the minimum resources required by the component
Requests
*
ResourceItem
`json:"requests,omitempty"`
// Limits specifies the maximum resources allowed for the component
Limits
*
ResourceItem
`json:"limits,omitempty"`
// Claims specifies resource claims for dynamic resource allocation
Claims
[]
corev1
.
ResourceClaim
`json:"claims,omitempty"`
}
type
DeploymentTargetHPAConf
struct
{
CPU
*
int32
`json:"cpu,omitempty"`
GPU
*
int32
`json:"gpu,omitempty"`
Memory
*
string
`json:"memory,omitempty"`
QPS
*
int64
`json:"qps,omitempty"`
MinReplicas
*
int32
`json:"min_replicas,omitempty"`
MaxReplicas
*
int32
`json:"max_replicas,omitempty"`
}
type
LabelItemSchema
struct
{
Key
string
`json:"key"`
Value
string
`json:"value"`
}
type
ExtraPodMetadata
struct
{
Annotations
map
[
string
]
string
`json:"annotations,omitempty"`
Labels
map
[
string
]
string
`json:"labels,omitempty"`
}
type
ExtraPodSpec
struct
{
*
corev1
.
PodSpec
`json:",inline"`
MainContainer
*
corev1
.
Container
`json:"mainContainer,omitempty"`
}
deploy/cloud/operator/api/v1alpha1/dynamocomponentdeployment_types.go
View file @
9caafa55
...
...
@@ -20,7 +20,6 @@
package
v1alpha1
import
(
dynamoCommon
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
commonconsts
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
corev1
"k8s.io/api/core/v1"
metav1
"k8s.io/apimachinery/pkg/apis/meta/v1"
...
...
@@ -74,7 +73,7 @@ type DynamoComponentDeploymentSharedSpec struct {
// Resources requested and limits for this component, including CPU, memory,
// GPUs/devices, and any runtime-specific resources.
Resources
*
dynamoCommon
.
Resources
`json:"resources,omitempty"`
Resources
*
Resources
`json:"resources,omitempty"`
// Autoscaling config for this component (replica range, target utilization, etc.).
Autoscaling
*
Autoscaling
`json:"autoscaling,omitempty"`
// Envs defines additional environment variables to inject into the component containers.
...
...
@@ -98,12 +97,12 @@ type DynamoComponentDeploymentSharedSpec struct {
// +optional
// ExtraPodMetadata adds labels/annotations to the created Pods.
ExtraPodMetadata
*
dynamoCommon
.
ExtraPodMetadata
`json:"extraPodMetadata,omitempty"`
ExtraPodMetadata
*
ExtraPodMetadata
`json:"extraPodMetadata,omitempty"`
// +optional
// ExtraPodSpec allows to override the main pod spec configuration.
// It is a k8s standard PodSpec. It also contains a MainContainer (standard k8s Container) field
// that allows overriding the main container configuration.
ExtraPodSpec
*
dynamoCommon
.
ExtraPodSpec
`json:"extraPodSpec,omitempty"`
ExtraPodSpec
*
ExtraPodSpec
`json:"extraPodSpec,omitempty"`
// LivenessProbe to detect and restart unhealthy containers.
LivenessProbe
*
corev1
.
Probe
`json:"livenessProbe,omitempty"`
...
...
deploy/cloud/operator/api/v1alpha1/zz_generated.deepcopy.go
View file @
9caafa55
...
...
@@ -38,11 +38,10 @@ limitations under the License.
package
v1alpha1
import
(
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
"k8s.io/api/autoscaling/v2"
corev1
"k8s.io/api/core/v1"
"k8s.io/api/core/v1"
apiextensionsv1
"k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1"
metav1
"k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
)
...
...
@@ -98,7 +97,7 @@ func (in *BaseStatus) DeepCopyInto(out *BaseStatus) {
*
out
=
*
in
if
in
.
Conditions
!=
nil
{
in
,
out
:=
&
in
.
Conditions
,
&
out
.
Conditions
*
out
=
make
([]
v1
.
Condition
,
len
(
*
in
))
*
out
=
make
([]
meta
v1
.
Condition
,
len
(
*
in
))
for
i
:=
range
*
in
{
(
*
in
)[
i
]
.
DeepCopyInto
(
&
(
*
out
)[
i
])
}
...
...
@@ -174,6 +173,51 @@ func (in *DeploymentStatus) DeepCopy() *DeploymentStatus {
return
out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func
(
in
*
DeploymentTargetHPAConf
)
DeepCopyInto
(
out
*
DeploymentTargetHPAConf
)
{
*
out
=
*
in
if
in
.
CPU
!=
nil
{
in
,
out
:=
&
in
.
CPU
,
&
out
.
CPU
*
out
=
new
(
int32
)
**
out
=
**
in
}
if
in
.
GPU
!=
nil
{
in
,
out
:=
&
in
.
GPU
,
&
out
.
GPU
*
out
=
new
(
int32
)
**
out
=
**
in
}
if
in
.
Memory
!=
nil
{
in
,
out
:=
&
in
.
Memory
,
&
out
.
Memory
*
out
=
new
(
string
)
**
out
=
**
in
}
if
in
.
QPS
!=
nil
{
in
,
out
:=
&
in
.
QPS
,
&
out
.
QPS
*
out
=
new
(
int64
)
**
out
=
**
in
}
if
in
.
MinReplicas
!=
nil
{
in
,
out
:=
&
in
.
MinReplicas
,
&
out
.
MinReplicas
*
out
=
new
(
int32
)
**
out
=
**
in
}
if
in
.
MaxReplicas
!=
nil
{
in
,
out
:=
&
in
.
MaxReplicas
,
&
out
.
MaxReplicas
*
out
=
new
(
int32
)
**
out
=
**
in
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DeploymentTargetHPAConf.
func
(
in
*
DeploymentTargetHPAConf
)
DeepCopy
()
*
DeploymentTargetHPAConf
{
if
in
==
nil
{
return
nil
}
out
:=
new
(
DeploymentTargetHPAConf
)
in
.
DeepCopyInto
(
out
)
return
out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func
(
in
*
DynamoComponentDeployment
)
DeepCopyInto
(
out
*
DynamoComponentDeployment
)
{
*
out
=
*
in
...
...
@@ -257,7 +301,7 @@ func (in *DynamoComponentDeploymentSharedSpec) DeepCopyInto(out *DynamoComponent
}
if
in
.
Resources
!=
nil
{
in
,
out
:=
&
in
.
Resources
,
&
out
.
Resources
*
out
=
new
(
common
.
Resources
)
*
out
=
new
(
Resources
)
(
*
in
)
.
DeepCopyInto
(
*
out
)
}
if
in
.
Autoscaling
!=
nil
{
...
...
@@ -267,7 +311,7 @@ func (in *DynamoComponentDeploymentSharedSpec) DeepCopyInto(out *DynamoComponent
}
if
in
.
Envs
!=
nil
{
in
,
out
:=
&
in
.
Envs
,
&
out
.
Envs
*
out
=
make
([]
core
v1
.
EnvVar
,
len
(
*
in
))
*
out
=
make
([]
v1
.
EnvVar
,
len
(
*
in
))
for
i
:=
range
*
in
{
(
*
in
)[
i
]
.
DeepCopyInto
(
&
(
*
out
)[
i
])
}
...
...
@@ -299,22 +343,22 @@ func (in *DynamoComponentDeploymentSharedSpec) DeepCopyInto(out *DynamoComponent
}
if
in
.
ExtraPodMetadata
!=
nil
{
in
,
out
:=
&
in
.
ExtraPodMetadata
,
&
out
.
ExtraPodMetadata
*
out
=
new
(
common
.
ExtraPodMetadata
)
*
out
=
new
(
ExtraPodMetadata
)
(
*
in
)
.
DeepCopyInto
(
*
out
)
}
if
in
.
ExtraPodSpec
!=
nil
{
in
,
out
:=
&
in
.
ExtraPodSpec
,
&
out
.
ExtraPodSpec
*
out
=
new
(
common
.
ExtraPodSpec
)
*
out
=
new
(
ExtraPodSpec
)
(
*
in
)
.
DeepCopyInto
(
*
out
)
}
if
in
.
LivenessProbe
!=
nil
{
in
,
out
:=
&
in
.
LivenessProbe
,
&
out
.
LivenessProbe
*
out
=
new
(
core
v1
.
Probe
)
*
out
=
new
(
v1
.
Probe
)
(
*
in
)
.
DeepCopyInto
(
*
out
)
}
if
in
.
ReadinessProbe
!=
nil
{
in
,
out
:=
&
in
.
ReadinessProbe
,
&
out
.
ReadinessProbe
*
out
=
new
(
core
v1
.
Probe
)
*
out
=
new
(
v1
.
Probe
)
(
*
in
)
.
DeepCopyInto
(
*
out
)
}
if
in
.
Replicas
!=
nil
{
...
...
@@ -360,7 +404,7 @@ func (in *DynamoComponentDeploymentStatus) DeepCopyInto(out *DynamoComponentDepl
*
out
=
*
in
if
in
.
Conditions
!=
nil
{
in
,
out
:=
&
in
.
Conditions
,
&
out
.
Conditions
*
out
=
make
([]
v1
.
Condition
,
len
(
*
in
))
*
out
=
make
([]
meta
v1
.
Condition
,
len
(
*
in
))
for
i
:=
range
*
in
{
(
*
in
)[
i
]
.
DeepCopyInto
(
&
(
*
out
)[
i
])
}
...
...
@@ -528,7 +572,7 @@ func (in *DynamoGraphDeploymentRequestStatus) DeepCopyInto(out *DynamoGraphDeplo
*
out
=
*
in
if
in
.
Conditions
!=
nil
{
in
,
out
:=
&
in
.
Conditions
,
&
out
.
Conditions
*
out
=
make
([]
v1
.
Condition
,
len
(
*
in
))
*
out
=
make
([]
meta
v1
.
Condition
,
len
(
*
in
))
for
i
:=
range
*
in
{
(
*
in
)[
i
]
.
DeepCopyInto
(
&
(
*
out
)[
i
])
}
...
...
@@ -583,7 +627,7 @@ func (in *DynamoGraphDeploymentSpec) DeepCopyInto(out *DynamoGraphDeploymentSpec
}
if
in
.
Envs
!=
nil
{
in
,
out
:=
&
in
.
Envs
,
&
out
.
Envs
*
out
=
make
([]
core
v1
.
EnvVar
,
len
(
*
in
))
*
out
=
make
([]
v1
.
EnvVar
,
len
(
*
in
))
for
i
:=
range
*
in
{
(
*
in
)[
i
]
.
DeepCopyInto
(
&
(
*
out
)[
i
])
}
...
...
@@ -605,7 +649,7 @@ func (in *DynamoGraphDeploymentStatus) DeepCopyInto(out *DynamoGraphDeploymentSt
*
out
=
*
in
if
in
.
Conditions
!=
nil
{
in
,
out
:=
&
in
.
Conditions
,
&
out
.
Conditions
*
out
=
make
([]
v1
.
Condition
,
len
(
*
in
))
*
out
=
make
([]
meta
v1
.
Condition
,
len
(
*
in
))
for
i
:=
range
*
in
{
(
*
in
)[
i
]
.
DeepCopyInto
(
&
(
*
out
)[
i
])
}
...
...
@@ -711,7 +755,7 @@ func (in *DynamoModelStatus) DeepCopyInto(out *DynamoModelStatus) {
}
if
in
.
Conditions
!=
nil
{
in
,
out
:=
&
in
.
Conditions
,
&
out
.
Conditions
*
out
=
make
([]
v1
.
Condition
,
len
(
*
in
))
*
out
=
make
([]
meta
v1
.
Condition
,
len
(
*
in
))
for
i
:=
range
*
in
{
(
*
in
)[
i
]
.
DeepCopyInto
(
&
(
*
out
)[
i
])
}
...
...
@@ -743,6 +787,60 @@ func (in *EndpointInfo) DeepCopy() *EndpointInfo {
return
out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func
(
in
*
ExtraPodMetadata
)
DeepCopyInto
(
out
*
ExtraPodMetadata
)
{
*
out
=
*
in
if
in
.
Annotations
!=
nil
{
in
,
out
:=
&
in
.
Annotations
,
&
out
.
Annotations
*
out
=
make
(
map
[
string
]
string
,
len
(
*
in
))
for
key
,
val
:=
range
*
in
{
(
*
out
)[
key
]
=
val
}
}
if
in
.
Labels
!=
nil
{
in
,
out
:=
&
in
.
Labels
,
&
out
.
Labels
*
out
=
make
(
map
[
string
]
string
,
len
(
*
in
))
for
key
,
val
:=
range
*
in
{
(
*
out
)[
key
]
=
val
}
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtraPodMetadata.
func
(
in
*
ExtraPodMetadata
)
DeepCopy
()
*
ExtraPodMetadata
{
if
in
==
nil
{
return
nil
}
out
:=
new
(
ExtraPodMetadata
)
in
.
DeepCopyInto
(
out
)
return
out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func
(
in
*
ExtraPodSpec
)
DeepCopyInto
(
out
*
ExtraPodSpec
)
{
*
out
=
*
in
if
in
.
PodSpec
!=
nil
{
in
,
out
:=
&
in
.
PodSpec
,
&
out
.
PodSpec
*
out
=
new
(
v1
.
PodSpec
)
(
*
in
)
.
DeepCopyInto
(
*
out
)
}
if
in
.
MainContainer
!=
nil
{
in
,
out
:=
&
in
.
MainContainer
,
&
out
.
MainContainer
*
out
=
new
(
v1
.
Container
)
(
*
in
)
.
DeepCopyInto
(
*
out
)
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtraPodSpec.
func
(
in
*
ExtraPodSpec
)
DeepCopy
()
*
ExtraPodSpec
{
if
in
==
nil
{
return
nil
}
out
:=
new
(
ExtraPodSpec
)
in
.
DeepCopyInto
(
out
)
return
out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func
(
in
*
IngressSpec
)
DeepCopyInto
(
out
*
IngressSpec
)
{
*
out
=
*
in
...
...
@@ -812,6 +910,21 @@ func (in *IngressTLSSpec) DeepCopy() *IngressTLSSpec {
return
out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func
(
in
*
LabelItemSchema
)
DeepCopyInto
(
out
*
LabelItemSchema
)
{
*
out
=
*
in
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LabelItemSchema.
func
(
in
*
LabelItemSchema
)
DeepCopy
()
*
LabelItemSchema
{
if
in
==
nil
{
return
nil
}
out
:=
new
(
LabelItemSchema
)
in
.
DeepCopyInto
(
out
)
return
out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func
(
in
*
ModelReference
)
DeepCopyInto
(
out
*
ModelReference
)
{
*
out
=
*
in
...
...
@@ -908,6 +1021,58 @@ func (in *ProfilingConfigSpec) DeepCopy() *ProfilingConfigSpec {
return
out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func
(
in
*
ResourceItem
)
DeepCopyInto
(
out
*
ResourceItem
)
{
*
out
=
*
in
if
in
.
Custom
!=
nil
{
in
,
out
:=
&
in
.
Custom
,
&
out
.
Custom
*
out
=
make
(
map
[
string
]
string
,
len
(
*
in
))
for
key
,
val
:=
range
*
in
{
(
*
out
)[
key
]
=
val
}
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ResourceItem.
func
(
in
*
ResourceItem
)
DeepCopy
()
*
ResourceItem
{
if
in
==
nil
{
return
nil
}
out
:=
new
(
ResourceItem
)
in
.
DeepCopyInto
(
out
)
return
out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func
(
in
*
Resources
)
DeepCopyInto
(
out
*
Resources
)
{
*
out
=
*
in
if
in
.
Requests
!=
nil
{
in
,
out
:=
&
in
.
Requests
,
&
out
.
Requests
*
out
=
new
(
ResourceItem
)
(
*
in
)
.
DeepCopyInto
(
*
out
)
}
if
in
.
Limits
!=
nil
{
in
,
out
:=
&
in
.
Limits
,
&
out
.
Limits
*
out
=
new
(
ResourceItem
)
(
*
in
)
.
DeepCopyInto
(
*
out
)
}
if
in
.
Claims
!=
nil
{
in
,
out
:=
&
in
.
Claims
,
&
out
.
Claims
*
out
=
make
([]
v1
.
ResourceClaim
,
len
(
*
in
))
copy
(
*
out
,
*
in
)
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Resources.
func
(
in
*
Resources
)
DeepCopy
()
*
Resources
{
if
in
==
nil
{
return
nil
}
out
:=
new
(
Resources
)
in
.
DeepCopyInto
(
out
)
return
out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func
(
in
*
SharedMemorySpec
)
DeepCopyInto
(
out
*
SharedMemorySpec
)
{
*
out
=
*
in
...
...
deploy/cloud/operator/config/crd/bases/nvidia.com_dynamocomponentdeployments.yaml
View file @
9caafa55
...
...
@@ -10193,6 +10193,7 @@ spec:
GPUs/devices, and any runtime-specific resources.
properties:
claims:
description: Claims specifies resource claims for dynamic resource allocation
items:
description: ResourceClaim references one entry in PodSpec.ResourceClaims.
properties:
...
...
@@ -10213,35 +10214,53 @@ spec:
type: object
type: array
limits:
description: Limits specifies the maximum resources allowed for the component
properties:
cpu:
description: CPU specifies the CPU resource request/limit (e.g., "1000m", "2")
type: string
custom:
additionalProperties:
type: string
description: Custom specifies additional custom resource requests/limits
type: object
gpu:
description: |-
Indicates the number of GPUs to request.
total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
GPU indicates the number of GPUs to request.
Total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
type: string
gpuType:
description: |-
GPUType can specify a custom GPU type, e.g. "gpu.intel.com/xe"
By default if not specified, the GPU type is "nvidia.com/gpu"
type: string
memory:
description: Memory specifies the memory resource request/limit (e.g., "4Gi", "8Gi")
type: string
type: object
requests:
description: Requests specifies the minimum resources required by the component
properties:
cpu:
description: CPU specifies the CPU resource request/limit (e.g., "1000m", "2")
type: string
custom:
additionalProperties:
type: string
description: Custom specifies additional custom resource requests/limits
type: object
gpu:
description: |-
Indicates the number of GPUs to request.
total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
GPU indicates the number of GPUs to request.
Total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
type: string
gpuType:
description: |-
GPUType can specify a custom GPU type, e.g. "gpu.intel.com/xe"
By default if not specified, the GPU type is "nvidia.com/gpu"
type: string
memory:
description: Memory specifies the memory resource request/limit (e.g., "4Gi", "8Gi")
type: string
type: object
type: object
...
...
deploy/cloud/operator/config/crd/bases/nvidia.com_dynamographdeployments.yaml
View file @
9caafa55
...
...
@@ -10328,6 +10328,7 @@ spec:
GPUs/devices, and any runtime-specific resources.
properties:
claims:
description: Claims specifies resource claims for dynamic resource allocation
items:
description: ResourceClaim references one entry in PodSpec.ResourceClaims.
properties:
...
...
@@ -10348,35 +10349,53 @@ spec:
type: object
type: array
limits:
description: Limits specifies the maximum resources allowed for the component
properties:
cpu:
description: CPU specifies the CPU resource request/limit (e.g., "1000m", "2")
type: string
custom:
additionalProperties:
type: string
description: Custom specifies additional custom resource requests/limits
type: object
gpu:
description: |-
Indicates the number of GPUs to request.
total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
GPU indicates the number of GPUs to request.
Total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
type: string
gpuType:
description: |-
GPUType can specify a custom GPU type, e.g. "gpu.intel.com/xe"
By default if not specified, the GPU type is "nvidia.com/gpu"
type: string
memory:
description: Memory specifies the memory resource request/limit (e.g., "4Gi", "8Gi")
type: string
type: object
requests:
description: Requests specifies the minimum resources required by the component
properties:
cpu:
description: CPU specifies the CPU resource request/limit (e.g., "1000m", "2")
type: string
custom:
additionalProperties:
type: string
description: Custom specifies additional custom resource requests/limits
type: object
gpu:
description: |-
Indicates the number of GPUs to request.
total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
GPU indicates the number of GPUs to request.
Total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
type: string
gpuType:
description: |-
GPUType can specify a custom GPU type, e.g. "gpu.intel.com/xe"
By default if not specified, the GPU type is "nvidia.com/gpu"
type: string
memory:
description: Memory specifies the memory resource request/limit (e.g., "4Gi", "8Gi")
type: string
type: object
type: object
...
...
deploy/cloud/operator/
api/dynamo/schemas
/schemas.go
→
deploy/cloud/operator/
internal/common
/schemas.go
View file @
9caafa55
...
...
@@ -15,51 +15,7 @@
* limitations under the License.
*/
package
schemas
import
(
"encoding/json"
"errors"
"time"
)
type
DynamoComponent
struct
{
PresignedDownloadUrl
string
`json:"presigned_download_url"`
TransmissionStrategy
*
TransmissionStrategy
`json:"transmission_strategy"`
}
type
TransmissionStrategy
string
const
(
TransmissionStrategyPresignedURL
TransmissionStrategy
=
"presigned_url"
TransmissionStrategyProxy
TransmissionStrategy
=
"proxy"
)
type
Duration
time
.
Duration
func
(
d
Duration
)
MarshalJSON
()
([]
byte
,
error
)
{
return
json
.
Marshal
(
time
.
Duration
(
d
)
.
String
())
}
func
(
d
*
Duration
)
UnmarshalJSON
(
b
[]
byte
)
error
{
var
v
any
if
err
:=
json
.
Unmarshal
(
b
,
&
v
);
err
!=
nil
{
return
err
}
switch
value
:=
v
.
(
type
)
{
case
float64
:
*
d
=
Duration
(
time
.
Duration
(
value
))
case
string
:
tmp
,
err
:=
time
.
ParseDuration
(
value
)
if
err
!=
nil
{
return
err
}
*
d
=
Duration
(
tmp
)
default
:
return
errors
.
New
(
"invalid duration"
)
}
return
nil
}
package
common
type
DeploymentStrategy
string
...
...
@@ -69,10 +25,3 @@ const (
DeploymentStrategyRampedSlowRollout
DeploymentStrategy
=
"RampedSlowRollout"
DeploymentStrategyBestEffortControlledRollout
DeploymentStrategy
=
"BestEffortControlledRollout"
)
type
DockerRegistrySchema
struct
{
DynamoRepositoryURI
string
`json:"dynamoRepositoryURI"`
Server
string
`json:"server"`
SecretName
string
`json:"secretName"`
Secure
bool
`json:"secure"`
}
deploy/cloud/operator/internal/common/url.go
View file @
9caafa55
/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package
common
import
(
...
...
deploy/cloud/operator/internal/common/url_test.go
View file @
9caafa55
/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package
common
import
"testing"
...
...
deploy/cloud/operator/internal/controller/dynamocomponentdeployment_controller.go
View file @
9caafa55
...
...
@@ -33,8 +33,8 @@ import (
metav1
"k8s.io/apimachinery/pkg/apis/meta/v1"
"emperror.dev/errors"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/schemas"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/common"
commonconsts
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
commonController
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/controller_common"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/dynamo"
...
...
@@ -1003,9 +1003,9 @@ func (r *DynamoComponentDeploymentReconciler) generateDeployment(ctx context.Con
resourceAnnotations
:=
getResourceAnnotations
(
opt
.
dynamoComponentDeployment
)
strategyStr
:=
resourceAnnotations
[
KubeAnnotationDeploymentStrategy
]
if
strategyStr
!=
""
{
strategyType
:=
schemas
.
DeploymentStrategy
(
strategyStr
)
strategyType
:=
common
.
DeploymentStrategy
(
strategyStr
)
switch
strategyType
{
case
schemas
.
DeploymentStrategyRollingUpdate
:
case
common
.
DeploymentStrategyRollingUpdate
:
strategy
=
appsv1
.
DeploymentStrategy
{
Type
:
appsv1
.
RollingUpdateDeploymentStrategyType
,
RollingUpdate
:
&
appsv1
.
RollingUpdateDeployment
{
...
...
@@ -1013,11 +1013,11 @@ func (r *DynamoComponentDeploymentReconciler) generateDeployment(ctx context.Con
MaxUnavailable
:
&
defaultMaxUnavailable
,
},
}
case
schemas
.
DeploymentStrategyRecreate
:
case
common
.
DeploymentStrategyRecreate
:
strategy
=
appsv1
.
DeploymentStrategy
{
Type
:
appsv1
.
RecreateDeploymentStrategyType
,
}
case
schemas
.
DeploymentStrategyRampedSlowRollout
:
case
common
.
DeploymentStrategyRampedSlowRollout
:
strategy
=
appsv1
.
DeploymentStrategy
{
Type
:
appsv1
.
RollingUpdateDeploymentStrategyType
,
RollingUpdate
:
&
appsv1
.
RollingUpdateDeployment
{
...
...
@@ -1025,7 +1025,7 @@ func (r *DynamoComponentDeploymentReconciler) generateDeployment(ctx context.Con
MaxUnavailable
:
&
[]
intstr
.
IntOrString
{
intstr
.
FromInt
(
0
)}[
0
],
},
}
case
schemas
.
DeploymentStrategyBestEffortControlledRollout
:
case
common
.
DeploymentStrategyBestEffortControlledRollout
:
strategy
=
appsv1
.
DeploymentStrategy
{
Type
:
appsv1
.
RollingUpdateDeploymentStrategyType
,
RollingUpdate
:
&
appsv1
.
RollingUpdateDeployment
{
...
...
deploy/cloud/operator/internal/controller/dynamocomponentdeployment_controller_test.go
View file @
9caafa55
...
...
@@ -24,7 +24,6 @@ import (
"fmt"
"testing"
dynamoCommon
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
commonconsts
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/controller_common"
...
...
@@ -703,18 +702,18 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
Multinode
:
&
v1alpha1
.
MultinodeSpec
{
NodeCount
:
2
,
},
Resources
:
&
dynamoCommon
.
Resources
{
Requests
:
&
dynamoCommon
.
ResourceItem
{
Resources
:
&
v1alpha1
.
Resources
{
Requests
:
&
v1alpha1
.
ResourceItem
{
CPU
:
"300m"
,
Memory
:
"500Mi"
,
},
Limits
:
&
dynamoCommon
.
ResourceItem
{
Limits
:
&
v1alpha1
.
ResourceItem
{
GPU
:
"1"
,
Memory
:
"20Gi"
,
CPU
:
"10"
,
},
},
ExtraPodMetadata
:
&
dynamoCommon
.
ExtraPodMetadata
{
ExtraPodMetadata
:
&
v1alpha1
.
ExtraPodMetadata
{
Annotations
:
map
[
string
]
string
{
"nvidia.com/annotation1"
:
"annotation1"
,
},
...
...
@@ -722,7 +721,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
"nvidia.com/label1"
:
"label1"
,
},
},
ExtraPodSpec
:
&
dynamoCommon
.
ExtraPodSpec
{
ExtraPodSpec
:
&
v1alpha1
.
ExtraPodSpec
{
PodSpec
:
&
corev1
.
PodSpec
{
TerminationGracePeriodSeconds
:
ptr
.
To
(
int64
(
10
)),
Containers
:
[]
corev1
.
Container
{
...
...
@@ -866,7 +865,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
Limits
:
corev1
.
ResourceList
{
corev1
.
ResourceMemory
:
resource
.
MustParse
(
"20Gi"
),
corev1
.
ResourceCPU
:
resource
.
MustParse
(
"10"
),
"nvidia.com/gpu"
:
resource
.
MustParse
(
"1"
),
corev1
.
ResourceName
(
commonconsts
.
KubeResourceGPUNvidia
)
:
resource
.
MustParse
(
"1"
),
},
},
LivenessProbe
:
&
corev1
.
Probe
{
...
...
@@ -1024,12 +1023,12 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
Multinode
:
&
v1alpha1
.
MultinodeSpec
{
NodeCount
:
2
,
},
Resources
:
&
dynamoCommon
.
Resources
{
Limits
:
&
dynamoCommon
.
ResourceItem
{
Resources
:
&
v1alpha1
.
Resources
{
Limits
:
&
v1alpha1
.
ResourceItem
{
GPU
:
"1"
,
},
},
ExtraPodSpec
:
&
dynamoCommon
.
ExtraPodSpec
{
ExtraPodSpec
:
&
v1alpha1
.
ExtraPodSpec
{
MainContainer
:
&
corev1
.
Container
{
Image
:
"test-image:latest"
,
},
...
...
@@ -1067,12 +1066,12 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
Multinode
:
&
v1alpha1
.
MultinodeSpec
{
NodeCount
:
2
,
},
Resources
:
&
dynamoCommon
.
Resources
{
Limits
:
&
dynamoCommon
.
ResourceItem
{
Resources
:
&
v1alpha1
.
Resources
{
Limits
:
&
v1alpha1
.
ResourceItem
{
GPU
:
"1"
,
},
},
ExtraPodSpec
:
&
dynamoCommon
.
ExtraPodSpec
{
ExtraPodSpec
:
&
v1alpha1
.
ExtraPodSpec
{
MainContainer
:
&
corev1
.
Container
{
Image
:
""
,
// Image is missing, will cause error in generatePodTemplateSpec
},
...
...
deploy/cloud/operator/internal/controller_common/resource.go
View file @
9caafa55
...
...
@@ -25,7 +25,7 @@ import (
"reflect"
"sort"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/
dynamo/common
"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/
v1alpha1
"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
"github.com/google/go-cmp/cmp"
corev1
"k8s.io/api/core/v1"
...
...
@@ -386,7 +386,7 @@ func firstKey(m map[string]interface{}) string {
return
keys
[
0
]
}
func
GetResourcesConfig
(
resources
*
common
.
Resources
)
(
*
corev1
.
ResourceRequirements
,
error
)
{
func
GetResourcesConfig
(
resources
*
v1alpha1
.
Resources
)
(
*
corev1
.
ResourceRequirements
,
error
)
{
if
resources
==
nil
{
return
nil
,
nil
...
...
@@ -423,7 +423,7 @@ func GetResourcesConfig(resources *common.Resources) (*corev1.ResourceRequiremen
if
currentResources
.
Limits
==
nil
{
currentResources
.
Limits
=
make
(
corev1
.
ResourceList
)
}
currentResources
.
Limits
[
corev1
.
ResourceName
(
consts
.
KubeResourceGPUNvidia
)]
=
q
currentResources
.
Limits
[
getGPU
ResourceName
(
resources
.
Limits
)]
=
q
}
for
k
,
v
:=
range
resources
.
Limits
.
Custom
{
q
,
err
:=
resource
.
ParseQuantity
(
v
)
...
...
@@ -477,6 +477,16 @@ func GetResourcesConfig(resources *common.Resources) (*corev1.ResourceRequiremen
return
currentResources
,
nil
}
func
getGPUResourceName
(
resourceItem
*
v1alpha1
.
ResourceItem
)
corev1
.
ResourceName
{
if
resourceItem
==
nil
{
return
corev1
.
ResourceName
(
consts
.
KubeResourceGPUNvidia
)
}
if
resourceItem
.
GPUType
!=
""
{
return
corev1
.
ResourceName
(
resourceItem
.
GPUType
)
}
return
corev1
.
ResourceName
(
consts
.
KubeResourceGPUNvidia
)
}
type
Resource
struct
{
client
.
Object
isReady
func
()
(
bool
,
string
)
...
...
deploy/cloud/operator/internal/controller_common/resource_test.go
View file @
9caafa55
...
...
@@ -21,8 +21,11 @@ import (
"testing"
appsv1
"k8s.io/api/apps/v1"
corev1
"k8s.io/api/core/v1"
metav1
"k8s.io/apimachinery/pkg/apis/meta/v1"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
"github.com/bsm/gomega"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"sigs.k8s.io/controller-runtime/pkg/client"
...
...
@@ -474,3 +477,58 @@ func TestCopySpec(t *testing.T) {
g
:=
gomega
.
NewGomegaWithT
(
t
)
g
.
Expect
(
dst
)
.
To
(
gomega
.
Equal
(
expected
))
}
func
TestGetResourcesConfig
(
t
*
testing
.
T
)
{
tests
:=
[]
struct
{
name
string
resources
*
v1alpha1
.
Resources
expectedGPULimit
corev1
.
ResourceName
expectedGPUValue
string
expectError
bool
}{
{
name
:
"limits.gpu defined with no gpuType"
,
resources
:
&
v1alpha1
.
Resources
{
Limits
:
&
v1alpha1
.
ResourceItem
{
GPU
:
"4"
,
},
},
expectedGPULimit
:
corev1
.
ResourceName
(
consts
.
KubeResourceGPUNvidia
),
expectedGPUValue
:
"4"
,
expectError
:
false
,
},
{
name
:
"limits.gpu defined with custom gpuType"
,
resources
:
&
v1alpha1
.
Resources
{
Limits
:
&
v1alpha1
.
ResourceItem
{
GPU
:
"8"
,
GPUType
:
"gpu.custom-type.com/test"
,
},
},
expectedGPULimit
:
corev1
.
ResourceName
(
"gpu.custom-type.com/test"
),
expectedGPUValue
:
"8"
,
expectError
:
false
,
},
}
for
_
,
tt
:=
range
tests
{
t
.
Run
(
tt
.
name
,
func
(
t
*
testing
.
T
)
{
g
:=
gomega
.
NewGomegaWithT
(
t
)
result
,
err
:=
GetResourcesConfig
(
tt
.
resources
)
if
tt
.
expectError
{
g
.
Expect
(
err
)
.
To
(
gomega
.
HaveOccurred
())
return
}
g
.
Expect
(
err
)
.
To
(
gomega
.
BeNil
())
g
.
Expect
(
result
)
.
ToNot
(
gomega
.
BeNil
())
g
.
Expect
(
result
.
Limits
)
.
ToNot
(
gomega
.
BeNil
())
gpuQuantity
,
exists
:=
result
.
Limits
[
tt
.
expectedGPULimit
]
g
.
Expect
(
exists
)
.
To
(
gomega
.
BeTrue
(),
"GPU resource %s should exist in limits"
,
tt
.
expectedGPULimit
)
g
.
Expect
(
gpuQuantity
.
String
())
.
To
(
gomega
.
Equal
(
tt
.
expectedGPUValue
))
})
}
}
deploy/cloud/operator/internal/dynamo/backend_trtllm.go
View file @
9caafa55
...
...
@@ -6,7 +6,6 @@ import (
"strconv"
"strings"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
commonconsts
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
corev1
"k8s.io/api/core/v1"
...
...
@@ -197,7 +196,7 @@ func (b *TRTLLMBackend) generateWorkerHostnames(numberOfNodes int32, serviceName
}
// getGPUsPerNode extracts the number of GPUs per node from resources
func
getGPUsPerNode
(
resources
*
common
.
Resources
)
int32
{
func
getGPUsPerNode
(
resources
*
v1alpha1
.
Resources
)
int32
{
if
resources
!=
nil
&&
resources
.
Requests
!=
nil
&&
resources
.
Requests
.
GPU
!=
""
{
if
gpus
,
err
:=
strconv
.
ParseInt
(
resources
.
Requests
.
GPU
,
10
,
32
);
err
==
nil
{
return
int32
(
gpus
)
...
...
deploy/cloud/operator/internal/dynamo/backend_trtllm_test.go
View file @
9caafa55
...
...
@@ -4,7 +4,6 @@ import (
"strings"
"testing"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
commonconsts
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
corev1
"k8s.io/api/core/v1"
...
...
@@ -52,8 +51,8 @@ func TestTRTLLMBackend_UpdateContainer(t *testing.T) {
role
:
RoleLeader
,
multinodeDeployer
:
&
GroveMultinodeDeployer
{},
component
:
&
v1alpha1
.
DynamoComponentDeploymentSharedSpec
{
Resources
:
&
common
.
Resources
{
Requests
:
&
common
.
ResourceItem
{
Resources
:
&
v1alpha1
.
Resources
{
Requests
:
&
v1alpha1
.
ResourceItem
{
GPU
:
"2"
,
},
},
...
...
@@ -106,8 +105,8 @@ func TestTRTLLMBackend_UpdateContainer(t *testing.T) {
role
:
RoleLeader
,
multinodeDeployer
:
&
LWSMultinodeDeployer
{},
component
:
&
v1alpha1
.
DynamoComponentDeploymentSharedSpec
{
Resources
:
&
common
.
Resources
{
Limits
:
&
common
.
ResourceItem
{
Resources
:
&
v1alpha1
.
Resources
{
Limits
:
&
v1alpha1
.
ResourceItem
{
GPU
:
"1"
,
},
},
...
...
@@ -557,8 +556,8 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) {
multinodeDeployer
:
&
GroveMultinodeDeployer
{},
serviceName
:
"test-service"
,
component
:
&
v1alpha1
.
DynamoComponentDeploymentSharedSpec
{
Resources
:
&
common
.
Resources
{
Requests
:
&
common
.
ResourceItem
{
Resources
:
&
v1alpha1
.
Resources
{
Requests
:
&
v1alpha1
.
ResourceItem
{
GPU
:
"2"
,
},
},
...
...
@@ -583,8 +582,8 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) {
multinodeDeployer
:
&
GroveMultinodeDeployer
{},
serviceName
:
"test"
,
component
:
&
v1alpha1
.
DynamoComponentDeploymentSharedSpec
{
Resources
:
&
common
.
Resources
{
Limits
:
&
common
.
ResourceItem
{
Resources
:
&
v1alpha1
.
Resources
{
Limits
:
&
v1alpha1
.
ResourceItem
{
GPU
:
"1"
,
},
},
...
...
@@ -599,8 +598,8 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) {
multinodeDeployer
:
&
GroveMultinodeDeployer
{},
serviceName
:
"test"
,
component
:
&
v1alpha1
.
DynamoComponentDeploymentSharedSpec
{
Resources
:
&
common
.
Resources
{
Limits
:
&
common
.
ResourceItem
{
Resources
:
&
v1alpha1
.
Resources
{
Limits
:
&
v1alpha1
.
ResourceItem
{
GPU
:
"1"
,
},
},
...
...
@@ -615,8 +614,8 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) {
multinodeDeployer
:
&
GroveMultinodeDeployer
{},
serviceName
:
"test"
,
component
:
&
v1alpha1
.
DynamoComponentDeploymentSharedSpec
{
Resources
:
&
common
.
Resources
{
Limits
:
&
common
.
ResourceItem
{
Resources
:
&
v1alpha1
.
Resources
{
Limits
:
&
v1alpha1
.
ResourceItem
{
GPU
:
"1"
,
},
},
...
...
@@ -631,8 +630,8 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) {
multinodeDeployer
:
&
GroveMultinodeDeployer
{},
serviceName
:
"test"
,
component
:
&
v1alpha1
.
DynamoComponentDeploymentSharedSpec
{
Resources
:
&
common
.
Resources
{
Limits
:
&
common
.
ResourceItem
{
Resources
:
&
v1alpha1
.
Resources
{
Limits
:
&
v1alpha1
.
ResourceItem
{
GPU
:
"1"
,
},
},
...
...
@@ -647,8 +646,8 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) {
multinodeDeployer
:
&
GroveMultinodeDeployer
{},
serviceName
:
"test"
,
component
:
&
v1alpha1
.
DynamoComponentDeploymentSharedSpec
{
Resources
:
&
common
.
Resources
{
Requests
:
&
common
.
ResourceItem
{
Resources
:
&
v1alpha1
.
Resources
{
Requests
:
&
v1alpha1
.
ResourceItem
{
GPU
:
"1"
,
},
},
...
...
@@ -663,8 +662,8 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) {
multinodeDeployer
:
&
GroveMultinodeDeployer
{},
serviceName
:
"test"
,
component
:
&
v1alpha1
.
DynamoComponentDeploymentSharedSpec
{
Resources
:
&
common
.
Resources
{
Requests
:
&
common
.
ResourceItem
{
Resources
:
&
v1alpha1
.
Resources
{
Requests
:
&
v1alpha1
.
ResourceItem
{
GPU
:
"1"
,
},
},
...
...
@@ -778,7 +777,7 @@ func TestTRTLLMBackend_setupWorkerContainer(t *testing.T) {
func
TestTRTLLMBackend_getGPUsPerNode
(
t
*
testing
.
T
)
{
tests
:=
[]
struct
{
name
string
resources
*
common
.
Resources
resources
*
v1alpha1
.
Resources
expected
int32
}{
{
...
...
@@ -788,13 +787,13 @@ func TestTRTLLMBackend_getGPUsPerNode(t *testing.T) {
},
{
name
:
"Empty resources - default to 0"
,
resources
:
&
common
.
Resources
{},
resources
:
&
v1alpha1
.
Resources
{},
expected
:
0
,
},
{
name
:
"GPU in requests"
,
resources
:
&
common
.
Resources
{
Requests
:
&
common
.
ResourceItem
{
resources
:
&
v1alpha1
.
Resources
{
Requests
:
&
v1alpha1
.
ResourceItem
{
GPU
:
"2"
,
},
},
...
...
@@ -802,8 +801,8 @@ func TestTRTLLMBackend_getGPUsPerNode(t *testing.T) {
},
{
name
:
"GPU in limits"
,
resources
:
&
common
.
Resources
{
Limits
:
&
common
.
ResourceItem
{
resources
:
&
v1alpha1
.
Resources
{
Limits
:
&
v1alpha1
.
ResourceItem
{
GPU
:
"4"
,
},
},
...
...
@@ -811,11 +810,11 @@ func TestTRTLLMBackend_getGPUsPerNode(t *testing.T) {
},
{
name
:
"GPU in both requests and limits - requests takes precedence"
,
resources
:
&
common
.
Resources
{
Requests
:
&
common
.
ResourceItem
{
resources
:
&
v1alpha1
.
Resources
{
Requests
:
&
v1alpha1
.
ResourceItem
{
GPU
:
"3"
,
},
Limits
:
&
common
.
ResourceItem
{
Limits
:
&
v1alpha1
.
ResourceItem
{
GPU
:
"8"
,
},
},
...
...
@@ -823,8 +822,8 @@ func TestTRTLLMBackend_getGPUsPerNode(t *testing.T) {
},
{
name
:
"Invalid GPU value - default to 0"
,
resources
:
&
common
.
Resources
{
Requests
:
&
common
.
ResourceItem
{
resources
:
&
v1alpha1
.
Resources
{
Requests
:
&
v1alpha1
.
ResourceItem
{
GPU
:
"invalid"
,
},
},
...
...
@@ -832,8 +831,8 @@ func TestTRTLLMBackend_getGPUsPerNode(t *testing.T) {
},
{
name
:
"Empty GPU string - default to 0"
,
resources
:
&
common
.
Resources
{
Requests
:
&
common
.
ResourceItem
{
resources
:
&
v1alpha1
.
Resources
{
Requests
:
&
v1alpha1
.
ResourceItem
{
GPU
:
""
,
},
},
...
...
deploy/cloud/operator/internal/dynamo/backend_vllm.go
View file @
9caafa55
...
...
@@ -6,7 +6,6 @@ import (
"strings"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
corev1
"k8s.io/api/core/v1"
"sigs.k8s.io/controller-runtime/pkg/log"
)
...
...
@@ -26,7 +25,7 @@ func (b *VLLMBackend) UpdateContainer(container *corev1.Container, numberOfNodes
if
isMultinode
{
// Apply multinode-specific argument modifications
updateVLLMMultinodeArgs
(
container
,
role
,
serviceName
,
multinodeDeployer
)
updateVLLMMultinodeArgs
(
container
,
role
,
serviceName
,
multinodeDeployer
,
component
.
Resources
)
// Remove probes for multinode worker and leader
if
role
==
RoleWorker
{
...
...
@@ -72,12 +71,12 @@ func (b *VLLMBackend) UpdatePodSpec(podSpec *corev1.PodSpec, numberOfNodes int32
// updateVLLMMultinodeArgs will inject Ray-specific flags for tensor parallel multinode deployments
// OR data parallel flags for data parallel multinode deployments
func
updateVLLMMultinodeArgs
(
container
*
corev1
.
Container
,
role
Role
,
serviceName
string
,
multinodeDeployer
MultinodeDeployer
)
{
func
updateVLLMMultinodeArgs
(
container
*
corev1
.
Container
,
role
Role
,
serviceName
string
,
multinodeDeployer
MultinodeDeployer
,
resources
*
v1alpha1
.
Resources
)
{
expandedArgs
:=
getExpandedArgs
(
container
)
if
needsRayDistributedLaunch
(
expandedArgs
,
container
.
R
esources
)
{
if
needsRayDistributedLaunch
(
expandedArgs
,
r
esources
)
{
injectRayDistributedLaunchFlags
(
container
,
role
,
serviceName
,
multinodeDeployer
)
}
else
if
needsDataParallelLaunch
(
expandedArgs
,
container
.
R
esources
)
{
injectDataParallelLaunchFlags
(
container
,
role
,
serviceName
,
multinodeDeployer
)
}
else
if
needsDataParallelLaunch
(
expandedArgs
,
r
esources
)
{
injectDataParallelLaunchFlags
(
container
,
role
,
serviceName
,
multinodeDeployer
,
resources
)
}
else
{
logger
:=
log
.
Log
.
WithName
(
"vllm-backend"
)
logger
.
Info
(
"No need to inject Ray-specific or data parallel flags for multinode deployments"
,
"args"
,
strings
.
Join
(
container
.
Args
,
" "
))
...
...
@@ -109,10 +108,10 @@ func injectRayDistributedLaunchFlags(container *corev1.Container, role Role, ser
container
.
Command
=
[]
string
{
"/bin/sh"
,
"-c"
}
// ensure cmd is a shell
}
func
injectDataParallelLaunchFlags
(
container
*
corev1
.
Container
,
role
Role
,
serviceName
string
,
multinodeDeployer
MultinodeDeployer
)
{
func
injectDataParallelLaunchFlags
(
container
*
corev1
.
Container
,
role
Role
,
serviceName
string
,
multinodeDeployer
MultinodeDeployer
,
resources
*
v1alpha1
.
Resources
)
{
expandedArgs
:=
getExpandedArgs
(
container
)
leaderHostname
:=
multinodeDeployer
.
GetLeaderHostname
(
serviceName
)
dataParallelSizeLocal
:=
getContainerGPUs
(
container
.
R
esources
)
/
getWorldSize
(
expandedArgs
)
dataParallelSizeLocal
:=
getContainerGPUs
(
r
esources
)
/
getWorldSize
(
expandedArgs
)
var
startRank
string
switch
role
{
case
RoleWorker
:
...
...
@@ -134,8 +133,12 @@ func injectDataParallelLaunchFlags(container *corev1.Container, role Role, servi
// if world size (within DP rank) > GPU count, then we need to inject ray
// world size = tensor parallel size * pipeline parallel size
func
needsRayDistributedLaunch
(
expandedArgs
[]
string
,
resources
corev1
.
ResourceRequirements
)
bool
{
return
getWorldSize
(
expandedArgs
)
>
getContainerGPUs
(
resources
)
func
needsRayDistributedLaunch
(
expandedArgs
[]
string
,
resources
*
v1alpha1
.
Resources
)
bool
{
containerGPUs
:=
getContainerGPUs
(
resources
)
if
containerGPUs
==
0
{
return
false
}
return
getWorldSize
(
expandedArgs
)
>
containerGPUs
}
func
getWorldSize
(
expandedArgs
[]
string
)
int64
{
...
...
@@ -145,9 +148,13 @@ func getWorldSize(expandedArgs []string) int64 {
}
// if world size across all DP ranks > GPU count, then we need to inject data parallel multinode coordination
func
needsDataParallelLaunch
(
expandedArgs
[]
string
,
resources
corev
1
.
Resource
Requirement
s
)
bool
{
func
needsDataParallelLaunch
(
expandedArgs
[]
string
,
resources
*
v1alpha
1
.
Resources
)
bool
{
dataParallelSize
:=
getFlagValue
(
expandedArgs
,
dataParallelSizeFlag
)
return
getWorldSize
(
expandedArgs
)
*
dataParallelSize
>
getContainerGPUs
(
resources
)
containerGPUs
:=
getContainerGPUs
(
resources
)
if
containerGPUs
==
0
{
return
false
}
return
getWorldSize
(
expandedArgs
)
*
dataParallelSize
>
containerGPUs
}
func
getFlagValue
(
expandedArgs
[]
string
,
flag
string
)
int64
{
...
...
@@ -164,14 +171,12 @@ func getFlagValue(expandedArgs []string, flag string) int64 {
return
flagValue
}
func
getContainerGPUs
(
resources
corev1
.
ResourceRequirements
)
int64
{
var
containerGPUs
int64
=
1
// Requests defaults to Limits, doesn't make sense in case where Requests < Limits for gpus
for
name
,
quantity
:=
range
resources
.
Limits
{
if
name
.
String
()
==
consts
.
KubeResourceGPUNvidia
{
containerGPUs
=
quantity
.
Value
()
break
func
getContainerGPUs
(
resources
*
v1alpha1
.
Resources
)
int64
{
if
resources
==
nil
||
resources
.
Limits
==
nil
||
resources
.
Limits
.
GPU
==
""
{
return
0
}
if
gpus
,
err
:=
strconv
.
ParseInt
(
resources
.
Limits
.
GPU
,
10
,
64
);
err
==
nil
{
return
gpus
}
return
containerGPUs
return
0
}
deploy/cloud/operator/internal/dynamo/backend_vllm_test.go
View file @
9caafa55
...
...
@@ -3,12 +3,12 @@ package dynamo
import
(
"fmt"
"reflect"
"strconv"
"testing"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
"github.com/onsi/gomega"
corev1
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
)
func
TestVLLMBackend_UpdateContainer
(
t
*
testing
.
T
)
{
...
...
@@ -21,6 +21,7 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) {
component
*
v1alpha1
.
DynamoComponentDeploymentSharedSpec
multinodeDeployer
MultinodeDeployer
initialContainer
*
corev1
.
Container
gpuCount
int64
// GPU count for the test case
expectedArgs
[]
string
expectNotModified
bool
// If true, container args should not change
expectProbesRemoved
bool
// If true, probes should be nil
...
...
@@ -32,6 +33,7 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) {
component
:
&
v1alpha1
.
DynamoComponentDeploymentSharedSpec
{},
multinodeDeployer
:
&
GroveMultinodeDeployer
{},
initialContainer
:
&
corev1
.
Container
{
Command
:
[]
string
{
"python3"
},
Args
:
[]
string
{
"-m"
,
"dynamo.vllm"
}},
gpuCount
:
0
,
expectNotModified
:
true
,
},
{
...
...
@@ -40,7 +42,8 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) {
role
:
RoleLeader
,
component
:
&
v1alpha1
.
DynamoComponentDeploymentSharedSpec
{},
multinodeDeployer
:
&
GroveMultinodeDeployer
{},
initialContainer
:
&
corev1
.
Container
{
Command
:
[]
string
{
"python3"
,
"-m"
,
"dynamo.vllm"
},
Args
:
[]
string
{
"--model"
,
"test"
,
tensorParallelSizeFlag
,
"8"
},
Resources
:
corev1
.
ResourceRequirements
{
Limits
:
corev1
.
ResourceList
{
corev1
.
ResourceName
(
"nvidia.com/gpu"
)
:
resource
.
MustParse
(
"4"
)}}},
initialContainer
:
&
corev1
.
Container
{
Command
:
[]
string
{
"python3"
,
"-m"
,
"dynamo.vllm"
},
Args
:
[]
string
{
"--model"
,
"test"
,
tensorParallelSizeFlag
,
"8"
}},
gpuCount
:
4
,
expectedArgs
:
[]
string
{
fmt
.
Sprintf
(
"ray start --head --port=%s && python3 -m dynamo.vllm --model test %s 8"
,
VLLMPort
,
tensorParallelSizeFlag
)},
expectProbesRemoved
:
true
,
},
...
...
@@ -50,7 +53,8 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) {
role
:
RoleWorker
,
component
:
&
v1alpha1
.
DynamoComponentDeploymentSharedSpec
{},
multinodeDeployer
:
&
GroveMultinodeDeployer
{},
initialContainer
:
&
corev1
.
Container
{
Command
:
[]
string
{
"python3"
},
Args
:
[]
string
{
"-m"
,
"dynamo.vllm"
,
"--model"
,
"test"
,
tensorParallelSizeFlag
,
"8"
},
Resources
:
corev1
.
ResourceRequirements
{
Limits
:
corev1
.
ResourceList
{
corev1
.
ResourceName
(
"nvidia.com/gpu"
)
:
resource
.
MustParse
(
"4"
)}}},
initialContainer
:
&
corev1
.
Container
{
Command
:
[]
string
{
"python3"
},
Args
:
[]
string
{
"-m"
,
"dynamo.vllm"
,
"--model"
,
"test"
,
tensorParallelSizeFlag
,
"8"
}},
gpuCount
:
4
,
expectedArgs
:
[]
string
{
"ray start --address=$(GROVE_PCSG_NAME)-$(GROVE_PCSG_INDEX)-test-service-ldr-0.$(GROVE_HEADLESS_SERVICE):6379 --block"
},
expectProbesRemoved
:
true
,
},
...
...
@@ -60,7 +64,8 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) {
role
:
RoleWorker
,
component
:
&
v1alpha1
.
DynamoComponentDeploymentSharedSpec
{},
multinodeDeployer
:
&
LWSMultinodeDeployer
{},
initialContainer
:
&
corev1
.
Container
{
Args
:
[]
string
{
"python3"
,
"-m"
,
"dynamo.vllm"
,
tensorParallelSizeFlag
,
"8"
},
Resources
:
corev1
.
ResourceRequirements
{
Limits
:
corev1
.
ResourceList
{
corev1
.
ResourceName
(
"nvidia.com/gpu"
)
:
resource
.
MustParse
(
"4"
)}}},
initialContainer
:
&
corev1
.
Container
{
Args
:
[]
string
{
"python3"
,
"-m"
,
"dynamo.vllm"
,
tensorParallelSizeFlag
,
"8"
}},
gpuCount
:
4
,
expectedArgs
:
[]
string
{
"ray start --address=$(LWS_LEADER_ADDRESS):6379 --block"
},
expectProbesRemoved
:
true
,
},
...
...
@@ -71,6 +76,7 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) {
component
:
&
v1alpha1
.
DynamoComponentDeploymentSharedSpec
{},
multinodeDeployer
:
&
GroveMultinodeDeployer
{},
initialContainer
:
&
corev1
.
Container
{
Args
:
[]
string
{}},
gpuCount
:
0
,
expectNotModified
:
true
,
// Should not modify empty args
},
{
...
...
@@ -80,6 +86,7 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) {
component
:
&
v1alpha1
.
DynamoComponentDeploymentSharedSpec
{},
multinodeDeployer
:
&
GroveMultinodeDeployer
{},
initialContainer
:
&
corev1
.
Container
{
Args
:
[]
string
{
"python3"
,
"-m"
,
"dynamo.frontend"
}},
gpuCount
:
0
,
expectNotModified
:
true
,
},
}
...
...
@@ -90,6 +97,15 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) {
initialContainerArgs
:=
append
([]
string
{},
tt
.
initialContainer
.
Args
...
)
// Create resources from GPU count and set in component
if
tt
.
gpuCount
>
0
{
tt
.
component
.
Resources
=
&
v1alpha1
.
Resources
{
Limits
:
&
v1alpha1
.
ResourceItem
{
GPU
:
strconv
.
FormatInt
(
tt
.
gpuCount
,
10
),
},
}
}
// Call UpdateContainer
backend
.
UpdateContainer
(
tt
.
initialContainer
,
tt
.
numberOfNodes
,
tt
.
role
,
tt
.
component
,
"test-service"
,
tt
.
multinodeDeployer
)
...
...
@@ -119,6 +135,7 @@ func TestVLLMBackend_ShellCommandInjection(t *testing.T) {
role
Role
multinodeDeployer
MultinodeDeployer
initialContainer
*
corev1
.
Container
gpuCount
int64
// GPU count for the test case
expectedArgs
[]
string
description
string
}{
...
...
@@ -128,6 +145,7 @@ func TestVLLMBackend_ShellCommandInjection(t *testing.T) {
role
:
RoleMain
,
multinodeDeployer
:
&
GroveMultinodeDeployer
{},
initialContainer
:
&
corev1
.
Container
{
Command
:
[]
string
{
"sh"
,
"-c"
},
Args
:
[]
string
{
"python3 -m dynamo.vllm"
}},
gpuCount
:
0
,
expectedArgs
:
[]
string
{
"python3 -m dynamo.vllm"
},
description
:
"Single node should not modify shell commands"
,
},
...
...
@@ -136,7 +154,8 @@ func TestVLLMBackend_ShellCommandInjection(t *testing.T) {
numberOfNodes
:
2
,
role
:
RoleLeader
,
multinodeDeployer
:
&
GroveMultinodeDeployer
{},
initialContainer
:
&
corev1
.
Container
{
Command
:
[]
string
{
"sh"
,
"-c"
},
Args
:
[]
string
{
fmt
.
Sprintf
(
"python3 -m dynamo.vllm %s 8"
,
dataParallelSizeFlag
)},
Resources
:
corev1
.
ResourceRequirements
{
Limits
:
corev1
.
ResourceList
{
corev1
.
ResourceName
(
"nvidia.com/gpu"
)
:
resource
.
MustParse
(
"4"
)}}},
initialContainer
:
&
corev1
.
Container
{
Command
:
[]
string
{
"sh"
,
"-c"
},
Args
:
[]
string
{
fmt
.
Sprintf
(
"python3 -m dynamo.vllm %s 8"
,
dataParallelSizeFlag
)}},
gpuCount
:
4
,
expectedArgs
:
[]
string
{
"python3 -m dynamo.vllm --data-parallel-address $(GROVE_PCSG_NAME)-$(GROVE_PCSG_INDEX)-test-service-ldr-0.$(GROVE_HEADLESS_SERVICE) --data-parallel-size-local 4 --data-parallel-rpc-port 13445 --data-parallel-start-rank 0 --data-parallel-size 8"
},
description
:
"Shell commands should use regex injection for python commands"
,
},
...
...
@@ -145,7 +164,8 @@ func TestVLLMBackend_ShellCommandInjection(t *testing.T) {
numberOfNodes
:
2
,
role
:
RoleLeader
,
multinodeDeployer
:
&
GroveMultinodeDeployer
{},
initialContainer
:
&
corev1
.
Container
{
Command
:
[]
string
{
"sh"
,
"-c"
},
Args
:
[]
string
{
fmt
.
Sprintf
(
"echo blah | wc -l && python3 -m dynamo.vllm %s 8 && ls -al"
,
dataParallelSizeFlag
)},
Resources
:
corev1
.
ResourceRequirements
{
Limits
:
corev1
.
ResourceList
{
corev1
.
ResourceName
(
"nvidia.com/gpu"
)
:
resource
.
MustParse
(
"4"
)}}},
initialContainer
:
&
corev1
.
Container
{
Command
:
[]
string
{
"sh"
,
"-c"
},
Args
:
[]
string
{
fmt
.
Sprintf
(
"echo blah | wc -l && python3 -m dynamo.vllm %s 8 && ls -al"
,
dataParallelSizeFlag
)}},
gpuCount
:
4
,
expectedArgs
:
[]
string
{
"echo blah | wc -l && python3 -m dynamo.vllm --data-parallel-address $(GROVE_PCSG_NAME)-$(GROVE_PCSG_INDEX)-test-service-ldr-0.$(GROVE_HEADLESS_SERVICE) --data-parallel-size-local 4 --data-parallel-rpc-port 13445 --data-parallel-start-rank 0 --data-parallel-size 8 && ls -al"
},
description
:
"Complex shell commands should inject flags only into python part"
,
},
...
...
@@ -154,7 +174,8 @@ func TestVLLMBackend_ShellCommandInjection(t *testing.T) {
numberOfNodes
:
2
,
role
:
RoleLeader
,
multinodeDeployer
:
&
LWSMultinodeDeployer
{},
initialContainer
:
&
corev1
.
Container
{
Command
:
[]
string
{
"sh"
,
"-c"
},
Args
:
[]
string
{
fmt
.
Sprintf
(
"python3 -m dynamo.vllm %s 8"
,
dataParallelSizeFlag
)},
Resources
:
corev1
.
ResourceRequirements
{
Limits
:
corev1
.
ResourceList
{
corev1
.
ResourceName
(
"nvidia.com/gpu"
)
:
resource
.
MustParse
(
"4"
)}}},
initialContainer
:
&
corev1
.
Container
{
Command
:
[]
string
{
"sh"
,
"-c"
},
Args
:
[]
string
{
fmt
.
Sprintf
(
"python3 -m dynamo.vllm %s 8"
,
dataParallelSizeFlag
)}},
gpuCount
:
4
,
expectedArgs
:
[]
string
{
"python3 -m dynamo.vllm --data-parallel-address $(LWS_LEADER_ADDRESS) --data-parallel-size-local 4 --data-parallel-rpc-port 13445 --data-parallel-start-rank 0 --data-parallel-size 8"
},
description
:
"LWS shell commands should use LWS variables"
,
},
...
...
@@ -163,7 +184,8 @@ func TestVLLMBackend_ShellCommandInjection(t *testing.T) {
numberOfNodes
:
2
,
role
:
RoleLeader
,
multinodeDeployer
:
&
GroveMultinodeDeployer
{},
initialContainer
:
&
corev1
.
Container
{
Command
:
[]
string
{
"sh"
,
"-c"
},
Args
:
[]
string
{
fmt
.
Sprintf
(
"python3 -m dynamo.vllm %s 8 | tee /tmp/log"
,
dataParallelSizeFlag
)},
Resources
:
corev1
.
ResourceRequirements
{
Limits
:
corev1
.
ResourceList
{
corev1
.
ResourceName
(
"nvidia.com/gpu"
)
:
resource
.
MustParse
(
"4"
)}}},
initialContainer
:
&
corev1
.
Container
{
Command
:
[]
string
{
"sh"
,
"-c"
},
Args
:
[]
string
{
fmt
.
Sprintf
(
"python3 -m dynamo.vllm %s 8 | tee /tmp/log"
,
dataParallelSizeFlag
)}},
gpuCount
:
4
,
expectedArgs
:
[]
string
{
"python3 -m dynamo.vllm --data-parallel-address $(GROVE_PCSG_NAME)-$(GROVE_PCSG_INDEX)-test-service-ldr-0.$(GROVE_HEADLESS_SERVICE) --data-parallel-size-local 4 --data-parallel-rpc-port 13445 --data-parallel-start-rank 0 --data-parallel-size 8 | tee /tmp/log"
},
description
:
"Shell commands with pipes should inject flags before pipe"
,
},
...
...
@@ -173,7 +195,17 @@ func TestVLLMBackend_ShellCommandInjection(t *testing.T) {
t
.
Run
(
tt
.
name
,
func
(
t
*
testing
.
T
)
{
expectedCommand
:=
append
([]
string
{},
tt
.
initialContainer
.
Command
...
)
backend
.
UpdateContainer
(
tt
.
initialContainer
,
tt
.
numberOfNodes
,
tt
.
role
,
&
v1alpha1
.
DynamoComponentDeploymentSharedSpec
{},
"test-service"
,
tt
.
multinodeDeployer
)
// Create component with resources from GPU count
component
:=
&
v1alpha1
.
DynamoComponentDeploymentSharedSpec
{}
if
tt
.
gpuCount
>
0
{
component
.
Resources
=
&
v1alpha1
.
Resources
{
Limits
:
&
v1alpha1
.
ResourceItem
{
GPU
:
strconv
.
FormatInt
(
tt
.
gpuCount
,
10
),
},
}
}
backend
.
UpdateContainer
(
tt
.
initialContainer
,
tt
.
numberOfNodes
,
tt
.
role
,
component
,
"test-service"
,
tt
.
multinodeDeployer
)
if
!
reflect
.
DeepEqual
(
tt
.
initialContainer
.
Args
,
tt
.
expectedArgs
)
{
t
.
Errorf
(
"UpdateContainer() args = %v, want %v"
,
tt
.
initialContainer
.
Args
,
tt
.
expectedArgs
)
...
...
@@ -296,6 +328,7 @@ func TestUpdateVLLMMultinodeArgs(t *testing.T) {
role
Role
multinodeDeployer
MultinodeDeployer
initialContainer
*
corev1
.
Container
gpuCount
int64
// GPU count for the test case
expectedArgs
[]
string
expectNotModified
bool
}{
...
...
@@ -303,14 +336,16 @@ func TestUpdateVLLMMultinodeArgs(t *testing.T) {
name
:
"leader prepends ray start --head"
,
role
:
RoleLeader
,
multinodeDeployer
:
&
GroveMultinodeDeployer
{},
initialContainer
:
&
corev1
.
Container
{
Command
:
[]
string
{
"python3"
},
Args
:
[]
string
{
"-m"
,
"dynamo.vllm"
,
tensorParallelSizeFlag
,
"16"
},
Resources
:
corev1
.
ResourceRequirements
{
Limits
:
corev1
.
ResourceList
{
corev1
.
ResourceName
(
"nvidia.com/gpu"
)
:
resource
.
MustParse
(
"8"
)}}},
initialContainer
:
&
corev1
.
Container
{
Command
:
[]
string
{
"python3"
},
Args
:
[]
string
{
"-m"
,
"dynamo.vllm"
,
tensorParallelSizeFlag
,
"16"
}},
gpuCount
:
8
,
expectedArgs
:
[]
string
{
fmt
.
Sprintf
(
"ray start --head --port=%s && python3 -m dynamo.vllm %s 16"
,
VLLMPort
,
tensorParallelSizeFlag
)},
},
{
name
:
"leader prepends distributed data parallel flags"
,
role
:
RoleLeader
,
multinodeDeployer
:
&
GroveMultinodeDeployer
{},
initialContainer
:
&
corev1
.
Container
{
Command
:
[]
string
{
"python3"
},
Args
:
[]
string
{
"-m"
,
"dynamo.vllm"
,
dataParallelSizeFlag
,
"16"
},
Resources
:
corev1
.
ResourceRequirements
{
Limits
:
corev1
.
ResourceList
{
corev1
.
ResourceName
(
"nvidia.com/gpu"
)
:
resource
.
MustParse
(
"8"
)}}},
initialContainer
:
&
corev1
.
Container
{
Command
:
[]
string
{
"python3"
},
Args
:
[]
string
{
"-m"
,
"dynamo.vllm"
,
dataParallelSizeFlag
,
"16"
}},
gpuCount
:
8
,
expectedArgs
:
[]
string
{
fmt
.
Sprintf
(
"exec python3 -m dynamo.vllm %s 16 --data-parallel-address $(GROVE_PCSG_NAME)-$(GROVE_PCSG_INDEX)-test-service-ldr-0.$(GROVE_HEADLESS_SERVICE) --data-parallel-size-local 8 --data-parallel-rpc-port 13445 --data-parallel-start-rank 0"
,
dataParallelSizeFlag
)},
},
{
...
...
@@ -318,34 +353,39 @@ func TestUpdateVLLMMultinodeArgs(t *testing.T) {
role
:
RoleLeader
,
multinodeDeployer
:
&
GroveMultinodeDeployer
{},
initialContainer
:
&
corev1
.
Container
{
Args
:
[]
string
{}},
gpuCount
:
0
,
expectNotModified
:
true
,
},
{
name
:
"worker with ray distributed launch Grove"
,
role
:
RoleWorker
,
multinodeDeployer
:
&
GroveMultinodeDeployer
{},
initialContainer
:
&
corev1
.
Container
{
Args
:
[]
string
{
"python3"
,
"-m"
,
"dynamo.vllm"
,
tensorParallelSizeFlag
,
"16"
},
Resources
:
corev1
.
ResourceRequirements
{
Limits
:
corev1
.
ResourceList
{
corev1
.
ResourceName
(
"nvidia.com/gpu"
)
:
resource
.
MustParse
(
"8"
)}}},
initialContainer
:
&
corev1
.
Container
{
Args
:
[]
string
{
"python3"
,
"-m"
,
"dynamo.vllm"
,
tensorParallelSizeFlag
,
"16"
}},
gpuCount
:
8
,
expectedArgs
:
[]
string
{
"ray start --address=$(GROVE_PCSG_NAME)-$(GROVE_PCSG_INDEX)-test-service-ldr-0.$(GROVE_HEADLESS_SERVICE):6379 --block"
},
},
{
name
:
"worker with data parallel launch Grove"
,
role
:
RoleWorker
,
multinodeDeployer
:
&
GroveMultinodeDeployer
{},
initialContainer
:
&
corev1
.
Container
{
Command
:
[]
string
{
"python3"
},
Args
:
[]
string
{
"-m"
,
"dynamo.vllm"
,
dataParallelSizeFlag
,
"16"
},
Resources
:
corev1
.
ResourceRequirements
{
Limits
:
corev1
.
ResourceList
{
corev1
.
ResourceName
(
"nvidia.com/gpu"
)
:
resource
.
MustParse
(
"8"
)}}},
initialContainer
:
&
corev1
.
Container
{
Command
:
[]
string
{
"python3"
},
Args
:
[]
string
{
"-m"
,
"dynamo.vllm"
,
dataParallelSizeFlag
,
"16"
}},
gpuCount
:
8
,
expectedArgs
:
[]
string
{
fmt
.
Sprintf
(
"exec python3 -m dynamo.vllm %s 16 --data-parallel-address $(GROVE_PCSG_NAME)-$(GROVE_PCSG_INDEX)-test-service-ldr-0.$(GROVE_HEADLESS_SERVICE) --data-parallel-size-local 8 --data-parallel-rpc-port 13445 --data-parallel-start-rank $(( 8 * $((GROVE_PCLQ_POD_INDEX + 1)) ))"
,
dataParallelSizeFlag
)},
},
{
name
:
"worker with data parallel launch Grove, tp > 1"
,
role
:
RoleWorker
,
multinodeDeployer
:
&
GroveMultinodeDeployer
{},
initialContainer
:
&
corev1
.
Container
{
Command
:
[]
string
{
"python3"
},
Args
:
[]
string
{
"-m"
,
"dynamo.vllm"
,
dataParallelSizeFlag
,
"8"
,
tensorParallelSizeFlag
,
"2"
},
Resources
:
corev1
.
ResourceRequirements
{
Limits
:
corev1
.
ResourceList
{
corev1
.
ResourceName
(
"nvidia.com/gpu"
)
:
resource
.
MustParse
(
"8"
)}}},
initialContainer
:
&
corev1
.
Container
{
Command
:
[]
string
{
"python3"
},
Args
:
[]
string
{
"-m"
,
"dynamo.vllm"
,
dataParallelSizeFlag
,
"8"
,
tensorParallelSizeFlag
,
"2"
}},
gpuCount
:
8
,
expectedArgs
:
[]
string
{
fmt
.
Sprintf
(
"exec python3 -m dynamo.vllm %s 8 %s 2 --data-parallel-address $(GROVE_PCSG_NAME)-$(GROVE_PCSG_INDEX)-test-service-ldr-0.$(GROVE_HEADLESS_SERVICE) --data-parallel-size-local 4 --data-parallel-rpc-port 13445 --data-parallel-start-rank $(( 4 * $((GROVE_PCLQ_POD_INDEX + 1)) ))"
,
dataParallelSizeFlag
,
tensorParallelSizeFlag
)},
},
{
name
:
"worker with ray distributed launch LWS"
,
role
:
RoleWorker
,
multinodeDeployer
:
&
LWSMultinodeDeployer
{},
initialContainer
:
&
corev1
.
Container
{
Args
:
[]
string
{
"python3"
,
"-m"
,
"dynamo.vllm"
,
tensorParallelSizeFlag
,
"16"
},
Resources
:
corev1
.
ResourceRequirements
{
Limits
:
corev1
.
ResourceList
{
corev1
.
ResourceName
(
"nvidia.com/gpu"
)
:
resource
.
MustParse
(
"8"
)}}},
initialContainer
:
&
corev1
.
Container
{
Args
:
[]
string
{
"python3"
,
"-m"
,
"dynamo.vllm"
,
tensorParallelSizeFlag
,
"16"
}},
gpuCount
:
8
,
expectedArgs
:
[]
string
{
"ray start --address=$(LWS_LEADER_ADDRESS):6379 --block"
},
},
{
...
...
@@ -353,6 +393,7 @@ func TestUpdateVLLMMultinodeArgs(t *testing.T) {
role
:
RoleMain
,
multinodeDeployer
:
&
GroveMultinodeDeployer
{},
initialContainer
:
&
corev1
.
Container
{
Args
:
[]
string
{
"python3"
,
"-m"
,
"dynamo.frontend"
}},
gpuCount
:
0
,
expectNotModified
:
true
,
},
}
...
...
@@ -362,8 +403,19 @@ func TestUpdateVLLMMultinodeArgs(t *testing.T) {
g
:=
gomega
.
NewGomegaWithT
(
t
)
initialContainerArgs
:=
append
([]
string
{},
tt
.
initialContainer
.
Args
...
)
// Create resources from GPU count
var
resources
*
v1alpha1
.
Resources
if
tt
.
gpuCount
>
0
{
resources
=
&
v1alpha1
.
Resources
{
Limits
:
&
v1alpha1
.
ResourceItem
{
GPU
:
strconv
.
FormatInt
(
tt
.
gpuCount
,
10
),
},
}
}
// Call updateVLLMMultinodeArgs
updateVLLMMultinodeArgs
(
tt
.
initialContainer
,
tt
.
role
,
"test-service"
,
tt
.
multinodeDeployer
)
updateVLLMMultinodeArgs
(
tt
.
initialContainer
,
tt
.
role
,
"test-service"
,
tt
.
multinodeDeployer
,
resources
)
if
tt
.
expectNotModified
{
// Args should not have changed
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment