Unverified Commit 9caafa55 authored by Thomas Montfort's avatar Thomas Montfort Committed by GitHub
Browse files

feat: add custom gpu type to CRD (#4408)

parent 33d9ae78
...@@ -10193,6 +10193,7 @@ spec: ...@@ -10193,6 +10193,7 @@ spec:
GPUs/devices, and any runtime-specific resources. GPUs/devices, and any runtime-specific resources.
properties: properties:
claims: claims:
description: Claims specifies resource claims for dynamic resource allocation
items: items:
description: ResourceClaim references one entry in PodSpec.ResourceClaims. description: ResourceClaim references one entry in PodSpec.ResourceClaims.
properties: properties:
...@@ -10213,35 +10214,53 @@ spec: ...@@ -10213,35 +10214,53 @@ spec:
type: object type: object
type: array type: array
limits: limits:
description: Limits specifies the maximum resources allowed for the component
properties: properties:
cpu: cpu:
description: CPU specifies the CPU resource request/limit (e.g., "1000m", "2")
type: string type: string
custom: custom:
additionalProperties: additionalProperties:
type: string type: string
description: Custom specifies additional custom resource requests/limits
type: object type: object
gpu: gpu:
description: |- description: |-
Indicates the number of GPUs to request. GPU indicates the number of GPUs to request.
total number of GPUs is NumberOfNodes * GPU in case of multinode deployment. Total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
type: string
gpuType:
description: |-
GPUType can specify a custom GPU type, e.g. "gpu.intel.com/xe"
By default if not specified, the GPU type is "nvidia.com/gpu"
type: string type: string
memory: memory:
description: Memory specifies the memory resource request/limit (e.g., "4Gi", "8Gi")
type: string type: string
type: object type: object
requests: requests:
description: Requests specifies the minimum resources required by the component
properties: properties:
cpu: cpu:
description: CPU specifies the CPU resource request/limit (e.g., "1000m", "2")
type: string type: string
custom: custom:
additionalProperties: additionalProperties:
type: string type: string
description: Custom specifies additional custom resource requests/limits
type: object type: object
gpu: gpu:
description: |- description: |-
Indicates the number of GPUs to request. GPU indicates the number of GPUs to request.
total number of GPUs is NumberOfNodes * GPU in case of multinode deployment. Total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
type: string
gpuType:
description: |-
GPUType can specify a custom GPU type, e.g. "gpu.intel.com/xe"
By default if not specified, the GPU type is "nvidia.com/gpu"
type: string type: string
memory: memory:
description: Memory specifies the memory resource request/limit (e.g., "4Gi", "8Gi")
type: string type: string
type: object type: object
type: object type: object
......
...@@ -10328,6 +10328,7 @@ spec: ...@@ -10328,6 +10328,7 @@ spec:
GPUs/devices, and any runtime-specific resources. GPUs/devices, and any runtime-specific resources.
properties: properties:
claims: claims:
description: Claims specifies resource claims for dynamic resource allocation
items: items:
description: ResourceClaim references one entry in PodSpec.ResourceClaims. description: ResourceClaim references one entry in PodSpec.ResourceClaims.
properties: properties:
...@@ -10348,35 +10349,53 @@ spec: ...@@ -10348,35 +10349,53 @@ spec:
type: object type: object
type: array type: array
limits: limits:
description: Limits specifies the maximum resources allowed for the component
properties: properties:
cpu: cpu:
description: CPU specifies the CPU resource request/limit (e.g., "1000m", "2")
type: string type: string
custom: custom:
additionalProperties: additionalProperties:
type: string type: string
description: Custom specifies additional custom resource requests/limits
type: object type: object
gpu: gpu:
description: |- description: |-
Indicates the number of GPUs to request. GPU indicates the number of GPUs to request.
total number of GPUs is NumberOfNodes * GPU in case of multinode deployment. Total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
type: string
gpuType:
description: |-
GPUType can specify a custom GPU type, e.g. "gpu.intel.com/xe"
By default if not specified, the GPU type is "nvidia.com/gpu"
type: string type: string
memory: memory:
description: Memory specifies the memory resource request/limit (e.g., "4Gi", "8Gi")
type: string type: string
type: object type: object
requests: requests:
description: Requests specifies the minimum resources required by the component
properties: properties:
cpu: cpu:
description: CPU specifies the CPU resource request/limit (e.g., "1000m", "2")
type: string type: string
custom: custom:
additionalProperties: additionalProperties:
type: string type: string
description: Custom specifies additional custom resource requests/limits
type: object type: object
gpu: gpu:
description: |- description: |-
Indicates the number of GPUs to request. GPU indicates the number of GPUs to request.
total number of GPUs is NumberOfNodes * GPU in case of multinode deployment. Total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
type: string
gpuType:
description: |-
GPUType can specify a custom GPU type, e.g. "gpu.intel.com/xe"
By default if not specified, the GPU type is "nvidia.com/gpu"
type: string type: string
memory: memory:
description: Memory specifies the memory resource request/limit (e.g., "4Gi", "8Gi")
type: string type: string
type: object type: object
type: object type: object
......
/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// +k8s:deepcopy-gen=package
package common
import (
corev1 "k8s.io/api/core/v1"
)
type ResourceItem struct {
CPU string `json:"cpu,omitempty"`
Memory string `json:"memory,omitempty"`
// Indicates the number of GPUs to request.
// total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
GPU string `json:"gpu,omitempty"`
Custom map[string]string `json:"custom,omitempty"`
}
type Resources struct {
Requests *ResourceItem `json:"requests,omitempty"`
Limits *ResourceItem `json:"limits,omitempty"`
Claims []corev1.ResourceClaim `json:"claims,omitempty"`
}
type DeploymentTargetHPAConf struct {
CPU *int32 `json:"cpu,omitempty"`
GPU *int32 `json:"gpu,omitempty"`
Memory *string `json:"memory,omitempty"`
QPS *int64 `json:"qps,omitempty"`
MinReplicas *int32 `json:"min_replicas,omitempty"`
MaxReplicas *int32 `json:"max_replicas,omitempty"`
}
type LabelItemSchema struct {
Key string `json:"key"`
Value string `json:"value"`
}
type ExtraPodMetadata struct {
Annotations map[string]string `json:"annotations,omitempty"`
Labels map[string]string `json:"labels,omitempty"`
}
type ExtraPodSpec struct {
*corev1.PodSpec `json:",inline"`
MainContainer *corev1.Container `json:"mainContainer,omitempty"`
}
//go:build !ignore_autogenerated
/*
SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
SPDX-License-Identifier: Apache-2.0
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
Copyright 2024.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Code generated by controller-gen. DO NOT EDIT.
package common
import (
"k8s.io/api/core/v1"
)
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *DeploymentTargetHPAConf) DeepCopyInto(out *DeploymentTargetHPAConf) {
*out = *in
if in.CPU != nil {
in, out := &in.CPU, &out.CPU
*out = new(int32)
**out = **in
}
if in.GPU != nil {
in, out := &in.GPU, &out.GPU
*out = new(int32)
**out = **in
}
if in.Memory != nil {
in, out := &in.Memory, &out.Memory
*out = new(string)
**out = **in
}
if in.QPS != nil {
in, out := &in.QPS, &out.QPS
*out = new(int64)
**out = **in
}
if in.MinReplicas != nil {
in, out := &in.MinReplicas, &out.MinReplicas
*out = new(int32)
**out = **in
}
if in.MaxReplicas != nil {
in, out := &in.MaxReplicas, &out.MaxReplicas
*out = new(int32)
**out = **in
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DeploymentTargetHPAConf.
func (in *DeploymentTargetHPAConf) DeepCopy() *DeploymentTargetHPAConf {
if in == nil {
return nil
}
out := new(DeploymentTargetHPAConf)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtraPodMetadata) DeepCopyInto(out *ExtraPodMetadata) {
*out = *in
if in.Annotations != nil {
in, out := &in.Annotations, &out.Annotations
*out = make(map[string]string, len(*in))
for key, val := range *in {
(*out)[key] = val
}
}
if in.Labels != nil {
in, out := &in.Labels, &out.Labels
*out = make(map[string]string, len(*in))
for key, val := range *in {
(*out)[key] = val
}
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtraPodMetadata.
func (in *ExtraPodMetadata) DeepCopy() *ExtraPodMetadata {
if in == nil {
return nil
}
out := new(ExtraPodMetadata)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtraPodSpec) DeepCopyInto(out *ExtraPodSpec) {
*out = *in
if in.PodSpec != nil {
in, out := &in.PodSpec, &out.PodSpec
*out = new(v1.PodSpec)
(*in).DeepCopyInto(*out)
}
if in.MainContainer != nil {
in, out := &in.MainContainer, &out.MainContainer
*out = new(v1.Container)
(*in).DeepCopyInto(*out)
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtraPodSpec.
func (in *ExtraPodSpec) DeepCopy() *ExtraPodSpec {
if in == nil {
return nil
}
out := new(ExtraPodSpec)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *LabelItemSchema) DeepCopyInto(out *LabelItemSchema) {
*out = *in
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LabelItemSchema.
func (in *LabelItemSchema) DeepCopy() *LabelItemSchema {
if in == nil {
return nil
}
out := new(LabelItemSchema)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ResourceItem) DeepCopyInto(out *ResourceItem) {
*out = *in
if in.Custom != nil {
in, out := &in.Custom, &out.Custom
*out = make(map[string]string, len(*in))
for key, val := range *in {
(*out)[key] = val
}
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ResourceItem.
func (in *ResourceItem) DeepCopy() *ResourceItem {
if in == nil {
return nil
}
out := new(ResourceItem)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *Resources) DeepCopyInto(out *Resources) {
*out = *in
if in.Requests != nil {
in, out := &in.Requests, &out.Requests
*out = new(ResourceItem)
(*in).DeepCopyInto(*out)
}
if in.Limits != nil {
in, out := &in.Limits, &out.Limits
*out = new(ResourceItem)
(*in).DeepCopyInto(*out)
}
if in.Claims != nil {
in, out := &in.Claims, &out.Claims
*out = make([]v1.ResourceClaim, len(*in))
copy(*out, *in)
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Resources.
func (in *Resources) DeepCopy() *Resources {
if in == nil {
return nil
}
out := new(Resources)
in.DeepCopyInto(out)
return out
}
...@@ -65,3 +65,53 @@ type SharedMemorySpec struct { ...@@ -65,3 +65,53 @@ type SharedMemorySpec struct {
Disabled bool `json:"disabled,omitempty"` Disabled bool `json:"disabled,omitempty"`
Size resource.Quantity `json:"size,omitempty"` Size resource.Quantity `json:"size,omitempty"`
} }
type ResourceItem struct {
// CPU specifies the CPU resource request/limit (e.g., "1000m", "2")
CPU string `json:"cpu,omitempty"`
// Memory specifies the memory resource request/limit (e.g., "4Gi", "8Gi")
Memory string `json:"memory,omitempty"`
// GPU indicates the number of GPUs to request.
// Total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
GPU string `json:"gpu,omitempty"`
// GPUType can specify a custom GPU type, e.g. "gpu.intel.com/xe"
// By default if not specified, the GPU type is "nvidia.com/gpu"
GPUType string `json:"gpuType,omitempty"`
// Custom specifies additional custom resource requests/limits
Custom map[string]string `json:"custom,omitempty"`
}
// Resources defines requested and limits for a component, including CPU, memory,
// GPUs/devices, and any runtime-specific resources.
type Resources struct {
// Requests specifies the minimum resources required by the component
Requests *ResourceItem `json:"requests,omitempty"`
// Limits specifies the maximum resources allowed for the component
Limits *ResourceItem `json:"limits,omitempty"`
// Claims specifies resource claims for dynamic resource allocation
Claims []corev1.ResourceClaim `json:"claims,omitempty"`
}
type DeploymentTargetHPAConf struct {
CPU *int32 `json:"cpu,omitempty"`
GPU *int32 `json:"gpu,omitempty"`
Memory *string `json:"memory,omitempty"`
QPS *int64 `json:"qps,omitempty"`
MinReplicas *int32 `json:"min_replicas,omitempty"`
MaxReplicas *int32 `json:"max_replicas,omitempty"`
}
type LabelItemSchema struct {
Key string `json:"key"`
Value string `json:"value"`
}
type ExtraPodMetadata struct {
Annotations map[string]string `json:"annotations,omitempty"`
Labels map[string]string `json:"labels,omitempty"`
}
type ExtraPodSpec struct {
*corev1.PodSpec `json:",inline"`
MainContainer *corev1.Container `json:"mainContainer,omitempty"`
}
...@@ -20,7 +20,6 @@ ...@@ -20,7 +20,6 @@
package v1alpha1 package v1alpha1
import ( import (
dynamoCommon "github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
commonconsts "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts" commonconsts "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
corev1 "k8s.io/api/core/v1" corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
...@@ -74,7 +73,7 @@ type DynamoComponentDeploymentSharedSpec struct { ...@@ -74,7 +73,7 @@ type DynamoComponentDeploymentSharedSpec struct {
// Resources requested and limits for this component, including CPU, memory, // Resources requested and limits for this component, including CPU, memory,
// GPUs/devices, and any runtime-specific resources. // GPUs/devices, and any runtime-specific resources.
Resources *dynamoCommon.Resources `json:"resources,omitempty"` Resources *Resources `json:"resources,omitempty"`
// Autoscaling config for this component (replica range, target utilization, etc.). // Autoscaling config for this component (replica range, target utilization, etc.).
Autoscaling *Autoscaling `json:"autoscaling,omitempty"` Autoscaling *Autoscaling `json:"autoscaling,omitempty"`
// Envs defines additional environment variables to inject into the component containers. // Envs defines additional environment variables to inject into the component containers.
...@@ -98,12 +97,12 @@ type DynamoComponentDeploymentSharedSpec struct { ...@@ -98,12 +97,12 @@ type DynamoComponentDeploymentSharedSpec struct {
// +optional // +optional
// ExtraPodMetadata adds labels/annotations to the created Pods. // ExtraPodMetadata adds labels/annotations to the created Pods.
ExtraPodMetadata *dynamoCommon.ExtraPodMetadata `json:"extraPodMetadata,omitempty"` ExtraPodMetadata *ExtraPodMetadata `json:"extraPodMetadata,omitempty"`
// +optional // +optional
// ExtraPodSpec allows to override the main pod spec configuration. // ExtraPodSpec allows to override the main pod spec configuration.
// It is a k8s standard PodSpec. It also contains a MainContainer (standard k8s Container) field // It is a k8s standard PodSpec. It also contains a MainContainer (standard k8s Container) field
// that allows overriding the main container configuration. // that allows overriding the main container configuration.
ExtraPodSpec *dynamoCommon.ExtraPodSpec `json:"extraPodSpec,omitempty"` ExtraPodSpec *ExtraPodSpec `json:"extraPodSpec,omitempty"`
// LivenessProbe to detect and restart unhealthy containers. // LivenessProbe to detect and restart unhealthy containers.
LivenessProbe *corev1.Probe `json:"livenessProbe,omitempty"` LivenessProbe *corev1.Probe `json:"livenessProbe,omitempty"`
......
...@@ -38,11 +38,10 @@ limitations under the License. ...@@ -38,11 +38,10 @@ limitations under the License.
package v1alpha1 package v1alpha1
import ( import (
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
"k8s.io/api/autoscaling/v2" "k8s.io/api/autoscaling/v2"
corev1 "k8s.io/api/core/v1" "k8s.io/api/core/v1"
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime"
) )
...@@ -98,7 +97,7 @@ func (in *BaseStatus) DeepCopyInto(out *BaseStatus) { ...@@ -98,7 +97,7 @@ func (in *BaseStatus) DeepCopyInto(out *BaseStatus) {
*out = *in *out = *in
if in.Conditions != nil { if in.Conditions != nil {
in, out := &in.Conditions, &out.Conditions in, out := &in.Conditions, &out.Conditions
*out = make([]v1.Condition, len(*in)) *out = make([]metav1.Condition, len(*in))
for i := range *in { for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i]) (*in)[i].DeepCopyInto(&(*out)[i])
} }
...@@ -174,6 +173,51 @@ func (in *DeploymentStatus) DeepCopy() *DeploymentStatus { ...@@ -174,6 +173,51 @@ func (in *DeploymentStatus) DeepCopy() *DeploymentStatus {
return out return out
} }
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *DeploymentTargetHPAConf) DeepCopyInto(out *DeploymentTargetHPAConf) {
*out = *in
if in.CPU != nil {
in, out := &in.CPU, &out.CPU
*out = new(int32)
**out = **in
}
if in.GPU != nil {
in, out := &in.GPU, &out.GPU
*out = new(int32)
**out = **in
}
if in.Memory != nil {
in, out := &in.Memory, &out.Memory
*out = new(string)
**out = **in
}
if in.QPS != nil {
in, out := &in.QPS, &out.QPS
*out = new(int64)
**out = **in
}
if in.MinReplicas != nil {
in, out := &in.MinReplicas, &out.MinReplicas
*out = new(int32)
**out = **in
}
if in.MaxReplicas != nil {
in, out := &in.MaxReplicas, &out.MaxReplicas
*out = new(int32)
**out = **in
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DeploymentTargetHPAConf.
func (in *DeploymentTargetHPAConf) DeepCopy() *DeploymentTargetHPAConf {
if in == nil {
return nil
}
out := new(DeploymentTargetHPAConf)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *DynamoComponentDeployment) DeepCopyInto(out *DynamoComponentDeployment) { func (in *DynamoComponentDeployment) DeepCopyInto(out *DynamoComponentDeployment) {
*out = *in *out = *in
...@@ -257,7 +301,7 @@ func (in *DynamoComponentDeploymentSharedSpec) DeepCopyInto(out *DynamoComponent ...@@ -257,7 +301,7 @@ func (in *DynamoComponentDeploymentSharedSpec) DeepCopyInto(out *DynamoComponent
} }
if in.Resources != nil { if in.Resources != nil {
in, out := &in.Resources, &out.Resources in, out := &in.Resources, &out.Resources
*out = new(common.Resources) *out = new(Resources)
(*in).DeepCopyInto(*out) (*in).DeepCopyInto(*out)
} }
if in.Autoscaling != nil { if in.Autoscaling != nil {
...@@ -267,7 +311,7 @@ func (in *DynamoComponentDeploymentSharedSpec) DeepCopyInto(out *DynamoComponent ...@@ -267,7 +311,7 @@ func (in *DynamoComponentDeploymentSharedSpec) DeepCopyInto(out *DynamoComponent
} }
if in.Envs != nil { if in.Envs != nil {
in, out := &in.Envs, &out.Envs in, out := &in.Envs, &out.Envs
*out = make([]corev1.EnvVar, len(*in)) *out = make([]v1.EnvVar, len(*in))
for i := range *in { for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i]) (*in)[i].DeepCopyInto(&(*out)[i])
} }
...@@ -299,22 +343,22 @@ func (in *DynamoComponentDeploymentSharedSpec) DeepCopyInto(out *DynamoComponent ...@@ -299,22 +343,22 @@ func (in *DynamoComponentDeploymentSharedSpec) DeepCopyInto(out *DynamoComponent
} }
if in.ExtraPodMetadata != nil { if in.ExtraPodMetadata != nil {
in, out := &in.ExtraPodMetadata, &out.ExtraPodMetadata in, out := &in.ExtraPodMetadata, &out.ExtraPodMetadata
*out = new(common.ExtraPodMetadata) *out = new(ExtraPodMetadata)
(*in).DeepCopyInto(*out) (*in).DeepCopyInto(*out)
} }
if in.ExtraPodSpec != nil { if in.ExtraPodSpec != nil {
in, out := &in.ExtraPodSpec, &out.ExtraPodSpec in, out := &in.ExtraPodSpec, &out.ExtraPodSpec
*out = new(common.ExtraPodSpec) *out = new(ExtraPodSpec)
(*in).DeepCopyInto(*out) (*in).DeepCopyInto(*out)
} }
if in.LivenessProbe != nil { if in.LivenessProbe != nil {
in, out := &in.LivenessProbe, &out.LivenessProbe in, out := &in.LivenessProbe, &out.LivenessProbe
*out = new(corev1.Probe) *out = new(v1.Probe)
(*in).DeepCopyInto(*out) (*in).DeepCopyInto(*out)
} }
if in.ReadinessProbe != nil { if in.ReadinessProbe != nil {
in, out := &in.ReadinessProbe, &out.ReadinessProbe in, out := &in.ReadinessProbe, &out.ReadinessProbe
*out = new(corev1.Probe) *out = new(v1.Probe)
(*in).DeepCopyInto(*out) (*in).DeepCopyInto(*out)
} }
if in.Replicas != nil { if in.Replicas != nil {
...@@ -360,7 +404,7 @@ func (in *DynamoComponentDeploymentStatus) DeepCopyInto(out *DynamoComponentDepl ...@@ -360,7 +404,7 @@ func (in *DynamoComponentDeploymentStatus) DeepCopyInto(out *DynamoComponentDepl
*out = *in *out = *in
if in.Conditions != nil { if in.Conditions != nil {
in, out := &in.Conditions, &out.Conditions in, out := &in.Conditions, &out.Conditions
*out = make([]v1.Condition, len(*in)) *out = make([]metav1.Condition, len(*in))
for i := range *in { for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i]) (*in)[i].DeepCopyInto(&(*out)[i])
} }
...@@ -528,7 +572,7 @@ func (in *DynamoGraphDeploymentRequestStatus) DeepCopyInto(out *DynamoGraphDeplo ...@@ -528,7 +572,7 @@ func (in *DynamoGraphDeploymentRequestStatus) DeepCopyInto(out *DynamoGraphDeplo
*out = *in *out = *in
if in.Conditions != nil { if in.Conditions != nil {
in, out := &in.Conditions, &out.Conditions in, out := &in.Conditions, &out.Conditions
*out = make([]v1.Condition, len(*in)) *out = make([]metav1.Condition, len(*in))
for i := range *in { for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i]) (*in)[i].DeepCopyInto(&(*out)[i])
} }
...@@ -583,7 +627,7 @@ func (in *DynamoGraphDeploymentSpec) DeepCopyInto(out *DynamoGraphDeploymentSpec ...@@ -583,7 +627,7 @@ func (in *DynamoGraphDeploymentSpec) DeepCopyInto(out *DynamoGraphDeploymentSpec
} }
if in.Envs != nil { if in.Envs != nil {
in, out := &in.Envs, &out.Envs in, out := &in.Envs, &out.Envs
*out = make([]corev1.EnvVar, len(*in)) *out = make([]v1.EnvVar, len(*in))
for i := range *in { for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i]) (*in)[i].DeepCopyInto(&(*out)[i])
} }
...@@ -605,7 +649,7 @@ func (in *DynamoGraphDeploymentStatus) DeepCopyInto(out *DynamoGraphDeploymentSt ...@@ -605,7 +649,7 @@ func (in *DynamoGraphDeploymentStatus) DeepCopyInto(out *DynamoGraphDeploymentSt
*out = *in *out = *in
if in.Conditions != nil { if in.Conditions != nil {
in, out := &in.Conditions, &out.Conditions in, out := &in.Conditions, &out.Conditions
*out = make([]v1.Condition, len(*in)) *out = make([]metav1.Condition, len(*in))
for i := range *in { for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i]) (*in)[i].DeepCopyInto(&(*out)[i])
} }
...@@ -711,7 +755,7 @@ func (in *DynamoModelStatus) DeepCopyInto(out *DynamoModelStatus) { ...@@ -711,7 +755,7 @@ func (in *DynamoModelStatus) DeepCopyInto(out *DynamoModelStatus) {
} }
if in.Conditions != nil { if in.Conditions != nil {
in, out := &in.Conditions, &out.Conditions in, out := &in.Conditions, &out.Conditions
*out = make([]v1.Condition, len(*in)) *out = make([]metav1.Condition, len(*in))
for i := range *in { for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i]) (*in)[i].DeepCopyInto(&(*out)[i])
} }
...@@ -743,6 +787,60 @@ func (in *EndpointInfo) DeepCopy() *EndpointInfo { ...@@ -743,6 +787,60 @@ func (in *EndpointInfo) DeepCopy() *EndpointInfo {
return out return out
} }
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtraPodMetadata) DeepCopyInto(out *ExtraPodMetadata) {
*out = *in
if in.Annotations != nil {
in, out := &in.Annotations, &out.Annotations
*out = make(map[string]string, len(*in))
for key, val := range *in {
(*out)[key] = val
}
}
if in.Labels != nil {
in, out := &in.Labels, &out.Labels
*out = make(map[string]string, len(*in))
for key, val := range *in {
(*out)[key] = val
}
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtraPodMetadata.
func (in *ExtraPodMetadata) DeepCopy() *ExtraPodMetadata {
if in == nil {
return nil
}
out := new(ExtraPodMetadata)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtraPodSpec) DeepCopyInto(out *ExtraPodSpec) {
*out = *in
if in.PodSpec != nil {
in, out := &in.PodSpec, &out.PodSpec
*out = new(v1.PodSpec)
(*in).DeepCopyInto(*out)
}
if in.MainContainer != nil {
in, out := &in.MainContainer, &out.MainContainer
*out = new(v1.Container)
(*in).DeepCopyInto(*out)
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtraPodSpec.
func (in *ExtraPodSpec) DeepCopy() *ExtraPodSpec {
if in == nil {
return nil
}
out := new(ExtraPodSpec)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *IngressSpec) DeepCopyInto(out *IngressSpec) { func (in *IngressSpec) DeepCopyInto(out *IngressSpec) {
*out = *in *out = *in
...@@ -812,6 +910,21 @@ func (in *IngressTLSSpec) DeepCopy() *IngressTLSSpec { ...@@ -812,6 +910,21 @@ func (in *IngressTLSSpec) DeepCopy() *IngressTLSSpec {
return out return out
} }
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *LabelItemSchema) DeepCopyInto(out *LabelItemSchema) {
*out = *in
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LabelItemSchema.
func (in *LabelItemSchema) DeepCopy() *LabelItemSchema {
if in == nil {
return nil
}
out := new(LabelItemSchema)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ModelReference) DeepCopyInto(out *ModelReference) { func (in *ModelReference) DeepCopyInto(out *ModelReference) {
*out = *in *out = *in
...@@ -908,6 +1021,58 @@ func (in *ProfilingConfigSpec) DeepCopy() *ProfilingConfigSpec { ...@@ -908,6 +1021,58 @@ func (in *ProfilingConfigSpec) DeepCopy() *ProfilingConfigSpec {
return out return out
} }
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ResourceItem) DeepCopyInto(out *ResourceItem) {
*out = *in
if in.Custom != nil {
in, out := &in.Custom, &out.Custom
*out = make(map[string]string, len(*in))
for key, val := range *in {
(*out)[key] = val
}
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ResourceItem.
func (in *ResourceItem) DeepCopy() *ResourceItem {
if in == nil {
return nil
}
out := new(ResourceItem)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *Resources) DeepCopyInto(out *Resources) {
*out = *in
if in.Requests != nil {
in, out := &in.Requests, &out.Requests
*out = new(ResourceItem)
(*in).DeepCopyInto(*out)
}
if in.Limits != nil {
in, out := &in.Limits, &out.Limits
*out = new(ResourceItem)
(*in).DeepCopyInto(*out)
}
if in.Claims != nil {
in, out := &in.Claims, &out.Claims
*out = make([]v1.ResourceClaim, len(*in))
copy(*out, *in)
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Resources.
func (in *Resources) DeepCopy() *Resources {
if in == nil {
return nil
}
out := new(Resources)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *SharedMemorySpec) DeepCopyInto(out *SharedMemorySpec) { func (in *SharedMemorySpec) DeepCopyInto(out *SharedMemorySpec) {
*out = *in *out = *in
......
...@@ -10193,6 +10193,7 @@ spec: ...@@ -10193,6 +10193,7 @@ spec:
GPUs/devices, and any runtime-specific resources. GPUs/devices, and any runtime-specific resources.
properties: properties:
claims: claims:
description: Claims specifies resource claims for dynamic resource allocation
items: items:
description: ResourceClaim references one entry in PodSpec.ResourceClaims. description: ResourceClaim references one entry in PodSpec.ResourceClaims.
properties: properties:
...@@ -10213,35 +10214,53 @@ spec: ...@@ -10213,35 +10214,53 @@ spec:
type: object type: object
type: array type: array
limits: limits:
description: Limits specifies the maximum resources allowed for the component
properties: properties:
cpu: cpu:
description: CPU specifies the CPU resource request/limit (e.g., "1000m", "2")
type: string type: string
custom: custom:
additionalProperties: additionalProperties:
type: string type: string
description: Custom specifies additional custom resource requests/limits
type: object type: object
gpu: gpu:
description: |- description: |-
Indicates the number of GPUs to request. GPU indicates the number of GPUs to request.
total number of GPUs is NumberOfNodes * GPU in case of multinode deployment. Total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
type: string
gpuType:
description: |-
GPUType can specify a custom GPU type, e.g. "gpu.intel.com/xe"
By default if not specified, the GPU type is "nvidia.com/gpu"
type: string type: string
memory: memory:
description: Memory specifies the memory resource request/limit (e.g., "4Gi", "8Gi")
type: string type: string
type: object type: object
requests: requests:
description: Requests specifies the minimum resources required by the component
properties: properties:
cpu: cpu:
description: CPU specifies the CPU resource request/limit (e.g., "1000m", "2")
type: string type: string
custom: custom:
additionalProperties: additionalProperties:
type: string type: string
description: Custom specifies additional custom resource requests/limits
type: object type: object
gpu: gpu:
description: |- description: |-
Indicates the number of GPUs to request. GPU indicates the number of GPUs to request.
total number of GPUs is NumberOfNodes * GPU in case of multinode deployment. Total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
type: string
gpuType:
description: |-
GPUType can specify a custom GPU type, e.g. "gpu.intel.com/xe"
By default if not specified, the GPU type is "nvidia.com/gpu"
type: string type: string
memory: memory:
description: Memory specifies the memory resource request/limit (e.g., "4Gi", "8Gi")
type: string type: string
type: object type: object
type: object type: object
......
...@@ -10328,6 +10328,7 @@ spec: ...@@ -10328,6 +10328,7 @@ spec:
GPUs/devices, and any runtime-specific resources. GPUs/devices, and any runtime-specific resources.
properties: properties:
claims: claims:
description: Claims specifies resource claims for dynamic resource allocation
items: items:
description: ResourceClaim references one entry in PodSpec.ResourceClaims. description: ResourceClaim references one entry in PodSpec.ResourceClaims.
properties: properties:
...@@ -10348,35 +10349,53 @@ spec: ...@@ -10348,35 +10349,53 @@ spec:
type: object type: object
type: array type: array
limits: limits:
description: Limits specifies the maximum resources allowed for the component
properties: properties:
cpu: cpu:
description: CPU specifies the CPU resource request/limit (e.g., "1000m", "2")
type: string type: string
custom: custom:
additionalProperties: additionalProperties:
type: string type: string
description: Custom specifies additional custom resource requests/limits
type: object type: object
gpu: gpu:
description: |- description: |-
Indicates the number of GPUs to request. GPU indicates the number of GPUs to request.
total number of GPUs is NumberOfNodes * GPU in case of multinode deployment. Total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
type: string
gpuType:
description: |-
GPUType can specify a custom GPU type, e.g. "gpu.intel.com/xe"
By default if not specified, the GPU type is "nvidia.com/gpu"
type: string type: string
memory: memory:
description: Memory specifies the memory resource request/limit (e.g., "4Gi", "8Gi")
type: string type: string
type: object type: object
requests: requests:
description: Requests specifies the minimum resources required by the component
properties: properties:
cpu: cpu:
description: CPU specifies the CPU resource request/limit (e.g., "1000m", "2")
type: string type: string
custom: custom:
additionalProperties: additionalProperties:
type: string type: string
description: Custom specifies additional custom resource requests/limits
type: object type: object
gpu: gpu:
description: |- description: |-
Indicates the number of GPUs to request. GPU indicates the number of GPUs to request.
total number of GPUs is NumberOfNodes * GPU in case of multinode deployment. Total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
type: string
gpuType:
description: |-
GPUType can specify a custom GPU type, e.g. "gpu.intel.com/xe"
By default if not specified, the GPU type is "nvidia.com/gpu"
type: string type: string
memory: memory:
description: Memory specifies the memory resource request/limit (e.g., "4Gi", "8Gi")
type: string type: string
type: object type: object
type: object type: object
......
...@@ -15,51 +15,7 @@ ...@@ -15,51 +15,7 @@
* limitations under the License. * limitations under the License.
*/ */
package schemas package common
import (
"encoding/json"
"errors"
"time"
)
type DynamoComponent struct {
PresignedDownloadUrl string `json:"presigned_download_url"`
TransmissionStrategy *TransmissionStrategy `json:"transmission_strategy"`
}
type TransmissionStrategy string
const (
TransmissionStrategyPresignedURL TransmissionStrategy = "presigned_url"
TransmissionStrategyProxy TransmissionStrategy = "proxy"
)
type Duration time.Duration
func (d Duration) MarshalJSON() ([]byte, error) {
return json.Marshal(time.Duration(d).String())
}
func (d *Duration) UnmarshalJSON(b []byte) error {
var v any
if err := json.Unmarshal(b, &v); err != nil {
return err
}
switch value := v.(type) {
case float64:
*d = Duration(time.Duration(value))
case string:
tmp, err := time.ParseDuration(value)
if err != nil {
return err
}
*d = Duration(tmp)
default:
return errors.New("invalid duration")
}
return nil
}
type DeploymentStrategy string type DeploymentStrategy string
...@@ -69,10 +25,3 @@ const ( ...@@ -69,10 +25,3 @@ const (
DeploymentStrategyRampedSlowRollout DeploymentStrategy = "RampedSlowRollout" DeploymentStrategyRampedSlowRollout DeploymentStrategy = "RampedSlowRollout"
DeploymentStrategyBestEffortControlledRollout DeploymentStrategy = "BestEffortControlledRollout" DeploymentStrategyBestEffortControlledRollout DeploymentStrategy = "BestEffortControlledRollout"
) )
type DockerRegistrySchema struct {
DynamoRepositoryURI string `json:"dynamoRepositoryURI"`
Server string `json:"server"`
SecretName string `json:"secretName"`
Secure bool `json:"secure"`
}
/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package common package common
import ( import (
......
/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package common package common
import "testing" import "testing"
......
...@@ -33,8 +33,8 @@ import ( ...@@ -33,8 +33,8 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"emperror.dev/errors" "emperror.dev/errors"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/schemas"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1" "github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/common"
commonconsts "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts" commonconsts "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
commonController "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/controller_common" commonController "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/controller_common"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/dynamo" "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/dynamo"
...@@ -1003,9 +1003,9 @@ func (r *DynamoComponentDeploymentReconciler) generateDeployment(ctx context.Con ...@@ -1003,9 +1003,9 @@ func (r *DynamoComponentDeploymentReconciler) generateDeployment(ctx context.Con
resourceAnnotations := getResourceAnnotations(opt.dynamoComponentDeployment) resourceAnnotations := getResourceAnnotations(opt.dynamoComponentDeployment)
strategyStr := resourceAnnotations[KubeAnnotationDeploymentStrategy] strategyStr := resourceAnnotations[KubeAnnotationDeploymentStrategy]
if strategyStr != "" { if strategyStr != "" {
strategyType := schemas.DeploymentStrategy(strategyStr) strategyType := common.DeploymentStrategy(strategyStr)
switch strategyType { switch strategyType {
case schemas.DeploymentStrategyRollingUpdate: case common.DeploymentStrategyRollingUpdate:
strategy = appsv1.DeploymentStrategy{ strategy = appsv1.DeploymentStrategy{
Type: appsv1.RollingUpdateDeploymentStrategyType, Type: appsv1.RollingUpdateDeploymentStrategyType,
RollingUpdate: &appsv1.RollingUpdateDeployment{ RollingUpdate: &appsv1.RollingUpdateDeployment{
...@@ -1013,11 +1013,11 @@ func (r *DynamoComponentDeploymentReconciler) generateDeployment(ctx context.Con ...@@ -1013,11 +1013,11 @@ func (r *DynamoComponentDeploymentReconciler) generateDeployment(ctx context.Con
MaxUnavailable: &defaultMaxUnavailable, MaxUnavailable: &defaultMaxUnavailable,
}, },
} }
case schemas.DeploymentStrategyRecreate: case common.DeploymentStrategyRecreate:
strategy = appsv1.DeploymentStrategy{ strategy = appsv1.DeploymentStrategy{
Type: appsv1.RecreateDeploymentStrategyType, Type: appsv1.RecreateDeploymentStrategyType,
} }
case schemas.DeploymentStrategyRampedSlowRollout: case common.DeploymentStrategyRampedSlowRollout:
strategy = appsv1.DeploymentStrategy{ strategy = appsv1.DeploymentStrategy{
Type: appsv1.RollingUpdateDeploymentStrategyType, Type: appsv1.RollingUpdateDeploymentStrategyType,
RollingUpdate: &appsv1.RollingUpdateDeployment{ RollingUpdate: &appsv1.RollingUpdateDeployment{
...@@ -1025,7 +1025,7 @@ func (r *DynamoComponentDeploymentReconciler) generateDeployment(ctx context.Con ...@@ -1025,7 +1025,7 @@ func (r *DynamoComponentDeploymentReconciler) generateDeployment(ctx context.Con
MaxUnavailable: &[]intstr.IntOrString{intstr.FromInt(0)}[0], MaxUnavailable: &[]intstr.IntOrString{intstr.FromInt(0)}[0],
}, },
} }
case schemas.DeploymentStrategyBestEffortControlledRollout: case common.DeploymentStrategyBestEffortControlledRollout:
strategy = appsv1.DeploymentStrategy{ strategy = appsv1.DeploymentStrategy{
Type: appsv1.RollingUpdateDeploymentStrategyType, Type: appsv1.RollingUpdateDeploymentStrategyType,
RollingUpdate: &appsv1.RollingUpdateDeployment{ RollingUpdate: &appsv1.RollingUpdateDeployment{
......
...@@ -24,7 +24,6 @@ import ( ...@@ -24,7 +24,6 @@ import (
"fmt" "fmt"
"testing" "testing"
dynamoCommon "github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1" "github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
commonconsts "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts" commonconsts "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/controller_common" "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/controller_common"
...@@ -703,18 +702,18 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing. ...@@ -703,18 +702,18 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
Multinode: &v1alpha1.MultinodeSpec{ Multinode: &v1alpha1.MultinodeSpec{
NodeCount: 2, NodeCount: 2,
}, },
Resources: &dynamoCommon.Resources{ Resources: &v1alpha1.Resources{
Requests: &dynamoCommon.ResourceItem{ Requests: &v1alpha1.ResourceItem{
CPU: "300m", CPU: "300m",
Memory: "500Mi", Memory: "500Mi",
}, },
Limits: &dynamoCommon.ResourceItem{ Limits: &v1alpha1.ResourceItem{
GPU: "1", GPU: "1",
Memory: "20Gi", Memory: "20Gi",
CPU: "10", CPU: "10",
}, },
}, },
ExtraPodMetadata: &dynamoCommon.ExtraPodMetadata{ ExtraPodMetadata: &v1alpha1.ExtraPodMetadata{
Annotations: map[string]string{ Annotations: map[string]string{
"nvidia.com/annotation1": "annotation1", "nvidia.com/annotation1": "annotation1",
}, },
...@@ -722,7 +721,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing. ...@@ -722,7 +721,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
"nvidia.com/label1": "label1", "nvidia.com/label1": "label1",
}, },
}, },
ExtraPodSpec: &dynamoCommon.ExtraPodSpec{ ExtraPodSpec: &v1alpha1.ExtraPodSpec{
PodSpec: &corev1.PodSpec{ PodSpec: &corev1.PodSpec{
TerminationGracePeriodSeconds: ptr.To(int64(10)), TerminationGracePeriodSeconds: ptr.To(int64(10)),
Containers: []corev1.Container{ Containers: []corev1.Container{
...@@ -866,7 +865,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing. ...@@ -866,7 +865,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
Limits: corev1.ResourceList{ Limits: corev1.ResourceList{
corev1.ResourceMemory: resource.MustParse("20Gi"), corev1.ResourceMemory: resource.MustParse("20Gi"),
corev1.ResourceCPU: resource.MustParse("10"), corev1.ResourceCPU: resource.MustParse("10"),
"nvidia.com/gpu": resource.MustParse("1"), corev1.ResourceName(commonconsts.KubeResourceGPUNvidia): resource.MustParse("1"),
}, },
}, },
LivenessProbe: &corev1.Probe{ LivenessProbe: &corev1.Probe{
...@@ -1024,12 +1023,12 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing. ...@@ -1024,12 +1023,12 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
Multinode: &v1alpha1.MultinodeSpec{ Multinode: &v1alpha1.MultinodeSpec{
NodeCount: 2, NodeCount: 2,
}, },
Resources: &dynamoCommon.Resources{ Resources: &v1alpha1.Resources{
Limits: &dynamoCommon.ResourceItem{ Limits: &v1alpha1.ResourceItem{
GPU: "1", GPU: "1",
}, },
}, },
ExtraPodSpec: &dynamoCommon.ExtraPodSpec{ ExtraPodSpec: &v1alpha1.ExtraPodSpec{
MainContainer: &corev1.Container{ MainContainer: &corev1.Container{
Image: "test-image:latest", Image: "test-image:latest",
}, },
...@@ -1067,12 +1066,12 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing. ...@@ -1067,12 +1066,12 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
Multinode: &v1alpha1.MultinodeSpec{ Multinode: &v1alpha1.MultinodeSpec{
NodeCount: 2, NodeCount: 2,
}, },
Resources: &dynamoCommon.Resources{ Resources: &v1alpha1.Resources{
Limits: &dynamoCommon.ResourceItem{ Limits: &v1alpha1.ResourceItem{
GPU: "1", GPU: "1",
}, },
}, },
ExtraPodSpec: &dynamoCommon.ExtraPodSpec{ ExtraPodSpec: &v1alpha1.ExtraPodSpec{
MainContainer: &corev1.Container{ MainContainer: &corev1.Container{
Image: "", // Image is missing, will cause error in generatePodTemplateSpec Image: "", // Image is missing, will cause error in generatePodTemplateSpec
}, },
......
...@@ -25,7 +25,7 @@ import ( ...@@ -25,7 +25,7 @@ import (
"reflect" "reflect"
"sort" "sort"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common" "github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts" "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
"github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp"
corev1 "k8s.io/api/core/v1" corev1 "k8s.io/api/core/v1"
...@@ -386,7 +386,7 @@ func firstKey(m map[string]interface{}) string { ...@@ -386,7 +386,7 @@ func firstKey(m map[string]interface{}) string {
return keys[0] return keys[0]
} }
func GetResourcesConfig(resources *common.Resources) (*corev1.ResourceRequirements, error) { func GetResourcesConfig(resources *v1alpha1.Resources) (*corev1.ResourceRequirements, error) {
if resources == nil { if resources == nil {
return nil, nil return nil, nil
...@@ -423,7 +423,7 @@ func GetResourcesConfig(resources *common.Resources) (*corev1.ResourceRequiremen ...@@ -423,7 +423,7 @@ func GetResourcesConfig(resources *common.Resources) (*corev1.ResourceRequiremen
if currentResources.Limits == nil { if currentResources.Limits == nil {
currentResources.Limits = make(corev1.ResourceList) currentResources.Limits = make(corev1.ResourceList)
} }
currentResources.Limits[corev1.ResourceName(consts.KubeResourceGPUNvidia)] = q currentResources.Limits[getGPUResourceName(resources.Limits)] = q
} }
for k, v := range resources.Limits.Custom { for k, v := range resources.Limits.Custom {
q, err := resource.ParseQuantity(v) q, err := resource.ParseQuantity(v)
...@@ -477,6 +477,16 @@ func GetResourcesConfig(resources *common.Resources) (*corev1.ResourceRequiremen ...@@ -477,6 +477,16 @@ func GetResourcesConfig(resources *common.Resources) (*corev1.ResourceRequiremen
return currentResources, nil return currentResources, nil
} }
func getGPUResourceName(resourceItem *v1alpha1.ResourceItem) corev1.ResourceName {
if resourceItem == nil {
return corev1.ResourceName(consts.KubeResourceGPUNvidia)
}
if resourceItem.GPUType != "" {
return corev1.ResourceName(resourceItem.GPUType)
}
return corev1.ResourceName(consts.KubeResourceGPUNvidia)
}
type Resource struct { type Resource struct {
client.Object client.Object
isReady func() (bool, string) isReady func() (bool, string)
......
...@@ -21,8 +21,11 @@ import ( ...@@ -21,8 +21,11 @@ import (
"testing" "testing"
appsv1 "k8s.io/api/apps/v1" appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
"github.com/bsm/gomega" "github.com/bsm/gomega"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client"
...@@ -474,3 +477,58 @@ func TestCopySpec(t *testing.T) { ...@@ -474,3 +477,58 @@ func TestCopySpec(t *testing.T) {
g := gomega.NewGomegaWithT(t) g := gomega.NewGomegaWithT(t)
g.Expect(dst).To(gomega.Equal(expected)) g.Expect(dst).To(gomega.Equal(expected))
} }
func TestGetResourcesConfig(t *testing.T) {
tests := []struct {
name string
resources *v1alpha1.Resources
expectedGPULimit corev1.ResourceName
expectedGPUValue string
expectError bool
}{
{
name: "limits.gpu defined with no gpuType",
resources: &v1alpha1.Resources{
Limits: &v1alpha1.ResourceItem{
GPU: "4",
},
},
expectedGPULimit: corev1.ResourceName(consts.KubeResourceGPUNvidia),
expectedGPUValue: "4",
expectError: false,
},
{
name: "limits.gpu defined with custom gpuType",
resources: &v1alpha1.Resources{
Limits: &v1alpha1.ResourceItem{
GPU: "8",
GPUType: "gpu.custom-type.com/test",
},
},
expectedGPULimit: corev1.ResourceName("gpu.custom-type.com/test"),
expectedGPUValue: "8",
expectError: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
g := gomega.NewGomegaWithT(t)
result, err := GetResourcesConfig(tt.resources)
if tt.expectError {
g.Expect(err).To(gomega.HaveOccurred())
return
}
g.Expect(err).To(gomega.BeNil())
g.Expect(result).ToNot(gomega.BeNil())
g.Expect(result.Limits).ToNot(gomega.BeNil())
gpuQuantity, exists := result.Limits[tt.expectedGPULimit]
g.Expect(exists).To(gomega.BeTrue(), "GPU resource %s should exist in limits", tt.expectedGPULimit)
g.Expect(gpuQuantity.String()).To(gomega.Equal(tt.expectedGPUValue))
})
}
}
...@@ -6,7 +6,6 @@ import ( ...@@ -6,7 +6,6 @@ import (
"strconv" "strconv"
"strings" "strings"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1" "github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
commonconsts "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts" commonconsts "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
corev1 "k8s.io/api/core/v1" corev1 "k8s.io/api/core/v1"
...@@ -197,7 +196,7 @@ func (b *TRTLLMBackend) generateWorkerHostnames(numberOfNodes int32, serviceName ...@@ -197,7 +196,7 @@ func (b *TRTLLMBackend) generateWorkerHostnames(numberOfNodes int32, serviceName
} }
// getGPUsPerNode extracts the number of GPUs per node from resources // getGPUsPerNode extracts the number of GPUs per node from resources
func getGPUsPerNode(resources *common.Resources) int32 { func getGPUsPerNode(resources *v1alpha1.Resources) int32 {
if resources != nil && resources.Requests != nil && resources.Requests.GPU != "" { if resources != nil && resources.Requests != nil && resources.Requests.GPU != "" {
if gpus, err := strconv.ParseInt(resources.Requests.GPU, 10, 32); err == nil { if gpus, err := strconv.ParseInt(resources.Requests.GPU, 10, 32); err == nil {
return int32(gpus) return int32(gpus)
......
...@@ -4,7 +4,6 @@ import ( ...@@ -4,7 +4,6 @@ import (
"strings" "strings"
"testing" "testing"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1" "github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
commonconsts "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts" commonconsts "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
corev1 "k8s.io/api/core/v1" corev1 "k8s.io/api/core/v1"
...@@ -52,8 +51,8 @@ func TestTRTLLMBackend_UpdateContainer(t *testing.T) { ...@@ -52,8 +51,8 @@ func TestTRTLLMBackend_UpdateContainer(t *testing.T) {
role: RoleLeader, role: RoleLeader,
multinodeDeployer: &GroveMultinodeDeployer{}, multinodeDeployer: &GroveMultinodeDeployer{},
component: &v1alpha1.DynamoComponentDeploymentSharedSpec{ component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
Resources: &common.Resources{ Resources: &v1alpha1.Resources{
Requests: &common.ResourceItem{ Requests: &v1alpha1.ResourceItem{
GPU: "2", GPU: "2",
}, },
}, },
...@@ -106,8 +105,8 @@ func TestTRTLLMBackend_UpdateContainer(t *testing.T) { ...@@ -106,8 +105,8 @@ func TestTRTLLMBackend_UpdateContainer(t *testing.T) {
role: RoleLeader, role: RoleLeader,
multinodeDeployer: &LWSMultinodeDeployer{}, multinodeDeployer: &LWSMultinodeDeployer{},
component: &v1alpha1.DynamoComponentDeploymentSharedSpec{ component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
Resources: &common.Resources{ Resources: &v1alpha1.Resources{
Limits: &common.ResourceItem{ Limits: &v1alpha1.ResourceItem{
GPU: "1", GPU: "1",
}, },
}, },
...@@ -557,8 +556,8 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) { ...@@ -557,8 +556,8 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) {
multinodeDeployer: &GroveMultinodeDeployer{}, multinodeDeployer: &GroveMultinodeDeployer{},
serviceName: "test-service", serviceName: "test-service",
component: &v1alpha1.DynamoComponentDeploymentSharedSpec{ component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
Resources: &common.Resources{ Resources: &v1alpha1.Resources{
Requests: &common.ResourceItem{ Requests: &v1alpha1.ResourceItem{
GPU: "2", GPU: "2",
}, },
}, },
...@@ -583,8 +582,8 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) { ...@@ -583,8 +582,8 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) {
multinodeDeployer: &GroveMultinodeDeployer{}, multinodeDeployer: &GroveMultinodeDeployer{},
serviceName: "test", serviceName: "test",
component: &v1alpha1.DynamoComponentDeploymentSharedSpec{ component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
Resources: &common.Resources{ Resources: &v1alpha1.Resources{
Limits: &common.ResourceItem{ Limits: &v1alpha1.ResourceItem{
GPU: "1", GPU: "1",
}, },
}, },
...@@ -599,8 +598,8 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) { ...@@ -599,8 +598,8 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) {
multinodeDeployer: &GroveMultinodeDeployer{}, multinodeDeployer: &GroveMultinodeDeployer{},
serviceName: "test", serviceName: "test",
component: &v1alpha1.DynamoComponentDeploymentSharedSpec{ component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
Resources: &common.Resources{ Resources: &v1alpha1.Resources{
Limits: &common.ResourceItem{ Limits: &v1alpha1.ResourceItem{
GPU: "1", GPU: "1",
}, },
}, },
...@@ -615,8 +614,8 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) { ...@@ -615,8 +614,8 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) {
multinodeDeployer: &GroveMultinodeDeployer{}, multinodeDeployer: &GroveMultinodeDeployer{},
serviceName: "test", serviceName: "test",
component: &v1alpha1.DynamoComponentDeploymentSharedSpec{ component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
Resources: &common.Resources{ Resources: &v1alpha1.Resources{
Limits: &common.ResourceItem{ Limits: &v1alpha1.ResourceItem{
GPU: "1", GPU: "1",
}, },
}, },
...@@ -631,8 +630,8 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) { ...@@ -631,8 +630,8 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) {
multinodeDeployer: &GroveMultinodeDeployer{}, multinodeDeployer: &GroveMultinodeDeployer{},
serviceName: "test", serviceName: "test",
component: &v1alpha1.DynamoComponentDeploymentSharedSpec{ component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
Resources: &common.Resources{ Resources: &v1alpha1.Resources{
Limits: &common.ResourceItem{ Limits: &v1alpha1.ResourceItem{
GPU: "1", GPU: "1",
}, },
}, },
...@@ -647,8 +646,8 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) { ...@@ -647,8 +646,8 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) {
multinodeDeployer: &GroveMultinodeDeployer{}, multinodeDeployer: &GroveMultinodeDeployer{},
serviceName: "test", serviceName: "test",
component: &v1alpha1.DynamoComponentDeploymentSharedSpec{ component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
Resources: &common.Resources{ Resources: &v1alpha1.Resources{
Requests: &common.ResourceItem{ Requests: &v1alpha1.ResourceItem{
GPU: "1", GPU: "1",
}, },
}, },
...@@ -663,8 +662,8 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) { ...@@ -663,8 +662,8 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) {
multinodeDeployer: &GroveMultinodeDeployer{}, multinodeDeployer: &GroveMultinodeDeployer{},
serviceName: "test", serviceName: "test",
component: &v1alpha1.DynamoComponentDeploymentSharedSpec{ component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
Resources: &common.Resources{ Resources: &v1alpha1.Resources{
Requests: &common.ResourceItem{ Requests: &v1alpha1.ResourceItem{
GPU: "1", GPU: "1",
}, },
}, },
...@@ -778,7 +777,7 @@ func TestTRTLLMBackend_setupWorkerContainer(t *testing.T) { ...@@ -778,7 +777,7 @@ func TestTRTLLMBackend_setupWorkerContainer(t *testing.T) {
func TestTRTLLMBackend_getGPUsPerNode(t *testing.T) { func TestTRTLLMBackend_getGPUsPerNode(t *testing.T) {
tests := []struct { tests := []struct {
name string name string
resources *common.Resources resources *v1alpha1.Resources
expected int32 expected int32
}{ }{
{ {
...@@ -788,13 +787,13 @@ func TestTRTLLMBackend_getGPUsPerNode(t *testing.T) { ...@@ -788,13 +787,13 @@ func TestTRTLLMBackend_getGPUsPerNode(t *testing.T) {
}, },
{ {
name: "Empty resources - default to 0", name: "Empty resources - default to 0",
resources: &common.Resources{}, resources: &v1alpha1.Resources{},
expected: 0, expected: 0,
}, },
{ {
name: "GPU in requests", name: "GPU in requests",
resources: &common.Resources{ resources: &v1alpha1.Resources{
Requests: &common.ResourceItem{ Requests: &v1alpha1.ResourceItem{
GPU: "2", GPU: "2",
}, },
}, },
...@@ -802,8 +801,8 @@ func TestTRTLLMBackend_getGPUsPerNode(t *testing.T) { ...@@ -802,8 +801,8 @@ func TestTRTLLMBackend_getGPUsPerNode(t *testing.T) {
}, },
{ {
name: "GPU in limits", name: "GPU in limits",
resources: &common.Resources{ resources: &v1alpha1.Resources{
Limits: &common.ResourceItem{ Limits: &v1alpha1.ResourceItem{
GPU: "4", GPU: "4",
}, },
}, },
...@@ -811,11 +810,11 @@ func TestTRTLLMBackend_getGPUsPerNode(t *testing.T) { ...@@ -811,11 +810,11 @@ func TestTRTLLMBackend_getGPUsPerNode(t *testing.T) {
}, },
{ {
name: "GPU in both requests and limits - requests takes precedence", name: "GPU in both requests and limits - requests takes precedence",
resources: &common.Resources{ resources: &v1alpha1.Resources{
Requests: &common.ResourceItem{ Requests: &v1alpha1.ResourceItem{
GPU: "3", GPU: "3",
}, },
Limits: &common.ResourceItem{ Limits: &v1alpha1.ResourceItem{
GPU: "8", GPU: "8",
}, },
}, },
...@@ -823,8 +822,8 @@ func TestTRTLLMBackend_getGPUsPerNode(t *testing.T) { ...@@ -823,8 +822,8 @@ func TestTRTLLMBackend_getGPUsPerNode(t *testing.T) {
}, },
{ {
name: "Invalid GPU value - default to 0", name: "Invalid GPU value - default to 0",
resources: &common.Resources{ resources: &v1alpha1.Resources{
Requests: &common.ResourceItem{ Requests: &v1alpha1.ResourceItem{
GPU: "invalid", GPU: "invalid",
}, },
}, },
...@@ -832,8 +831,8 @@ func TestTRTLLMBackend_getGPUsPerNode(t *testing.T) { ...@@ -832,8 +831,8 @@ func TestTRTLLMBackend_getGPUsPerNode(t *testing.T) {
}, },
{ {
name: "Empty GPU string - default to 0", name: "Empty GPU string - default to 0",
resources: &common.Resources{ resources: &v1alpha1.Resources{
Requests: &common.ResourceItem{ Requests: &v1alpha1.ResourceItem{
GPU: "", GPU: "",
}, },
}, },
......
...@@ -6,7 +6,6 @@ import ( ...@@ -6,7 +6,6 @@ import (
"strings" "strings"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1" "github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
corev1 "k8s.io/api/core/v1" corev1 "k8s.io/api/core/v1"
"sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/log"
) )
...@@ -26,7 +25,7 @@ func (b *VLLMBackend) UpdateContainer(container *corev1.Container, numberOfNodes ...@@ -26,7 +25,7 @@ func (b *VLLMBackend) UpdateContainer(container *corev1.Container, numberOfNodes
if isMultinode { if isMultinode {
// Apply multinode-specific argument modifications // Apply multinode-specific argument modifications
updateVLLMMultinodeArgs(container, role, serviceName, multinodeDeployer) updateVLLMMultinodeArgs(container, role, serviceName, multinodeDeployer, component.Resources)
// Remove probes for multinode worker and leader // Remove probes for multinode worker and leader
if role == RoleWorker { if role == RoleWorker {
...@@ -72,12 +71,12 @@ func (b *VLLMBackend) UpdatePodSpec(podSpec *corev1.PodSpec, numberOfNodes int32 ...@@ -72,12 +71,12 @@ func (b *VLLMBackend) UpdatePodSpec(podSpec *corev1.PodSpec, numberOfNodes int32
// updateVLLMMultinodeArgs will inject Ray-specific flags for tensor parallel multinode deployments // updateVLLMMultinodeArgs will inject Ray-specific flags for tensor parallel multinode deployments
// OR data parallel flags for data parallel multinode deployments // OR data parallel flags for data parallel multinode deployments
func updateVLLMMultinodeArgs(container *corev1.Container, role Role, serviceName string, multinodeDeployer MultinodeDeployer) { func updateVLLMMultinodeArgs(container *corev1.Container, role Role, serviceName string, multinodeDeployer MultinodeDeployer, resources *v1alpha1.Resources) {
expandedArgs := getExpandedArgs(container) expandedArgs := getExpandedArgs(container)
if needsRayDistributedLaunch(expandedArgs, container.Resources) { if needsRayDistributedLaunch(expandedArgs, resources) {
injectRayDistributedLaunchFlags(container, role, serviceName, multinodeDeployer) injectRayDistributedLaunchFlags(container, role, serviceName, multinodeDeployer)
} else if needsDataParallelLaunch(expandedArgs, container.Resources) { } else if needsDataParallelLaunch(expandedArgs, resources) {
injectDataParallelLaunchFlags(container, role, serviceName, multinodeDeployer) injectDataParallelLaunchFlags(container, role, serviceName, multinodeDeployer, resources)
} else { } else {
logger := log.Log.WithName("vllm-backend") logger := log.Log.WithName("vllm-backend")
logger.Info("No need to inject Ray-specific or data parallel flags for multinode deployments", "args", strings.Join(container.Args, " ")) logger.Info("No need to inject Ray-specific or data parallel flags for multinode deployments", "args", strings.Join(container.Args, " "))
...@@ -109,10 +108,10 @@ func injectRayDistributedLaunchFlags(container *corev1.Container, role Role, ser ...@@ -109,10 +108,10 @@ func injectRayDistributedLaunchFlags(container *corev1.Container, role Role, ser
container.Command = []string{"/bin/sh", "-c"} // ensure cmd is a shell container.Command = []string{"/bin/sh", "-c"} // ensure cmd is a shell
} }
func injectDataParallelLaunchFlags(container *corev1.Container, role Role, serviceName string, multinodeDeployer MultinodeDeployer) { func injectDataParallelLaunchFlags(container *corev1.Container, role Role, serviceName string, multinodeDeployer MultinodeDeployer, resources *v1alpha1.Resources) {
expandedArgs := getExpandedArgs(container) expandedArgs := getExpandedArgs(container)
leaderHostname := multinodeDeployer.GetLeaderHostname(serviceName) leaderHostname := multinodeDeployer.GetLeaderHostname(serviceName)
dataParallelSizeLocal := getContainerGPUs(container.Resources) / getWorldSize(expandedArgs) dataParallelSizeLocal := getContainerGPUs(resources) / getWorldSize(expandedArgs)
var startRank string var startRank string
switch role { switch role {
case RoleWorker: case RoleWorker:
...@@ -134,8 +133,12 @@ func injectDataParallelLaunchFlags(container *corev1.Container, role Role, servi ...@@ -134,8 +133,12 @@ func injectDataParallelLaunchFlags(container *corev1.Container, role Role, servi
// if world size (within DP rank) > GPU count, then we need to inject ray // if world size (within DP rank) > GPU count, then we need to inject ray
// world size = tensor parallel size * pipeline parallel size // world size = tensor parallel size * pipeline parallel size
func needsRayDistributedLaunch(expandedArgs []string, resources corev1.ResourceRequirements) bool { func needsRayDistributedLaunch(expandedArgs []string, resources *v1alpha1.Resources) bool {
return getWorldSize(expandedArgs) > getContainerGPUs(resources) containerGPUs := getContainerGPUs(resources)
if containerGPUs == 0 {
return false
}
return getWorldSize(expandedArgs) > containerGPUs
} }
func getWorldSize(expandedArgs []string) int64 { func getWorldSize(expandedArgs []string) int64 {
...@@ -145,9 +148,13 @@ func getWorldSize(expandedArgs []string) int64 { ...@@ -145,9 +148,13 @@ func getWorldSize(expandedArgs []string) int64 {
} }
// if world size across all DP ranks > GPU count, then we need to inject data parallel multinode coordination // if world size across all DP ranks > GPU count, then we need to inject data parallel multinode coordination
func needsDataParallelLaunch(expandedArgs []string, resources corev1.ResourceRequirements) bool { func needsDataParallelLaunch(expandedArgs []string, resources *v1alpha1.Resources) bool {
dataParallelSize := getFlagValue(expandedArgs, dataParallelSizeFlag) dataParallelSize := getFlagValue(expandedArgs, dataParallelSizeFlag)
return getWorldSize(expandedArgs)*dataParallelSize > getContainerGPUs(resources) containerGPUs := getContainerGPUs(resources)
if containerGPUs == 0 {
return false
}
return getWorldSize(expandedArgs)*dataParallelSize > containerGPUs
} }
func getFlagValue(expandedArgs []string, flag string) int64 { func getFlagValue(expandedArgs []string, flag string) int64 {
...@@ -164,14 +171,12 @@ func getFlagValue(expandedArgs []string, flag string) int64 { ...@@ -164,14 +171,12 @@ func getFlagValue(expandedArgs []string, flag string) int64 {
return flagValue return flagValue
} }
func getContainerGPUs(resources corev1.ResourceRequirements) int64 { func getContainerGPUs(resources *v1alpha1.Resources) int64 {
var containerGPUs int64 = 1 if resources == nil || resources.Limits == nil || resources.Limits.GPU == "" {
// Requests defaults to Limits, doesn't make sense in case where Requests < Limits for gpus return 0
for name, quantity := range resources.Limits { }
if name.String() == consts.KubeResourceGPUNvidia { if gpus, err := strconv.ParseInt(resources.Limits.GPU, 10, 64); err == nil {
containerGPUs = quantity.Value() return gpus
break
}
} }
return containerGPUs return 0
} }
...@@ -3,12 +3,12 @@ package dynamo ...@@ -3,12 +3,12 @@ package dynamo
import ( import (
"fmt" "fmt"
"reflect" "reflect"
"strconv"
"testing" "testing"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1" "github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
"github.com/onsi/gomega" "github.com/onsi/gomega"
corev1 "k8s.io/api/core/v1" corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
) )
func TestVLLMBackend_UpdateContainer(t *testing.T) { func TestVLLMBackend_UpdateContainer(t *testing.T) {
...@@ -21,6 +21,7 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) { ...@@ -21,6 +21,7 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) {
component *v1alpha1.DynamoComponentDeploymentSharedSpec component *v1alpha1.DynamoComponentDeploymentSharedSpec
multinodeDeployer MultinodeDeployer multinodeDeployer MultinodeDeployer
initialContainer *corev1.Container initialContainer *corev1.Container
gpuCount int64 // GPU count for the test case
expectedArgs []string expectedArgs []string
expectNotModified bool // If true, container args should not change expectNotModified bool // If true, container args should not change
expectProbesRemoved bool // If true, probes should be nil expectProbesRemoved bool // If true, probes should be nil
...@@ -32,6 +33,7 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) { ...@@ -32,6 +33,7 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) {
component: &v1alpha1.DynamoComponentDeploymentSharedSpec{}, component: &v1alpha1.DynamoComponentDeploymentSharedSpec{},
multinodeDeployer: &GroveMultinodeDeployer{}, multinodeDeployer: &GroveMultinodeDeployer{},
initialContainer: &corev1.Container{Command: []string{"python3"}, Args: []string{"-m", "dynamo.vllm"}}, initialContainer: &corev1.Container{Command: []string{"python3"}, Args: []string{"-m", "dynamo.vllm"}},
gpuCount: 0,
expectNotModified: true, expectNotModified: true,
}, },
{ {
...@@ -40,7 +42,8 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) { ...@@ -40,7 +42,8 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) {
role: RoleLeader, role: RoleLeader,
component: &v1alpha1.DynamoComponentDeploymentSharedSpec{}, component: &v1alpha1.DynamoComponentDeploymentSharedSpec{},
multinodeDeployer: &GroveMultinodeDeployer{}, multinodeDeployer: &GroveMultinodeDeployer{},
initialContainer: &corev1.Container{Command: []string{"python3", "-m", "dynamo.vllm"}, Args: []string{"--model", "test", tensorParallelSizeFlag, "8"}, Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{corev1.ResourceName("nvidia.com/gpu"): resource.MustParse("4")}}}, initialContainer: &corev1.Container{Command: []string{"python3", "-m", "dynamo.vllm"}, Args: []string{"--model", "test", tensorParallelSizeFlag, "8"}},
gpuCount: 4,
expectedArgs: []string{fmt.Sprintf("ray start --head --port=%s && python3 -m dynamo.vllm --model test %s 8", VLLMPort, tensorParallelSizeFlag)}, expectedArgs: []string{fmt.Sprintf("ray start --head --port=%s && python3 -m dynamo.vllm --model test %s 8", VLLMPort, tensorParallelSizeFlag)},
expectProbesRemoved: true, expectProbesRemoved: true,
}, },
...@@ -50,7 +53,8 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) { ...@@ -50,7 +53,8 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) {
role: RoleWorker, role: RoleWorker,
component: &v1alpha1.DynamoComponentDeploymentSharedSpec{}, component: &v1alpha1.DynamoComponentDeploymentSharedSpec{},
multinodeDeployer: &GroveMultinodeDeployer{}, multinodeDeployer: &GroveMultinodeDeployer{},
initialContainer: &corev1.Container{Command: []string{"python3"}, Args: []string{"-m", "dynamo.vllm", "--model", "test", tensorParallelSizeFlag, "8"}, Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{corev1.ResourceName("nvidia.com/gpu"): resource.MustParse("4")}}}, initialContainer: &corev1.Container{Command: []string{"python3"}, Args: []string{"-m", "dynamo.vllm", "--model", "test", tensorParallelSizeFlag, "8"}},
gpuCount: 4,
expectedArgs: []string{"ray start --address=$(GROVE_PCSG_NAME)-$(GROVE_PCSG_INDEX)-test-service-ldr-0.$(GROVE_HEADLESS_SERVICE):6379 --block"}, expectedArgs: []string{"ray start --address=$(GROVE_PCSG_NAME)-$(GROVE_PCSG_INDEX)-test-service-ldr-0.$(GROVE_HEADLESS_SERVICE):6379 --block"},
expectProbesRemoved: true, expectProbesRemoved: true,
}, },
...@@ -60,7 +64,8 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) { ...@@ -60,7 +64,8 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) {
role: RoleWorker, role: RoleWorker,
component: &v1alpha1.DynamoComponentDeploymentSharedSpec{}, component: &v1alpha1.DynamoComponentDeploymentSharedSpec{},
multinodeDeployer: &LWSMultinodeDeployer{}, multinodeDeployer: &LWSMultinodeDeployer{},
initialContainer: &corev1.Container{Args: []string{"python3", "-m", "dynamo.vllm", tensorParallelSizeFlag, "8"}, Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{corev1.ResourceName("nvidia.com/gpu"): resource.MustParse("4")}}}, initialContainer: &corev1.Container{Args: []string{"python3", "-m", "dynamo.vllm", tensorParallelSizeFlag, "8"}},
gpuCount: 4,
expectedArgs: []string{"ray start --address=$(LWS_LEADER_ADDRESS):6379 --block"}, expectedArgs: []string{"ray start --address=$(LWS_LEADER_ADDRESS):6379 --block"},
expectProbesRemoved: true, expectProbesRemoved: true,
}, },
...@@ -71,6 +76,7 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) { ...@@ -71,6 +76,7 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) {
component: &v1alpha1.DynamoComponentDeploymentSharedSpec{}, component: &v1alpha1.DynamoComponentDeploymentSharedSpec{},
multinodeDeployer: &GroveMultinodeDeployer{}, multinodeDeployer: &GroveMultinodeDeployer{},
initialContainer: &corev1.Container{Args: []string{}}, initialContainer: &corev1.Container{Args: []string{}},
gpuCount: 0,
expectNotModified: true, // Should not modify empty args expectNotModified: true, // Should not modify empty args
}, },
{ {
...@@ -80,6 +86,7 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) { ...@@ -80,6 +86,7 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) {
component: &v1alpha1.DynamoComponentDeploymentSharedSpec{}, component: &v1alpha1.DynamoComponentDeploymentSharedSpec{},
multinodeDeployer: &GroveMultinodeDeployer{}, multinodeDeployer: &GroveMultinodeDeployer{},
initialContainer: &corev1.Container{Args: []string{"python3", "-m", "dynamo.frontend"}}, initialContainer: &corev1.Container{Args: []string{"python3", "-m", "dynamo.frontend"}},
gpuCount: 0,
expectNotModified: true, expectNotModified: true,
}, },
} }
...@@ -90,6 +97,15 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) { ...@@ -90,6 +97,15 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) {
initialContainerArgs := append([]string{}, tt.initialContainer.Args...) initialContainerArgs := append([]string{}, tt.initialContainer.Args...)
// Create resources from GPU count and set in component
if tt.gpuCount > 0 {
tt.component.Resources = &v1alpha1.Resources{
Limits: &v1alpha1.ResourceItem{
GPU: strconv.FormatInt(tt.gpuCount, 10),
},
}
}
// Call UpdateContainer // Call UpdateContainer
backend.UpdateContainer(tt.initialContainer, tt.numberOfNodes, tt.role, tt.component, "test-service", tt.multinodeDeployer) backend.UpdateContainer(tt.initialContainer, tt.numberOfNodes, tt.role, tt.component, "test-service", tt.multinodeDeployer)
...@@ -119,6 +135,7 @@ func TestVLLMBackend_ShellCommandInjection(t *testing.T) { ...@@ -119,6 +135,7 @@ func TestVLLMBackend_ShellCommandInjection(t *testing.T) {
role Role role Role
multinodeDeployer MultinodeDeployer multinodeDeployer MultinodeDeployer
initialContainer *corev1.Container initialContainer *corev1.Container
gpuCount int64 // GPU count for the test case
expectedArgs []string expectedArgs []string
description string description string
}{ }{
...@@ -128,6 +145,7 @@ func TestVLLMBackend_ShellCommandInjection(t *testing.T) { ...@@ -128,6 +145,7 @@ func TestVLLMBackend_ShellCommandInjection(t *testing.T) {
role: RoleMain, role: RoleMain,
multinodeDeployer: &GroveMultinodeDeployer{}, multinodeDeployer: &GroveMultinodeDeployer{},
initialContainer: &corev1.Container{Command: []string{"sh", "-c"}, Args: []string{"python3 -m dynamo.vllm"}}, initialContainer: &corev1.Container{Command: []string{"sh", "-c"}, Args: []string{"python3 -m dynamo.vllm"}},
gpuCount: 0,
expectedArgs: []string{"python3 -m dynamo.vllm"}, expectedArgs: []string{"python3 -m dynamo.vllm"},
description: "Single node should not modify shell commands", description: "Single node should not modify shell commands",
}, },
...@@ -136,7 +154,8 @@ func TestVLLMBackend_ShellCommandInjection(t *testing.T) { ...@@ -136,7 +154,8 @@ func TestVLLMBackend_ShellCommandInjection(t *testing.T) {
numberOfNodes: 2, numberOfNodes: 2,
role: RoleLeader, role: RoleLeader,
multinodeDeployer: &GroveMultinodeDeployer{}, multinodeDeployer: &GroveMultinodeDeployer{},
initialContainer: &corev1.Container{Command: []string{"sh", "-c"}, Args: []string{fmt.Sprintf("python3 -m dynamo.vllm %s 8", dataParallelSizeFlag)}, Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{corev1.ResourceName("nvidia.com/gpu"): resource.MustParse("4")}}}, initialContainer: &corev1.Container{Command: []string{"sh", "-c"}, Args: []string{fmt.Sprintf("python3 -m dynamo.vllm %s 8", dataParallelSizeFlag)}},
gpuCount: 4,
expectedArgs: []string{"python3 -m dynamo.vllm --data-parallel-address $(GROVE_PCSG_NAME)-$(GROVE_PCSG_INDEX)-test-service-ldr-0.$(GROVE_HEADLESS_SERVICE) --data-parallel-size-local 4 --data-parallel-rpc-port 13445 --data-parallel-start-rank 0 --data-parallel-size 8"}, expectedArgs: []string{"python3 -m dynamo.vllm --data-parallel-address $(GROVE_PCSG_NAME)-$(GROVE_PCSG_INDEX)-test-service-ldr-0.$(GROVE_HEADLESS_SERVICE) --data-parallel-size-local 4 --data-parallel-rpc-port 13445 --data-parallel-start-rank 0 --data-parallel-size 8"},
description: "Shell commands should use regex injection for python commands", description: "Shell commands should use regex injection for python commands",
}, },
...@@ -145,7 +164,8 @@ func TestVLLMBackend_ShellCommandInjection(t *testing.T) { ...@@ -145,7 +164,8 @@ func TestVLLMBackend_ShellCommandInjection(t *testing.T) {
numberOfNodes: 2, numberOfNodes: 2,
role: RoleLeader, role: RoleLeader,
multinodeDeployer: &GroveMultinodeDeployer{}, multinodeDeployer: &GroveMultinodeDeployer{},
initialContainer: &corev1.Container{Command: []string{"sh", "-c"}, Args: []string{fmt.Sprintf("echo blah | wc -l && python3 -m dynamo.vllm %s 8 && ls -al", dataParallelSizeFlag)}, Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{corev1.ResourceName("nvidia.com/gpu"): resource.MustParse("4")}}}, initialContainer: &corev1.Container{Command: []string{"sh", "-c"}, Args: []string{fmt.Sprintf("echo blah | wc -l && python3 -m dynamo.vllm %s 8 && ls -al", dataParallelSizeFlag)}},
gpuCount: 4,
expectedArgs: []string{"echo blah | wc -l && python3 -m dynamo.vllm --data-parallel-address $(GROVE_PCSG_NAME)-$(GROVE_PCSG_INDEX)-test-service-ldr-0.$(GROVE_HEADLESS_SERVICE) --data-parallel-size-local 4 --data-parallel-rpc-port 13445 --data-parallel-start-rank 0 --data-parallel-size 8 && ls -al"}, expectedArgs: []string{"echo blah | wc -l && python3 -m dynamo.vllm --data-parallel-address $(GROVE_PCSG_NAME)-$(GROVE_PCSG_INDEX)-test-service-ldr-0.$(GROVE_HEADLESS_SERVICE) --data-parallel-size-local 4 --data-parallel-rpc-port 13445 --data-parallel-start-rank 0 --data-parallel-size 8 && ls -al"},
description: "Complex shell commands should inject flags only into python part", description: "Complex shell commands should inject flags only into python part",
}, },
...@@ -154,7 +174,8 @@ func TestVLLMBackend_ShellCommandInjection(t *testing.T) { ...@@ -154,7 +174,8 @@ func TestVLLMBackend_ShellCommandInjection(t *testing.T) {
numberOfNodes: 2, numberOfNodes: 2,
role: RoleLeader, role: RoleLeader,
multinodeDeployer: &LWSMultinodeDeployer{}, multinodeDeployer: &LWSMultinodeDeployer{},
initialContainer: &corev1.Container{Command: []string{"sh", "-c"}, Args: []string{fmt.Sprintf("python3 -m dynamo.vllm %s 8", dataParallelSizeFlag)}, Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{corev1.ResourceName("nvidia.com/gpu"): resource.MustParse("4")}}}, initialContainer: &corev1.Container{Command: []string{"sh", "-c"}, Args: []string{fmt.Sprintf("python3 -m dynamo.vllm %s 8", dataParallelSizeFlag)}},
gpuCount: 4,
expectedArgs: []string{"python3 -m dynamo.vllm --data-parallel-address $(LWS_LEADER_ADDRESS) --data-parallel-size-local 4 --data-parallel-rpc-port 13445 --data-parallel-start-rank 0 --data-parallel-size 8"}, expectedArgs: []string{"python3 -m dynamo.vllm --data-parallel-address $(LWS_LEADER_ADDRESS) --data-parallel-size-local 4 --data-parallel-rpc-port 13445 --data-parallel-start-rank 0 --data-parallel-size 8"},
description: "LWS shell commands should use LWS variables", description: "LWS shell commands should use LWS variables",
}, },
...@@ -163,7 +184,8 @@ func TestVLLMBackend_ShellCommandInjection(t *testing.T) { ...@@ -163,7 +184,8 @@ func TestVLLMBackend_ShellCommandInjection(t *testing.T) {
numberOfNodes: 2, numberOfNodes: 2,
role: RoleLeader, role: RoleLeader,
multinodeDeployer: &GroveMultinodeDeployer{}, multinodeDeployer: &GroveMultinodeDeployer{},
initialContainer: &corev1.Container{Command: []string{"sh", "-c"}, Args: []string{fmt.Sprintf("python3 -m dynamo.vllm %s 8 | tee /tmp/log", dataParallelSizeFlag)}, Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{corev1.ResourceName("nvidia.com/gpu"): resource.MustParse("4")}}}, initialContainer: &corev1.Container{Command: []string{"sh", "-c"}, Args: []string{fmt.Sprintf("python3 -m dynamo.vllm %s 8 | tee /tmp/log", dataParallelSizeFlag)}},
gpuCount: 4,
expectedArgs: []string{"python3 -m dynamo.vllm --data-parallel-address $(GROVE_PCSG_NAME)-$(GROVE_PCSG_INDEX)-test-service-ldr-0.$(GROVE_HEADLESS_SERVICE) --data-parallel-size-local 4 --data-parallel-rpc-port 13445 --data-parallel-start-rank 0 --data-parallel-size 8 | tee /tmp/log"}, expectedArgs: []string{"python3 -m dynamo.vllm --data-parallel-address $(GROVE_PCSG_NAME)-$(GROVE_PCSG_INDEX)-test-service-ldr-0.$(GROVE_HEADLESS_SERVICE) --data-parallel-size-local 4 --data-parallel-rpc-port 13445 --data-parallel-start-rank 0 --data-parallel-size 8 | tee /tmp/log"},
description: "Shell commands with pipes should inject flags before pipe", description: "Shell commands with pipes should inject flags before pipe",
}, },
...@@ -173,7 +195,17 @@ func TestVLLMBackend_ShellCommandInjection(t *testing.T) { ...@@ -173,7 +195,17 @@ func TestVLLMBackend_ShellCommandInjection(t *testing.T) {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
expectedCommand := append([]string{}, tt.initialContainer.Command...) expectedCommand := append([]string{}, tt.initialContainer.Command...)
backend.UpdateContainer(tt.initialContainer, tt.numberOfNodes, tt.role, &v1alpha1.DynamoComponentDeploymentSharedSpec{}, "test-service", tt.multinodeDeployer) // Create component with resources from GPU count
component := &v1alpha1.DynamoComponentDeploymentSharedSpec{}
if tt.gpuCount > 0 {
component.Resources = &v1alpha1.Resources{
Limits: &v1alpha1.ResourceItem{
GPU: strconv.FormatInt(tt.gpuCount, 10),
},
}
}
backend.UpdateContainer(tt.initialContainer, tt.numberOfNodes, tt.role, component, "test-service", tt.multinodeDeployer)
if !reflect.DeepEqual(tt.initialContainer.Args, tt.expectedArgs) { if !reflect.DeepEqual(tt.initialContainer.Args, tt.expectedArgs) {
t.Errorf("UpdateContainer() args = %v, want %v", tt.initialContainer.Args, tt.expectedArgs) t.Errorf("UpdateContainer() args = %v, want %v", tt.initialContainer.Args, tt.expectedArgs)
...@@ -296,6 +328,7 @@ func TestUpdateVLLMMultinodeArgs(t *testing.T) { ...@@ -296,6 +328,7 @@ func TestUpdateVLLMMultinodeArgs(t *testing.T) {
role Role role Role
multinodeDeployer MultinodeDeployer multinodeDeployer MultinodeDeployer
initialContainer *corev1.Container initialContainer *corev1.Container
gpuCount int64 // GPU count for the test case
expectedArgs []string expectedArgs []string
expectNotModified bool expectNotModified bool
}{ }{
...@@ -303,14 +336,16 @@ func TestUpdateVLLMMultinodeArgs(t *testing.T) { ...@@ -303,14 +336,16 @@ func TestUpdateVLLMMultinodeArgs(t *testing.T) {
name: "leader prepends ray start --head", name: "leader prepends ray start --head",
role: RoleLeader, role: RoleLeader,
multinodeDeployer: &GroveMultinodeDeployer{}, multinodeDeployer: &GroveMultinodeDeployer{},
initialContainer: &corev1.Container{Command: []string{"python3"}, Args: []string{"-m", "dynamo.vllm", tensorParallelSizeFlag, "16"}, Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{corev1.ResourceName("nvidia.com/gpu"): resource.MustParse("8")}}}, initialContainer: &corev1.Container{Command: []string{"python3"}, Args: []string{"-m", "dynamo.vllm", tensorParallelSizeFlag, "16"}},
gpuCount: 8,
expectedArgs: []string{fmt.Sprintf("ray start --head --port=%s && python3 -m dynamo.vllm %s 16", VLLMPort, tensorParallelSizeFlag)}, expectedArgs: []string{fmt.Sprintf("ray start --head --port=%s && python3 -m dynamo.vllm %s 16", VLLMPort, tensorParallelSizeFlag)},
}, },
{ {
name: "leader prepends distributed data parallel flags", name: "leader prepends distributed data parallel flags",
role: RoleLeader, role: RoleLeader,
multinodeDeployer: &GroveMultinodeDeployer{}, multinodeDeployer: &GroveMultinodeDeployer{},
initialContainer: &corev1.Container{Command: []string{"python3"}, Args: []string{"-m", "dynamo.vllm", dataParallelSizeFlag, "16"}, Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{corev1.ResourceName("nvidia.com/gpu"): resource.MustParse("8")}}}, initialContainer: &corev1.Container{Command: []string{"python3"}, Args: []string{"-m", "dynamo.vllm", dataParallelSizeFlag, "16"}},
gpuCount: 8,
expectedArgs: []string{fmt.Sprintf("exec python3 -m dynamo.vllm %s 16 --data-parallel-address $(GROVE_PCSG_NAME)-$(GROVE_PCSG_INDEX)-test-service-ldr-0.$(GROVE_HEADLESS_SERVICE) --data-parallel-size-local 8 --data-parallel-rpc-port 13445 --data-parallel-start-rank 0", dataParallelSizeFlag)}, expectedArgs: []string{fmt.Sprintf("exec python3 -m dynamo.vllm %s 16 --data-parallel-address $(GROVE_PCSG_NAME)-$(GROVE_PCSG_INDEX)-test-service-ldr-0.$(GROVE_HEADLESS_SERVICE) --data-parallel-size-local 8 --data-parallel-rpc-port 13445 --data-parallel-start-rank 0", dataParallelSizeFlag)},
}, },
{ {
...@@ -318,34 +353,39 @@ func TestUpdateVLLMMultinodeArgs(t *testing.T) { ...@@ -318,34 +353,39 @@ func TestUpdateVLLMMultinodeArgs(t *testing.T) {
role: RoleLeader, role: RoleLeader,
multinodeDeployer: &GroveMultinodeDeployer{}, multinodeDeployer: &GroveMultinodeDeployer{},
initialContainer: &corev1.Container{Args: []string{}}, initialContainer: &corev1.Container{Args: []string{}},
gpuCount: 0,
expectNotModified: true, expectNotModified: true,
}, },
{ {
name: "worker with ray distributed launch Grove", name: "worker with ray distributed launch Grove",
role: RoleWorker, role: RoleWorker,
multinodeDeployer: &GroveMultinodeDeployer{}, multinodeDeployer: &GroveMultinodeDeployer{},
initialContainer: &corev1.Container{Args: []string{"python3", "-m", "dynamo.vllm", tensorParallelSizeFlag, "16"}, Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{corev1.ResourceName("nvidia.com/gpu"): resource.MustParse("8")}}}, initialContainer: &corev1.Container{Args: []string{"python3", "-m", "dynamo.vllm", tensorParallelSizeFlag, "16"}},
gpuCount: 8,
expectedArgs: []string{"ray start --address=$(GROVE_PCSG_NAME)-$(GROVE_PCSG_INDEX)-test-service-ldr-0.$(GROVE_HEADLESS_SERVICE):6379 --block"}, expectedArgs: []string{"ray start --address=$(GROVE_PCSG_NAME)-$(GROVE_PCSG_INDEX)-test-service-ldr-0.$(GROVE_HEADLESS_SERVICE):6379 --block"},
}, },
{ {
name: "worker with data parallel launch Grove", name: "worker with data parallel launch Grove",
role: RoleWorker, role: RoleWorker,
multinodeDeployer: &GroveMultinodeDeployer{}, multinodeDeployer: &GroveMultinodeDeployer{},
initialContainer: &corev1.Container{Command: []string{"python3"}, Args: []string{"-m", "dynamo.vllm", dataParallelSizeFlag, "16"}, Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{corev1.ResourceName("nvidia.com/gpu"): resource.MustParse("8")}}}, initialContainer: &corev1.Container{Command: []string{"python3"}, Args: []string{"-m", "dynamo.vllm", dataParallelSizeFlag, "16"}},
gpuCount: 8,
expectedArgs: []string{fmt.Sprintf("exec python3 -m dynamo.vllm %s 16 --data-parallel-address $(GROVE_PCSG_NAME)-$(GROVE_PCSG_INDEX)-test-service-ldr-0.$(GROVE_HEADLESS_SERVICE) --data-parallel-size-local 8 --data-parallel-rpc-port 13445 --data-parallel-start-rank $(( 8 * $((GROVE_PCLQ_POD_INDEX + 1)) ))", dataParallelSizeFlag)}, expectedArgs: []string{fmt.Sprintf("exec python3 -m dynamo.vllm %s 16 --data-parallel-address $(GROVE_PCSG_NAME)-$(GROVE_PCSG_INDEX)-test-service-ldr-0.$(GROVE_HEADLESS_SERVICE) --data-parallel-size-local 8 --data-parallel-rpc-port 13445 --data-parallel-start-rank $(( 8 * $((GROVE_PCLQ_POD_INDEX + 1)) ))", dataParallelSizeFlag)},
}, },
{ {
name: "worker with data parallel launch Grove, tp > 1", name: "worker with data parallel launch Grove, tp > 1",
role: RoleWorker, role: RoleWorker,
multinodeDeployer: &GroveMultinodeDeployer{}, multinodeDeployer: &GroveMultinodeDeployer{},
initialContainer: &corev1.Container{Command: []string{"python3"}, Args: []string{"-m", "dynamo.vllm", dataParallelSizeFlag, "8", tensorParallelSizeFlag, "2"}, Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{corev1.ResourceName("nvidia.com/gpu"): resource.MustParse("8")}}}, initialContainer: &corev1.Container{Command: []string{"python3"}, Args: []string{"-m", "dynamo.vllm", dataParallelSizeFlag, "8", tensorParallelSizeFlag, "2"}},
gpuCount: 8,
expectedArgs: []string{fmt.Sprintf("exec python3 -m dynamo.vllm %s 8 %s 2 --data-parallel-address $(GROVE_PCSG_NAME)-$(GROVE_PCSG_INDEX)-test-service-ldr-0.$(GROVE_HEADLESS_SERVICE) --data-parallel-size-local 4 --data-parallel-rpc-port 13445 --data-parallel-start-rank $(( 4 * $((GROVE_PCLQ_POD_INDEX + 1)) ))", dataParallelSizeFlag, tensorParallelSizeFlag)}, expectedArgs: []string{fmt.Sprintf("exec python3 -m dynamo.vllm %s 8 %s 2 --data-parallel-address $(GROVE_PCSG_NAME)-$(GROVE_PCSG_INDEX)-test-service-ldr-0.$(GROVE_HEADLESS_SERVICE) --data-parallel-size-local 4 --data-parallel-rpc-port 13445 --data-parallel-start-rank $(( 4 * $((GROVE_PCLQ_POD_INDEX + 1)) ))", dataParallelSizeFlag, tensorParallelSizeFlag)},
}, },
{ {
name: "worker with ray distributed launch LWS", name: "worker with ray distributed launch LWS",
role: RoleWorker, role: RoleWorker,
multinodeDeployer: &LWSMultinodeDeployer{}, multinodeDeployer: &LWSMultinodeDeployer{},
initialContainer: &corev1.Container{Args: []string{"python3", "-m", "dynamo.vllm", tensorParallelSizeFlag, "16"}, Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{corev1.ResourceName("nvidia.com/gpu"): resource.MustParse("8")}}}, initialContainer: &corev1.Container{Args: []string{"python3", "-m", "dynamo.vllm", tensorParallelSizeFlag, "16"}},
gpuCount: 8,
expectedArgs: []string{"ray start --address=$(LWS_LEADER_ADDRESS):6379 --block"}, expectedArgs: []string{"ray start --address=$(LWS_LEADER_ADDRESS):6379 --block"},
}, },
{ {
...@@ -353,6 +393,7 @@ func TestUpdateVLLMMultinodeArgs(t *testing.T) { ...@@ -353,6 +393,7 @@ func TestUpdateVLLMMultinodeArgs(t *testing.T) {
role: RoleMain, role: RoleMain,
multinodeDeployer: &GroveMultinodeDeployer{}, multinodeDeployer: &GroveMultinodeDeployer{},
initialContainer: &corev1.Container{Args: []string{"python3", "-m", "dynamo.frontend"}}, initialContainer: &corev1.Container{Args: []string{"python3", "-m", "dynamo.frontend"}},
gpuCount: 0,
expectNotModified: true, expectNotModified: true,
}, },
} }
...@@ -362,8 +403,19 @@ func TestUpdateVLLMMultinodeArgs(t *testing.T) { ...@@ -362,8 +403,19 @@ func TestUpdateVLLMMultinodeArgs(t *testing.T) {
g := gomega.NewGomegaWithT(t) g := gomega.NewGomegaWithT(t)
initialContainerArgs := append([]string{}, tt.initialContainer.Args...) initialContainerArgs := append([]string{}, tt.initialContainer.Args...)
// Create resources from GPU count
var resources *v1alpha1.Resources
if tt.gpuCount > 0 {
resources = &v1alpha1.Resources{
Limits: &v1alpha1.ResourceItem{
GPU: strconv.FormatInt(tt.gpuCount, 10),
},
}
}
// Call updateVLLMMultinodeArgs // Call updateVLLMMultinodeArgs
updateVLLMMultinodeArgs(tt.initialContainer, tt.role, "test-service", tt.multinodeDeployer) updateVLLMMultinodeArgs(tt.initialContainer, tt.role, "test-service", tt.multinodeDeployer, resources)
if tt.expectNotModified { if tt.expectNotModified {
// Args should not have changed // Args should not have changed
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment