feat: add custom gpu type to CRD (#4408)

9caafa55 · Thomas Montfort · GitHub · 33d9ae78 · 9caafa55 · 9caafa55
Unverified Commit 9caafa55 authored Nov 18, 2025 by Thomas Montfort Committed by GitHub Nov 18, 2025
20 changed files
--- a/deploy/cloud/helm/crds/templates/nvidia.com_dynamocomponentdeployments.yaml
+++ b/deploy/cloud/helm/crds/templates/nvidia.com_dynamocomponentdeployments.yaml
@@ -10193,6 +10193,7 @@ spec:
                    GPUs/devices, and any runtime-specific resources.
                  properties:
                    claims:
+                      description: Claims specifies resource claims for dynamic resource allocation
                      items:
                        description: ResourceClaim references one entry in PodSpec.ResourceClaims.
                        properties:
@@ -10213,35 +10214,53 @@ spec:
                        type: object
                      type: array
                    limits:
+                      description: Limits specifies the maximum resources allowed for the component
                      properties:
                        cpu:
+                          description: CPU specifies the CPU resource request/limit (e.g., "1000m", "2")
                          type: string
                        custom:
                          additionalProperties:
                            type: string
+                          description: Custom specifies additional custom resource requests/limits
                          type: object
                        gpu:
                          description: |-
-                            Indicates the number of GPUs to request.
-                            total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
+                            GPU indicates the number of GPUs to request.
+                            Total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
+                          type: string
+                        gpuType:
+                          description: |-
+                            GPUType can specify a custom GPU type, e.g. "gpu.intel.com/xe"
+                            By default if not specified, the GPU type is "nvidia.com/gpu"
                          type: string
                        memory:
+                          description: Memory specifies the memory resource request/limit (e.g., "4Gi", "8Gi")
                          type: string
                      type: object
                    requests:
+                      description: Requests specifies the minimum resources required by the component
                      properties:
                        cpu:
+                          description: CPU specifies the CPU resource request/limit (e.g., "1000m", "2")
                          type: string
                        custom:
                          additionalProperties:
                            type: string
+                          description: Custom specifies additional custom resource requests/limits
                          type: object
                        gpu:
                          description: |-
-                            Indicates the number of GPUs to request.
-                            total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
+                            GPU indicates the number of GPUs to request.
+                            Total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
+                          type: string
+                        gpuType:
+                          description: |-
+                            GPUType can specify a custom GPU type, e.g. "gpu.intel.com/xe"
+                            By default if not specified, the GPU type is "nvidia.com/gpu"
                          type: string
                        memory:
+                          description: Memory specifies the memory resource request/limit (e.g., "4Gi", "8Gi")
                          type: string
                      type: object
                  type: object

--- a/deploy/cloud/helm/crds/templates/nvidia.com_dynamographdeployments.yaml
+++ b/deploy/cloud/helm/crds/templates/nvidia.com_dynamographdeployments.yaml
@@ -10328,6 +10328,7 @@ spec:
                          GPUs/devices, and any runtime-specific resources.
                        properties:
                          claims:
+                            description: Claims specifies resource claims for dynamic resource allocation
                            items:
                              description: ResourceClaim references one entry in PodSpec.ResourceClaims.
                              properties:
@@ -10348,35 +10349,53 @@ spec:
                              type: object
                            type: array
                          limits:
+                            description: Limits specifies the maximum resources allowed for the component
                            properties:
                              cpu:
+                                description: CPU specifies the CPU resource request/limit (e.g., "1000m", "2")
                                type: string
                              custom:
                                additionalProperties:
                                  type: string
+                                description: Custom specifies additional custom resource requests/limits
                                type: object
                              gpu:
                                description: |-
-                                  Indicates the number of GPUs to request.
-                                  total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
+                                  GPU indicates the number of GPUs to request.
+                                  Total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
+                                type: string
+                              gpuType:
+                                description: |-
+                                  GPUType can specify a custom GPU type, e.g. "gpu.intel.com/xe"
+                                  By default if not specified, the GPU type is "nvidia.com/gpu"
                                type: string
                              memory:
+                                description: Memory specifies the memory resource request/limit (e.g., "4Gi", "8Gi")
                                type: string
                            type: object
                          requests:
+                            description: Requests specifies the minimum resources required by the component
                            properties:
                              cpu:
+                                description: CPU specifies the CPU resource request/limit (e.g., "1000m", "2")
                                type: string
                              custom:
                                additionalProperties:
                                  type: string
+                                description: Custom specifies additional custom resource requests/limits
                                type: object
                              gpu:
                                description: |-
-                                  Indicates the number of GPUs to request.
-                                  total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
+                                  GPU indicates the number of GPUs to request.
+                                  Total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
+                                type: string
+                              gpuType:
+                                description: |-
+                                  GPUType can specify a custom GPU type, e.g. "gpu.intel.com/xe"
+                                  By default if not specified, the GPU type is "nvidia.com/gpu"
                                type: string
                              memory:
+                                description: Memory specifies the memory resource request/limit (e.g., "4Gi", "8Gi")
                                type: string
                            type: object
                        type: object

--- a/deploy/cloud/operator/api/dynamo/common/common.go
+++ b/deploy/cloud/operator/api/dynamo/common/common.go
-/*
- * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// +k8s:deepcopy-gen=package
-package common
-
-import (
-	corev1 "k8s.io/api/core/v1"
-)
-
-type ResourceItem struct {
-	CPU    string `json:"cpu,omitempty"`
-	Memory string `json:"memory,omitempty"`
-	// Indicates the number of GPUs to request.
-	// total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
-	GPU    string            `json:"gpu,omitempty"`
-	Custom map[string]string `json:"custom,omitempty"`
-}
-
-type Resources struct {
-	Requests *ResourceItem          `json:"requests,omitempty"`
-	Limits   *ResourceItem          `json:"limits,omitempty"`
-	Claims   []corev1.ResourceClaim `json:"claims,omitempty"`
-}
-
-type DeploymentTargetHPAConf struct {
-	CPU         *int32  `json:"cpu,omitempty"`
-	GPU         *int32  `json:"gpu,omitempty"`
-	Memory      *string `json:"memory,omitempty"`
-	QPS         *int64  `json:"qps,omitempty"`
-	MinReplicas *int32  `json:"min_replicas,omitempty"`
-	MaxReplicas *int32  `json:"max_replicas,omitempty"`
-}
-
-type LabelItemSchema struct {
-	Key   string `json:"key"`
-	Value string `json:"value"`
-}
-
-type ExtraPodMetadata struct {
-	Annotations map[string]string `json:"annotations,omitempty"`
-	Labels      map[string]string `json:"labels,omitempty"`
-}
-
-type ExtraPodSpec struct {
-	*corev1.PodSpec `json:",inline"`
-	MainContainer   *corev1.Container `json:"mainContainer,omitempty"`
-}
--- a/deploy/cloud/operator/api/dynamo/common/zz_generated.deepcopy.go
+++ b/deploy/cloud/operator/api/dynamo/common/zz_generated.deepcopy.go
-//go:build !ignore_autogenerated
-
-/*
-SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-SPDX-License-Identifier: Apache-2.0
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-/*
-Copyright 2024.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-// Code generated by controller-gen. DO NOT EDIT.
-
-package common
-
-import (
-	"k8s.io/api/core/v1"
-)
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *DeploymentTargetHPAConf) DeepCopyInto(out *DeploymentTargetHPAConf) {
-	*out = *in
-	if in.CPU != nil {
-		in, out := &in.CPU, &out.CPU
-		*out = new(int32)
-		**out = **in
-	}
-	if in.GPU != nil {
-		in, out := &in.GPU, &out.GPU
-		*out = new(int32)
-		**out = **in
-	}
-	if in.Memory != nil {
-		in, out := &in.Memory, &out.Memory
-		*out = new(string)
-		**out = **in
-	}
-	if in.QPS != nil {
-		in, out := &in.QPS, &out.QPS
-		*out = new(int64)
-		**out = **in
-	}
-	if in.MinReplicas != nil {
-		in, out := &in.MinReplicas, &out.MinReplicas
-		*out = new(int32)
-		**out = **in
-	}
-	if in.MaxReplicas != nil {
-		in, out := &in.MaxReplicas, &out.MaxReplicas
-		*out = new(int32)
-		**out = **in
-	}
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DeploymentTargetHPAConf.
-func (in *DeploymentTargetHPAConf) DeepCopy() *DeploymentTargetHPAConf {
-	if in == nil {
-		return nil
-	}
-	out := new(DeploymentTargetHPAConf)
-	in.DeepCopyInto(out)
-	return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *ExtraPodMetadata) DeepCopyInto(out *ExtraPodMetadata) {
-	*out = *in
-	if in.Annotations != nil {
-		in, out := &in.Annotations, &out.Annotations
-		*out = make(map[string]string, len(*in))
-		for key, val := range *in {
-			(*out)[key] = val
-		}
-	}
-	if in.Labels != nil {
-		in, out := &in.Labels, &out.Labels
-		*out = make(map[string]string, len(*in))
-		for key, val := range *in {
-			(*out)[key] = val
-		}
-	}
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtraPodMetadata.
-func (in *ExtraPodMetadata) DeepCopy() *ExtraPodMetadata {
-	if in == nil {
-		return nil
-	}
-	out := new(ExtraPodMetadata)
-	in.DeepCopyInto(out)
-	return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *ExtraPodSpec) DeepCopyInto(out *ExtraPodSpec) {
-	*out = *in
-	if in.PodSpec != nil {
-		in, out := &in.PodSpec, &out.PodSpec
-		*out = new(v1.PodSpec)
-		(*in).DeepCopyInto(*out)
-	}
-	if in.MainContainer != nil {
-		in, out := &in.MainContainer, &out.MainContainer
-		*out = new(v1.Container)
-		(*in).DeepCopyInto(*out)
-	}
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtraPodSpec.
-func (in *ExtraPodSpec) DeepCopy() *ExtraPodSpec {
-	if in == nil {
-		return nil
-	}
-	out := new(ExtraPodSpec)
-	in.DeepCopyInto(out)
-	return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *LabelItemSchema) DeepCopyInto(out *LabelItemSchema) {
-	*out = *in
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LabelItemSchema.
-func (in *LabelItemSchema) DeepCopy() *LabelItemSchema {
-	if in == nil {
-		return nil
-	}
-	out := new(LabelItemSchema)
-	in.DeepCopyInto(out)
-	return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *ResourceItem) DeepCopyInto(out *ResourceItem) {
-	*out = *in
-	if in.Custom != nil {
-		in, out := &in.Custom, &out.Custom
-		*out = make(map[string]string, len(*in))
-		for key, val := range *in {
-			(*out)[key] = val
-		}
-	}
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ResourceItem.
-func (in *ResourceItem) DeepCopy() *ResourceItem {
-	if in == nil {
-		return nil
-	}
-	out := new(ResourceItem)
-	in.DeepCopyInto(out)
-	return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *Resources) DeepCopyInto(out *Resources) {
-	*out = *in
-	if in.Requests != nil {
-		in, out := &in.Requests, &out.Requests
-		*out = new(ResourceItem)
-		(*in).DeepCopyInto(*out)
-	}
-	if in.Limits != nil {
-		in, out := &in.Limits, &out.Limits
-		*out = new(ResourceItem)
-		(*in).DeepCopyInto(*out)
-	}
-	if in.Claims != nil {
-		in, out := &in.Claims, &out.Claims
-		*out = make([]v1.ResourceClaim, len(*in))
-		copy(*out, *in)
-	}
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Resources.
-func (in *Resources) DeepCopy() *Resources {
-	if in == nil {
-		return nil
-	}
-	out := new(Resources)
-	in.DeepCopyInto(out)
-	return out
-}
--- a/deploy/cloud/operator/api/v1alpha1/common.go
+++ b/deploy/cloud/operator/api/v1alpha1/common.go
@@ -65,3 +65,53 @@ type SharedMemorySpec struct {
 	Disabled bool              `json:"disabled,omitempty"`
 	Size     resource.Quantity `json:"size,omitempty"`
 }
+
+type ResourceItem struct {
+	// CPU specifies the CPU resource request/limit (e.g., "1000m", "2")
+	CPU string `json:"cpu,omitempty"`
+	// Memory specifies the memory resource request/limit (e.g., "4Gi", "8Gi")
+	Memory string `json:"memory,omitempty"`
+	// GPU indicates the number of GPUs to request.
+	// Total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
+	GPU string `json:"gpu,omitempty"`
+	// GPUType can specify a custom GPU type, e.g. "gpu.intel.com/xe"
+	// By default if not specified, the GPU type is "nvidia.com/gpu"
+	GPUType string `json:"gpuType,omitempty"`
+	// Custom specifies additional custom resource requests/limits
+	Custom map[string]string `json:"custom,omitempty"`
+}
+
+// Resources defines requested and limits for a component, including CPU, memory,
+// GPUs/devices, and any runtime-specific resources.
+type Resources struct {
+	// Requests specifies the minimum resources required by the component
+	Requests *ResourceItem `json:"requests,omitempty"`
+	// Limits specifies the maximum resources allowed for the component
+	Limits *ResourceItem `json:"limits,omitempty"`
+	// Claims specifies resource claims for dynamic resource allocation
+	Claims []corev1.ResourceClaim `json:"claims,omitempty"`
+}
+
+type DeploymentTargetHPAConf struct {
+	CPU         *int32  `json:"cpu,omitempty"`
+	GPU         *int32  `json:"gpu,omitempty"`
+	Memory      *string `json:"memory,omitempty"`
+	QPS         *int64  `json:"qps,omitempty"`
+	MinReplicas *int32  `json:"min_replicas,omitempty"`
+	MaxReplicas *int32  `json:"max_replicas,omitempty"`
+}
+
+type LabelItemSchema struct {
+	Key   string `json:"key"`
+	Value string `json:"value"`
+}
+
+type ExtraPodMetadata struct {
+	Annotations map[string]string `json:"annotations,omitempty"`
+	Labels      map[string]string `json:"labels,omitempty"`
+}
+
+type ExtraPodSpec struct {
+	*corev1.PodSpec `json:",inline"`
+	MainContainer   *corev1.Container `json:"mainContainer,omitempty"`
+}
--- a/deploy/cloud/operator/api/v1alpha1/dynamocomponentdeployment_types.go
+++ b/deploy/cloud/operator/api/v1alpha1/dynamocomponentdeployment_types.go
@@ -20,7 +20,6 @@
 package v1alpha1

 import (
-	dynamoCommon "github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
 	commonconsts "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
 	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -74,7 +73,7 @@ type DynamoComponentDeploymentSharedSpec struct {

 	// Resources requested and limits for this component, including CPU, memory,
 	// GPUs/devices, and any runtime-specific resources.
-	Resources *dynamoCommon.Resources `json:"resources,omitempty"`
+	Resources *Resources `json:"resources,omitempty"`
 	// Autoscaling config for this component (replica range, target utilization, etc.).
 	Autoscaling *Autoscaling `json:"autoscaling,omitempty"`
 	// Envs defines additional environment variables to inject into the component containers.
@@ -98,12 +97,12 @@ type DynamoComponentDeploymentSharedSpec struct {

 	// +optional
 	// ExtraPodMetadata adds labels/annotations to the created Pods.
-	ExtraPodMetadata *dynamoCommon.ExtraPodMetadata `json:"extraPodMetadata,omitempty"`
+	ExtraPodMetadata *ExtraPodMetadata `json:"extraPodMetadata,omitempty"`
 	// +optional
 	// ExtraPodSpec allows to override the main pod spec configuration.
 	// It is a k8s standard PodSpec. It also contains a MainContainer (standard k8s Container) field
 	// that allows overriding the main container configuration.
-	ExtraPodSpec *dynamoCommon.ExtraPodSpec `json:"extraPodSpec,omitempty"`
+	ExtraPodSpec *ExtraPodSpec `json:"extraPodSpec,omitempty"`

 	// LivenessProbe to detect and restart unhealthy containers.
 	LivenessProbe *corev1.Probe `json:"livenessProbe,omitempty"`

--- a/deploy/cloud/operator/api/v1alpha1/zz_generated.deepcopy.go
+++ b/deploy/cloud/operator/api/v1alpha1/zz_generated.deepcopy.go
@@ -38,11 +38,10 @@ limitations under the License.
 package v1alpha1

 import (
-	"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
 	"k8s.io/api/autoscaling/v2"
-	corev1 "k8s.io/api/core/v1"
+	"k8s.io/api/core/v1"
 	apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
-	"k8s.io/apimachinery/pkg/apis/meta/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime"
 )

@@ -98,7 +97,7 @@ func (in *BaseStatus) DeepCopyInto(out *BaseStatus) {
 	*out = *in
 	if in.Conditions != nil {
 		in, out := &in.Conditions, &out.Conditions
-		*out = make([]v1.Condition, len(*in))
+		*out = make([]metav1.Condition, len(*in))
 		for i := range *in {
 			(*in)[i].DeepCopyInto(&(*out)[i])
 		}
@@ -174,6 +173,51 @@ func (in *DeploymentStatus) DeepCopy() *DeploymentStatus {
 	return out
 }

+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *DeploymentTargetHPAConf) DeepCopyInto(out *DeploymentTargetHPAConf) {
+	*out = *in
+	if in.CPU != nil {
+		in, out := &in.CPU, &out.CPU
+		*out = new(int32)
+		**out = **in
+	}
+	if in.GPU != nil {
+		in, out := &in.GPU, &out.GPU
+		*out = new(int32)
+		**out = **in
+	}
+	if in.Memory != nil {
+		in, out := &in.Memory, &out.Memory
+		*out = new(string)
+		**out = **in
+	}
+	if in.QPS != nil {
+		in, out := &in.QPS, &out.QPS
+		*out = new(int64)
+		**out = **in
+	}
+	if in.MinReplicas != nil {
+		in, out := &in.MinReplicas, &out.MinReplicas
+		*out = new(int32)
+		**out = **in
+	}
+	if in.MaxReplicas != nil {
+		in, out := &in.MaxReplicas, &out.MaxReplicas
+		*out = new(int32)
+		**out = **in
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DeploymentTargetHPAConf.
+func (in *DeploymentTargetHPAConf) DeepCopy() *DeploymentTargetHPAConf {
+	if in == nil {
+		return nil
+	}
+	out := new(DeploymentTargetHPAConf)
+	in.DeepCopyInto(out)
+	return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *DynamoComponentDeployment) DeepCopyInto(out *DynamoComponentDeployment) {
 	*out = *in
@@ -257,7 +301,7 @@ func (in *DynamoComponentDeploymentSharedSpec) DeepCopyInto(out *DynamoComponent
 	}
 	if in.Resources != nil {
 		in, out := &in.Resources, &out.Resources
-		*out = new(common.Resources)
+		*out = new(Resources)
 		(*in).DeepCopyInto(*out)
 	}
 	if in.Autoscaling != nil {
@@ -267,7 +311,7 @@ func (in *DynamoComponentDeploymentSharedSpec) DeepCopyInto(out *DynamoComponent
 	}
 	if in.Envs != nil {
 		in, out := &in.Envs, &out.Envs
-		*out = make([]corev1.EnvVar, len(*in))
+		*out = make([]v1.EnvVar, len(*in))
 		for i := range *in {
 			(*in)[i].DeepCopyInto(&(*out)[i])
 		}
@@ -299,22 +343,22 @@ func (in *DynamoComponentDeploymentSharedSpec) DeepCopyInto(out *DynamoComponent
 	}
 	if in.ExtraPodMetadata != nil {
 		in, out := &in.ExtraPodMetadata, &out.ExtraPodMetadata
-		*out = new(common.ExtraPodMetadata)
+		*out = new(ExtraPodMetadata)
 		(*in).DeepCopyInto(*out)
 	}
 	if in.ExtraPodSpec != nil {
 		in, out := &in.ExtraPodSpec, &out.ExtraPodSpec
-		*out = new(common.ExtraPodSpec)
+		*out = new(ExtraPodSpec)
 		(*in).DeepCopyInto(*out)
 	}
 	if in.LivenessProbe != nil {
 		in, out := &in.LivenessProbe, &out.LivenessProbe
-		*out = new(corev1.Probe)
+		*out = new(v1.Probe)
 		(*in).DeepCopyInto(*out)
 	}
 	if in.ReadinessProbe != nil {
 		in, out := &in.ReadinessProbe, &out.ReadinessProbe
-		*out = new(corev1.Probe)
+		*out = new(v1.Probe)
 		(*in).DeepCopyInto(*out)
 	}
 	if in.Replicas != nil {
@@ -360,7 +404,7 @@ func (in *DynamoComponentDeploymentStatus) DeepCopyInto(out *DynamoComponentDepl
 	*out = *in
 	if in.Conditions != nil {
 		in, out := &in.Conditions, &out.Conditions
-		*out = make([]v1.Condition, len(*in))
+		*out = make([]metav1.Condition, len(*in))
 		for i := range *in {
 			(*in)[i].DeepCopyInto(&(*out)[i])
 		}
@@ -528,7 +572,7 @@ func (in *DynamoGraphDeploymentRequestStatus) DeepCopyInto(out *DynamoGraphDeplo
 	*out = *in
 	if in.Conditions != nil {
 		in, out := &in.Conditions, &out.Conditions
-		*out = make([]v1.Condition, len(*in))
+		*out = make([]metav1.Condition, len(*in))
 		for i := range *in {
 			(*in)[i].DeepCopyInto(&(*out)[i])
 		}
@@ -583,7 +627,7 @@ func (in *DynamoGraphDeploymentSpec) DeepCopyInto(out *DynamoGraphDeploymentSpec
 	}
 	if in.Envs != nil {
 		in, out := &in.Envs, &out.Envs
-		*out = make([]corev1.EnvVar, len(*in))
+		*out = make([]v1.EnvVar, len(*in))
 		for i := range *in {
 			(*in)[i].DeepCopyInto(&(*out)[i])
 		}
@@ -605,7 +649,7 @@ func (in *DynamoGraphDeploymentStatus) DeepCopyInto(out *DynamoGraphDeploymentSt
 	*out = *in
 	if in.Conditions != nil {
 		in, out := &in.Conditions, &out.Conditions
-		*out = make([]v1.Condition, len(*in))
+		*out = make([]metav1.Condition, len(*in))
 		for i := range *in {
 			(*in)[i].DeepCopyInto(&(*out)[i])
 		}
@@ -711,7 +755,7 @@ func (in *DynamoModelStatus) DeepCopyInto(out *DynamoModelStatus) {
 	}
 	if in.Conditions != nil {
 		in, out := &in.Conditions, &out.Conditions
-		*out = make([]v1.Condition, len(*in))
+		*out = make([]metav1.Condition, len(*in))
 		for i := range *in {
 			(*in)[i].DeepCopyInto(&(*out)[i])
 		}
@@ -743,6 +787,60 @@ func (in *EndpointInfo) DeepCopy() *EndpointInfo {
 	return out
 }

+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *ExtraPodMetadata) DeepCopyInto(out *ExtraPodMetadata) {
+	*out = *in
+	if in.Annotations != nil {
+		in, out := &in.Annotations, &out.Annotations
+		*out = make(map[string]string, len(*in))
+		for key, val := range *in {
+			(*out)[key] = val
+		}
+	}
+	if in.Labels != nil {
+		in, out := &in.Labels, &out.Labels
+		*out = make(map[string]string, len(*in))
+		for key, val := range *in {
+			(*out)[key] = val
+		}
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtraPodMetadata.
+func (in *ExtraPodMetadata) DeepCopy() *ExtraPodMetadata {
+	if in == nil {
+		return nil
+	}
+	out := new(ExtraPodMetadata)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *ExtraPodSpec) DeepCopyInto(out *ExtraPodSpec) {
+	*out = *in
+	if in.PodSpec != nil {
+		in, out := &in.PodSpec, &out.PodSpec
+		*out = new(v1.PodSpec)
+		(*in).DeepCopyInto(*out)
+	}
+	if in.MainContainer != nil {
+		in, out := &in.MainContainer, &out.MainContainer
+		*out = new(v1.Container)
+		(*in).DeepCopyInto(*out)
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtraPodSpec.
+func (in *ExtraPodSpec) DeepCopy() *ExtraPodSpec {
+	if in == nil {
+		return nil
+	}
+	out := new(ExtraPodSpec)
+	in.DeepCopyInto(out)
+	return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *IngressSpec) DeepCopyInto(out *IngressSpec) {
 	*out = *in
@@ -812,6 +910,21 @@ func (in *IngressTLSSpec) DeepCopy() *IngressTLSSpec {
 	return out
 }

+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *LabelItemSchema) DeepCopyInto(out *LabelItemSchema) {
+	*out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LabelItemSchema.
+func (in *LabelItemSchema) DeepCopy() *LabelItemSchema {
+	if in == nil {
+		return nil
+	}
+	out := new(LabelItemSchema)
+	in.DeepCopyInto(out)
+	return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *ModelReference) DeepCopyInto(out *ModelReference) {
 	*out = *in
@@ -908,6 +1021,58 @@ func (in *ProfilingConfigSpec) DeepCopy() *ProfilingConfigSpec {
 	return out
 }

+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *ResourceItem) DeepCopyInto(out *ResourceItem) {
+	*out = *in
+	if in.Custom != nil {
+		in, out := &in.Custom, &out.Custom
+		*out = make(map[string]string, len(*in))
+		for key, val := range *in {
+			(*out)[key] = val
+		}
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ResourceItem.
+func (in *ResourceItem) DeepCopy() *ResourceItem {
+	if in == nil {
+		return nil
+	}
+	out := new(ResourceItem)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *Resources) DeepCopyInto(out *Resources) {
+	*out = *in
+	if in.Requests != nil {
+		in, out := &in.Requests, &out.Requests
+		*out = new(ResourceItem)
+		(*in).DeepCopyInto(*out)
+	}
+	if in.Limits != nil {
+		in, out := &in.Limits, &out.Limits
+		*out = new(ResourceItem)
+		(*in).DeepCopyInto(*out)
+	}
+	if in.Claims != nil {
+		in, out := &in.Claims, &out.Claims
+		*out = make([]v1.ResourceClaim, len(*in))
+		copy(*out, *in)
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Resources.
+func (in *Resources) DeepCopy() *Resources {
+	if in == nil {
+		return nil
+	}
+	out := new(Resources)
+	in.DeepCopyInto(out)
+	return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *SharedMemorySpec) DeepCopyInto(out *SharedMemorySpec) {
 	*out = *in

--- a/deploy/cloud/operator/config/crd/bases/nvidia.com_dynamocomponentdeployments.yaml
+++ b/deploy/cloud/operator/config/crd/bases/nvidia.com_dynamocomponentdeployments.yaml
@@ -10193,6 +10193,7 @@ spec:
                    GPUs/devices, and any runtime-specific resources.
                  properties:
                    claims:
+                      description: Claims specifies resource claims for dynamic resource allocation
                      items:
                        description: ResourceClaim references one entry in PodSpec.ResourceClaims.
                        properties:
@@ -10213,35 +10214,53 @@ spec:
                        type: object
                      type: array
                    limits:
+                      description: Limits specifies the maximum resources allowed for the component
                      properties:
                        cpu:
+                          description: CPU specifies the CPU resource request/limit (e.g., "1000m", "2")
                          type: string
                        custom:
                          additionalProperties:
                            type: string
+                          description: Custom specifies additional custom resource requests/limits
                          type: object
                        gpu:
                          description: |-
-                            Indicates the number of GPUs to request.
-                            total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
+                            GPU indicates the number of GPUs to request.
+                            Total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
+                          type: string
+                        gpuType:
+                          description: |-
+                            GPUType can specify a custom GPU type, e.g. "gpu.intel.com/xe"
+                            By default if not specified, the GPU type is "nvidia.com/gpu"
                          type: string
                        memory:
+                          description: Memory specifies the memory resource request/limit (e.g., "4Gi", "8Gi")
                          type: string
                      type: object
                    requests:
+                      description: Requests specifies the minimum resources required by the component
                      properties:
                        cpu:
+                          description: CPU specifies the CPU resource request/limit (e.g., "1000m", "2")
                          type: string
                        custom:
                          additionalProperties:
                            type: string
+                          description: Custom specifies additional custom resource requests/limits
                          type: object
                        gpu:
                          description: |-
-                            Indicates the number of GPUs to request.
-                            total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
+                            GPU indicates the number of GPUs to request.
+                            Total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
+                          type: string
+                        gpuType:
+                          description: |-
+                            GPUType can specify a custom GPU type, e.g. "gpu.intel.com/xe"
+                            By default if not specified, the GPU type is "nvidia.com/gpu"
                          type: string
                        memory:
+                          description: Memory specifies the memory resource request/limit (e.g., "4Gi", "8Gi")
                          type: string
                      type: object
                  type: object

--- a/deploy/cloud/operator/config/crd/bases/nvidia.com_dynamographdeployments.yaml
+++ b/deploy/cloud/operator/config/crd/bases/nvidia.com_dynamographdeployments.yaml
@@ -10328,6 +10328,7 @@ spec:
                          GPUs/devices, and any runtime-specific resources.
                        properties:
                          claims:
+                            description: Claims specifies resource claims for dynamic resource allocation
                            items:
                              description: ResourceClaim references one entry in PodSpec.ResourceClaims.
                              properties:
@@ -10348,35 +10349,53 @@ spec:
                              type: object
                            type: array
                          limits:
+                            description: Limits specifies the maximum resources allowed for the component
                            properties:
                              cpu:
+                                description: CPU specifies the CPU resource request/limit (e.g., "1000m", "2")
                                type: string
                              custom:
                                additionalProperties:
                                  type: string
+                                description: Custom specifies additional custom resource requests/limits
                                type: object
                              gpu:
                                description: |-
-                                  Indicates the number of GPUs to request.
-                                  total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
+                                  GPU indicates the number of GPUs to request.
+                                  Total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
+                                type: string
+                              gpuType:
+                                description: |-
+                                  GPUType can specify a custom GPU type, e.g. "gpu.intel.com/xe"
+                                  By default if not specified, the GPU type is "nvidia.com/gpu"
                                type: string
                              memory:
+                                description: Memory specifies the memory resource request/limit (e.g., "4Gi", "8Gi")
                                type: string
                            type: object
                          requests:
+                            description: Requests specifies the minimum resources required by the component
                            properties:
                              cpu:
+                                description: CPU specifies the CPU resource request/limit (e.g., "1000m", "2")
                                type: string
                              custom:
                                additionalProperties:
                                  type: string
+                                description: Custom specifies additional custom resource requests/limits
                                type: object
                              gpu:
                                description: |-
-                                  Indicates the number of GPUs to request.
-                                  total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
+                                  GPU indicates the number of GPUs to request.
+                                  Total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
+                                type: string
+                              gpuType:
+                                description: |-
+                                  GPUType can specify a custom GPU type, e.g. "gpu.intel.com/xe"
+                                  By default if not specified, the GPU type is "nvidia.com/gpu"
                                type: string
                              memory:
+                                description: Memory specifies the memory resource request/limit (e.g., "4Gi", "8Gi")
                                type: string
                            type: object
                        type: object

--- a/deploy/cloud/operator/api/dynamo/schemas/schemas.go
+++ b/deploy/cloud/operator/api/dynamo/schemas/schemas.go
@@ -15,51 +15,7 @@
 * limitations under the License.
 */

-package schemas
-
-import (
-	"encoding/json"
-	"errors"
-	"time"
-)
-
-type DynamoComponent struct {
-	PresignedDownloadUrl string                `json:"presigned_download_url"`
-	TransmissionStrategy *TransmissionStrategy `json:"transmission_strategy"`
-}
-
-type TransmissionStrategy string
-
-const (
-	TransmissionStrategyPresignedURL TransmissionStrategy = "presigned_url"
-	TransmissionStrategyProxy        TransmissionStrategy = "proxy"
-)
-
-type Duration time.Duration
-
-func (d Duration) MarshalJSON() ([]byte, error) {
-	return json.Marshal(time.Duration(d).String())
-}
-
-func (d *Duration) UnmarshalJSON(b []byte) error {
-	var v any
-	if err := json.Unmarshal(b, &v); err != nil {
-		return err
-	}
-	switch value := v.(type) {
-	case float64:
-		*d = Duration(time.Duration(value))
-	case string:
-		tmp, err := time.ParseDuration(value)
-		if err != nil {
-			return err
-		}
-		*d = Duration(tmp)
-	default:
-		return errors.New("invalid duration")
-	}
-	return nil
-}
+package common

 type DeploymentStrategy string

@@ -69,10 +25,3 @@ const (
 	DeploymentStrategyRampedSlowRollout           DeploymentStrategy = "RampedSlowRollout"
 	DeploymentStrategyBestEffortControlledRollout DeploymentStrategy = "BestEffortControlledRollout"
 )
-
-type DockerRegistrySchema struct {
-	DynamoRepositoryURI string `json:"dynamoRepositoryURI"`
-	Server              string `json:"server"`
-	SecretName          string `json:"secretName"`
-	Secure              bool   `json:"secure"`
-}
--- a/deploy/cloud/operator/internal/common/url.go
+++ b/deploy/cloud/operator/internal/common/url.go
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package common

 import (

--- a/deploy/cloud/operator/internal/common/url_test.go
+++ b/deploy/cloud/operator/internal/common/url_test.go
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package common

 import "testing"

--- a/deploy/cloud/operator/internal/controller/dynamocomponentdeployment_controller.go
+++ b/deploy/cloud/operator/internal/controller/dynamocomponentdeployment_controller.go
@@ -33,8 +33,8 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

 	"emperror.dev/errors"
-	"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/schemas"
 	"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
+	"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/common"
 	commonconsts "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
 	commonController "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/controller_common"
 	"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/dynamo"
@@ -1003,9 +1003,9 @@ func (r *DynamoComponentDeploymentReconciler) generateDeployment(ctx context.Con
 	resourceAnnotations := getResourceAnnotations(opt.dynamoComponentDeployment)
 	strategyStr := resourceAnnotations[KubeAnnotationDeploymentStrategy]
 	if strategyStr != "" {
-		strategyType := schemas.DeploymentStrategy(strategyStr)
+		strategyType := common.DeploymentStrategy(strategyStr)
 		switch strategyType {
-		case schemas.DeploymentStrategyRollingUpdate:
+		case common.DeploymentStrategyRollingUpdate:
 			strategy = appsv1.DeploymentStrategy{
 				Type: appsv1.RollingUpdateDeploymentStrategyType,
 				RollingUpdate: &appsv1.RollingUpdateDeployment{
@@ -1013,11 +1013,11 @@ func (r *DynamoComponentDeploymentReconciler) generateDeployment(ctx context.Con
 					MaxUnavailable: &defaultMaxUnavailable,
 				},
 			}
-		case schemas.DeploymentStrategyRecreate:
+		case common.DeploymentStrategyRecreate:
 			strategy = appsv1.DeploymentStrategy{
 				Type: appsv1.RecreateDeploymentStrategyType,
 			}
-		case schemas.DeploymentStrategyRampedSlowRollout:
+		case common.DeploymentStrategyRampedSlowRollout:
 			strategy = appsv1.DeploymentStrategy{
 				Type: appsv1.RollingUpdateDeploymentStrategyType,
 				RollingUpdate: &appsv1.RollingUpdateDeployment{
@@ -1025,7 +1025,7 @@ func (r *DynamoComponentDeploymentReconciler) generateDeployment(ctx context.Con
 					MaxUnavailable: &[]intstr.IntOrString{intstr.FromInt(0)}[0],
 				},
 			}
-		case schemas.DeploymentStrategyBestEffortControlledRollout:
+		case common.DeploymentStrategyBestEffortControlledRollout:
 			strategy = appsv1.DeploymentStrategy{
 				Type: appsv1.RollingUpdateDeploymentStrategyType,
 				RollingUpdate: &appsv1.RollingUpdateDeployment{

--- a/deploy/cloud/operator/internal/controller/dynamocomponentdeployment_controller_test.go
+++ b/deploy/cloud/operator/internal/controller/dynamocomponentdeployment_controller_test.go
@@ -24,7 +24,6 @@ import (
 	"fmt"
 	"testing"

-	dynamoCommon "github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
 	"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
 	commonconsts "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
 	"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/controller_common"
@@ -703,18 +702,18 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
 								Multinode: &v1alpha1.MultinodeSpec{
 									NodeCount: 2,
 								},
-								Resources: &dynamoCommon.Resources{
-									Requests: &dynamoCommon.ResourceItem{
+								Resources: &v1alpha1.Resources{
+									Requests: &v1alpha1.ResourceItem{
 										CPU:    "300m",
 										Memory: "500Mi",
 									},
-									Limits: &dynamoCommon.ResourceItem{
+									Limits: &v1alpha1.ResourceItem{
 										GPU:    "1",
 										Memory: "20Gi",
 										CPU:    "10",
 									},
 								},
-								ExtraPodMetadata: &dynamoCommon.ExtraPodMetadata{
+								ExtraPodMetadata: &v1alpha1.ExtraPodMetadata{
 									Annotations: map[string]string{
 										"nvidia.com/annotation1": "annotation1",
 									},
@@ -722,7 +721,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
 										"nvidia.com/label1": "label1",
 									},
 								},
-								ExtraPodSpec: &dynamoCommon.ExtraPodSpec{
+								ExtraPodSpec: &v1alpha1.ExtraPodSpec{
 									PodSpec: &corev1.PodSpec{
 										TerminationGracePeriodSeconds: ptr.To(int64(10)),
 										Containers: []corev1.Container{
@@ -866,7 +865,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
 											Limits: corev1.ResourceList{
 												corev1.ResourceMemory: resource.MustParse("20Gi"),
 												corev1.ResourceCPU:    resource.MustParse("10"),
-												"nvidia.com/gpu":      resource.MustParse("1"),
+												corev1.ResourceName(commonconsts.KubeResourceGPUNvidia): resource.MustParse("1"),
 											},
 										},
 										LivenessProbe: &corev1.Probe{
@@ -1024,12 +1023,12 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
 								Multinode: &v1alpha1.MultinodeSpec{
 									NodeCount: 2,
 								},
-								Resources: &dynamoCommon.Resources{
-									Limits: &dynamoCommon.ResourceItem{
+								Resources: &v1alpha1.Resources{
+									Limits: &v1alpha1.ResourceItem{
 										GPU: "1",
 									},
 								},
-								ExtraPodSpec: &dynamoCommon.ExtraPodSpec{
+								ExtraPodSpec: &v1alpha1.ExtraPodSpec{
 									MainContainer: &corev1.Container{
 										Image: "test-image:latest",
 									},
@@ -1067,12 +1066,12 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
 								Multinode: &v1alpha1.MultinodeSpec{
 									NodeCount: 2,
 								},
-								Resources: &dynamoCommon.Resources{
-									Limits: &dynamoCommon.ResourceItem{
+								Resources: &v1alpha1.Resources{
+									Limits: &v1alpha1.ResourceItem{
 										GPU: "1",
 									},
 								},
-								ExtraPodSpec: &dynamoCommon.ExtraPodSpec{
+								ExtraPodSpec: &v1alpha1.ExtraPodSpec{
 									MainContainer: &corev1.Container{
 										Image: "", // Image is missing, will cause error in generatePodTemplateSpec
 									},

--- a/deploy/cloud/operator/internal/controller_common/resource.go
+++ b/deploy/cloud/operator/internal/controller_common/resource.go
@@ -25,7 +25,7 @@ import (
 	"reflect"
 	"sort"

-	"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
+	"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
 	"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
 	"github.com/google/go-cmp/cmp"
 	corev1 "k8s.io/api/core/v1"
@@ -386,7 +386,7 @@ func firstKey(m map[string]interface{}) string {
 	return keys[0]
 }

-func GetResourcesConfig(resources *common.Resources) (*corev1.ResourceRequirements, error) {
+func GetResourcesConfig(resources *v1alpha1.Resources) (*corev1.ResourceRequirements, error) {

 	if resources == nil {
 		return nil, nil
@@ -423,7 +423,7 @@ func GetResourcesConfig(resources *common.Resources) (*corev1.ResourceRequiremen
 			if currentResources.Limits == nil {
 				currentResources.Limits = make(corev1.ResourceList)
 			}
-			currentResources.Limits[corev1.ResourceName(consts.KubeResourceGPUNvidia)] = q
+			currentResources.Limits[getGPUResourceName(resources.Limits)] = q
 		}
 		for k, v := range resources.Limits.Custom {
 			q, err := resource.ParseQuantity(v)
@@ -477,6 +477,16 @@ func GetResourcesConfig(resources *common.Resources) (*corev1.ResourceRequiremen
 	return currentResources, nil
 }

+func getGPUResourceName(resourceItem *v1alpha1.ResourceItem) corev1.ResourceName {
+	if resourceItem == nil {
+		return corev1.ResourceName(consts.KubeResourceGPUNvidia)
+	}
+	if resourceItem.GPUType != "" {
+		return corev1.ResourceName(resourceItem.GPUType)
+	}
+	return corev1.ResourceName(consts.KubeResourceGPUNvidia)
+}
+
 type Resource struct {
 	client.Object
 	isReady func() (bool, string)

--- a/deploy/cloud/operator/internal/controller_common/resource_test.go
+++ b/deploy/cloud/operator/internal/controller_common/resource_test.go
@@ -21,8 +21,11 @@ import (
 	"testing"

 	appsv1 "k8s.io/api/apps/v1"
+	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

+	"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
+	"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
 	"github.com/bsm/gomega"
 	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
 	"sigs.k8s.io/controller-runtime/pkg/client"
@@ -474,3 +477,58 @@ func TestCopySpec(t *testing.T) {
 	g := gomega.NewGomegaWithT(t)
 	g.Expect(dst).To(gomega.Equal(expected))
 }
+
+func TestGetResourcesConfig(t *testing.T) {
+	tests := []struct {
+		name             string
+		resources        *v1alpha1.Resources
+		expectedGPULimit corev1.ResourceName
+		expectedGPUValue string
+		expectError      bool
+	}{
+		{
+			name: "limits.gpu defined with no gpuType",
+			resources: &v1alpha1.Resources{
+				Limits: &v1alpha1.ResourceItem{
+					GPU: "4",
+				},
+			},
+			expectedGPULimit: corev1.ResourceName(consts.KubeResourceGPUNvidia),
+			expectedGPUValue: "4",
+			expectError:      false,
+		},
+		{
+			name: "limits.gpu defined with custom gpuType",
+			resources: &v1alpha1.Resources{
+				Limits: &v1alpha1.ResourceItem{
+					GPU:     "8",
+					GPUType: "gpu.custom-type.com/test",
+				},
+			},
+			expectedGPULimit: corev1.ResourceName("gpu.custom-type.com/test"),
+			expectedGPUValue: "8",
+			expectError:      false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			g := gomega.NewGomegaWithT(t)
+
+			result, err := GetResourcesConfig(tt.resources)
+
+			if tt.expectError {
+				g.Expect(err).To(gomega.HaveOccurred())
+				return
+			}
+
+			g.Expect(err).To(gomega.BeNil())
+			g.Expect(result).ToNot(gomega.BeNil())
+			g.Expect(result.Limits).ToNot(gomega.BeNil())
+
+			gpuQuantity, exists := result.Limits[tt.expectedGPULimit]
+			g.Expect(exists).To(gomega.BeTrue(), "GPU resource %s should exist in limits", tt.expectedGPULimit)
+			g.Expect(gpuQuantity.String()).To(gomega.Equal(tt.expectedGPUValue))
+		})
+	}
+}
--- a/deploy/cloud/operator/internal/dynamo/backend_trtllm.go
+++ b/deploy/cloud/operator/internal/dynamo/backend_trtllm.go
@@ -6,7 +6,6 @@ import (
 	"strconv"
 	"strings"

-	"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
 	"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
 	commonconsts "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
 	corev1 "k8s.io/api/core/v1"
@@ -197,7 +196,7 @@ func (b *TRTLLMBackend) generateWorkerHostnames(numberOfNodes int32, serviceName
 }

 // getGPUsPerNode extracts the number of GPUs per node from resources
-func getGPUsPerNode(resources *common.Resources) int32 {
+func getGPUsPerNode(resources *v1alpha1.Resources) int32 {
 	if resources != nil && resources.Requests != nil && resources.Requests.GPU != "" {
 		if gpus, err := strconv.ParseInt(resources.Requests.GPU, 10, 32); err == nil {
 			return int32(gpus)

--- a/deploy/cloud/operator/internal/dynamo/backend_trtllm_test.go
+++ b/deploy/cloud/operator/internal/dynamo/backend_trtllm_test.go
@@ -4,7 +4,6 @@ import (
 	"strings"
 	"testing"

-	"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
 	"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
 	commonconsts "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
 	corev1 "k8s.io/api/core/v1"
@@ -52,8 +51,8 @@ func TestTRTLLMBackend_UpdateContainer(t *testing.T) {
 			role:              RoleLeader,
 			multinodeDeployer: &GroveMultinodeDeployer{},
 			component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
-				Resources: &common.Resources{
-					Requests: &common.ResourceItem{
+				Resources: &v1alpha1.Resources{
+					Requests: &v1alpha1.ResourceItem{
 						GPU: "2",
 					},
 				},
@@ -106,8 +105,8 @@ func TestTRTLLMBackend_UpdateContainer(t *testing.T) {
 			role:              RoleLeader,
 			multinodeDeployer: &LWSMultinodeDeployer{},
 			component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
-				Resources: &common.Resources{
-					Limits: &common.ResourceItem{
+				Resources: &v1alpha1.Resources{
+					Limits: &v1alpha1.ResourceItem{
 						GPU: "1",
 					},
 				},
@@ -557,8 +556,8 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) {
 			multinodeDeployer: &GroveMultinodeDeployer{},
 			serviceName:       "test-service",
 			component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
-				Resources: &common.Resources{
-					Requests: &common.ResourceItem{
+				Resources: &v1alpha1.Resources{
+					Requests: &v1alpha1.ResourceItem{
 						GPU: "2",
 					},
 				},
@@ -583,8 +582,8 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) {
 			multinodeDeployer: &GroveMultinodeDeployer{},
 			serviceName:       "test",
 			component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
-				Resources: &common.Resources{
-					Limits: &common.ResourceItem{
+				Resources: &v1alpha1.Resources{
+					Limits: &v1alpha1.ResourceItem{
 						GPU: "1",
 					},
 				},
@@ -599,8 +598,8 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) {
 			multinodeDeployer: &GroveMultinodeDeployer{},
 			serviceName:       "test",
 			component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
-				Resources: &common.Resources{
-					Limits: &common.ResourceItem{
+				Resources: &v1alpha1.Resources{
+					Limits: &v1alpha1.ResourceItem{
 						GPU: "1",
 					},
 				},
@@ -615,8 +614,8 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) {
 			multinodeDeployer: &GroveMultinodeDeployer{},
 			serviceName:       "test",
 			component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
-				Resources: &common.Resources{
-					Limits: &common.ResourceItem{
+				Resources: &v1alpha1.Resources{
+					Limits: &v1alpha1.ResourceItem{
 						GPU: "1",
 					},
 				},
@@ -631,8 +630,8 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) {
 			multinodeDeployer: &GroveMultinodeDeployer{},
 			serviceName:       "test",
 			component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
-				Resources: &common.Resources{
-					Limits: &common.ResourceItem{
+				Resources: &v1alpha1.Resources{
+					Limits: &v1alpha1.ResourceItem{
 						GPU: "1",
 					},
 				},
@@ -647,8 +646,8 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) {
 			multinodeDeployer: &GroveMultinodeDeployer{},
 			serviceName:       "test",
 			component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
-				Resources: &common.Resources{
-					Requests: &common.ResourceItem{
+				Resources: &v1alpha1.Resources{
+					Requests: &v1alpha1.ResourceItem{
 						GPU: "1",
 					},
 				},
@@ -663,8 +662,8 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) {
 			multinodeDeployer: &GroveMultinodeDeployer{},
 			serviceName:       "test",
 			component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
-				Resources: &common.Resources{
-					Requests: &common.ResourceItem{
+				Resources: &v1alpha1.Resources{
+					Requests: &v1alpha1.ResourceItem{
 						GPU: "1",
 					},
 				},
@@ -778,7 +777,7 @@ func TestTRTLLMBackend_setupWorkerContainer(t *testing.T) {
 func TestTRTLLMBackend_getGPUsPerNode(t *testing.T) {
 	tests := []struct {
 		name      string
-		resources *common.Resources
+		resources *v1alpha1.Resources
 		expected  int32
 	}{
 		{
@@ -788,13 +787,13 @@ func TestTRTLLMBackend_getGPUsPerNode(t *testing.T) {
 		},
 		{
 			name:      "Empty resources - default to 0",
-			resources: &common.Resources{},
+			resources: &v1alpha1.Resources{},
 			expected:  0,
 		},
 		{
 			name: "GPU in requests",
-			resources: &common.Resources{
-				Requests: &common.ResourceItem{
+			resources: &v1alpha1.Resources{
+				Requests: &v1alpha1.ResourceItem{
 					GPU: "2",
 				},
 			},
@@ -802,8 +801,8 @@ func TestTRTLLMBackend_getGPUsPerNode(t *testing.T) {
 		},
 		{
 			name: "GPU in limits",
-			resources: &common.Resources{
-				Limits: &common.ResourceItem{
+			resources: &v1alpha1.Resources{
+				Limits: &v1alpha1.ResourceItem{
 					GPU: "4",
 				},
 			},
@@ -811,11 +810,11 @@ func TestTRTLLMBackend_getGPUsPerNode(t *testing.T) {
 		},
 		{
 			name: "GPU in both requests and limits - requests takes precedence",
-			resources: &common.Resources{
-				Requests: &common.ResourceItem{
+			resources: &v1alpha1.Resources{
+				Requests: &v1alpha1.ResourceItem{
 					GPU: "3",
 				},
-				Limits: &common.ResourceItem{
+				Limits: &v1alpha1.ResourceItem{
 					GPU: "8",
 				},
 			},
@@ -823,8 +822,8 @@ func TestTRTLLMBackend_getGPUsPerNode(t *testing.T) {
 		},
 		{
 			name: "Invalid GPU value - default to 0",
-			resources: &common.Resources{
-				Requests: &common.ResourceItem{
+			resources: &v1alpha1.Resources{
+				Requests: &v1alpha1.ResourceItem{
 					GPU: "invalid",
 				},
 			},
@@ -832,8 +831,8 @@ func TestTRTLLMBackend_getGPUsPerNode(t *testing.T) {
 		},
 		{
 			name: "Empty GPU string - default to 0",
-			resources: &common.Resources{
-				Requests: &common.ResourceItem{
+			resources: &v1alpha1.Resources{
+				Requests: &v1alpha1.ResourceItem{
 					GPU: "",
 				},
 			},

--- a/deploy/cloud/operator/internal/dynamo/backend_vllm.go
+++ b/deploy/cloud/operator/internal/dynamo/backend_vllm.go
@@ -6,7 +6,6 @@ import (
 	"strings"

 	"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
-	"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
 	corev1 "k8s.io/api/core/v1"
 	"sigs.k8s.io/controller-runtime/pkg/log"
 )
@@ -26,7 +25,7 @@ func (b *VLLMBackend) UpdateContainer(container *corev1.Container, numberOfNodes

 	if isMultinode {
 		// Apply multinode-specific argument modifications
-		updateVLLMMultinodeArgs(container, role, serviceName, multinodeDeployer)
+		updateVLLMMultinodeArgs(container, role, serviceName, multinodeDeployer, component.Resources)

 		// Remove probes for multinode worker and leader
 		if role == RoleWorker {
@@ -72,12 +71,12 @@ func (b *VLLMBackend) UpdatePodSpec(podSpec *corev1.PodSpec, numberOfNodes int32

 // updateVLLMMultinodeArgs will inject Ray-specific flags for tensor parallel multinode deployments
 // OR data parallel flags for data parallel multinode deployments
-func updateVLLMMultinodeArgs(container *corev1.Container, role Role, serviceName string, multinodeDeployer MultinodeDeployer) {
+func updateVLLMMultinodeArgs(container *corev1.Container, role Role, serviceName string, multinodeDeployer MultinodeDeployer, resources *v1alpha1.Resources) {
 	expandedArgs := getExpandedArgs(container)
-	if needsRayDistributedLaunch(expandedArgs, container.Resources) {
+	if needsRayDistributedLaunch(expandedArgs, resources) {
 		injectRayDistributedLaunchFlags(container, role, serviceName, multinodeDeployer)
-	} else if needsDataParallelLaunch(expandedArgs, container.Resources) {
-		injectDataParallelLaunchFlags(container, role, serviceName, multinodeDeployer)
+	} else if needsDataParallelLaunch(expandedArgs, resources) {
+		injectDataParallelLaunchFlags(container, role, serviceName, multinodeDeployer, resources)
 	} else {
 		logger := log.Log.WithName("vllm-backend")
 		logger.Info("No need to inject Ray-specific or data parallel flags for multinode deployments", "args", strings.Join(container.Args, " "))
@@ -109,10 +108,10 @@ func injectRayDistributedLaunchFlags(container *corev1.Container, role Role, ser
 	container.Command = []string{"/bin/sh", "-c"} // ensure cmd is a shell
 }

-func injectDataParallelLaunchFlags(container *corev1.Container, role Role, serviceName string, multinodeDeployer MultinodeDeployer) {
+func injectDataParallelLaunchFlags(container *corev1.Container, role Role, serviceName string, multinodeDeployer MultinodeDeployer, resources *v1alpha1.Resources) {
 	expandedArgs := getExpandedArgs(container)
 	leaderHostname := multinodeDeployer.GetLeaderHostname(serviceName)
-	dataParallelSizeLocal := getContainerGPUs(container.Resources) / getWorldSize(expandedArgs)
+	dataParallelSizeLocal := getContainerGPUs(resources) / getWorldSize(expandedArgs)
 	var startRank string
 	switch role {
 	case RoleWorker:
@@ -134,8 +133,12 @@ func injectDataParallelLaunchFlags(container *corev1.Container, role Role, servi

 // if world size (within DP rank) > GPU count, then we need to inject ray
 // world size = tensor parallel size * pipeline parallel size
-func needsRayDistributedLaunch(expandedArgs []string, resources corev1.ResourceRequirements) bool {
-	return getWorldSize(expandedArgs) > getContainerGPUs(resources)
+func needsRayDistributedLaunch(expandedArgs []string, resources *v1alpha1.Resources) bool {
+	containerGPUs := getContainerGPUs(resources)
+	if containerGPUs == 0 {
+		return false
+	}
+	return getWorldSize(expandedArgs) > containerGPUs
 }

 func getWorldSize(expandedArgs []string) int64 {
@@ -145,9 +148,13 @@ func getWorldSize(expandedArgs []string) int64 {
 }

 // if world size across all DP ranks > GPU count, then we need to inject data parallel multinode coordination
-func needsDataParallelLaunch(expandedArgs []string, resources corev1.ResourceRequirements) bool {
+func needsDataParallelLaunch(expandedArgs []string, resources *v1alpha1.Resources) bool {
 	dataParallelSize := getFlagValue(expandedArgs, dataParallelSizeFlag)
-	return getWorldSize(expandedArgs)*dataParallelSize > getContainerGPUs(resources)
+	containerGPUs := getContainerGPUs(resources)
+	if containerGPUs == 0 {
+		return false
+	}
+	return getWorldSize(expandedArgs)*dataParallelSize > containerGPUs
 }

 func getFlagValue(expandedArgs []string, flag string) int64 {
@@ -164,14 +171,12 @@ func getFlagValue(expandedArgs []string, flag string) int64 {
 	return flagValue
 }

-func getContainerGPUs(resources corev1.ResourceRequirements) int64 {
-	var containerGPUs int64 = 1
-	// Requests defaults to Limits, doesn't make sense in case where Requests < Limits for gpus
-	for name, quantity := range resources.Limits {
-		if name.String() == consts.KubeResourceGPUNvidia {
-			containerGPUs = quantity.Value()
-			break
+func getContainerGPUs(resources *v1alpha1.Resources) int64 {
+	if resources == nil || resources.Limits == nil || resources.Limits.GPU == "" {
+		return 0
 	}
+	if gpus, err := strconv.ParseInt(resources.Limits.GPU, 10, 64); err == nil {
+		return gpus
 	}
-	return containerGPUs
+	return 0
 }
--- a/deploy/cloud/operator/internal/dynamo/backend_vllm_test.go
+++ b/deploy/cloud/operator/internal/dynamo/backend_vllm_test.go
@@ -3,12 +3,12 @@ package dynamo
 import (
 	"fmt"
 	"reflect"
+	"strconv"
 	"testing"

 	"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
 	"github.com/onsi/gomega"
 	corev1 "k8s.io/api/core/v1"
-	"k8s.io/apimachinery/pkg/api/resource"
 )

 func TestVLLMBackend_UpdateContainer(t *testing.T) {
@@ -21,6 +21,7 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) {
 		component           *v1alpha1.DynamoComponentDeploymentSharedSpec
 		multinodeDeployer   MultinodeDeployer
 		initialContainer    *corev1.Container
+		gpuCount            int64 // GPU count for the test case
 		expectedArgs        []string
 		expectNotModified   bool // If true, container args should not change
 		expectProbesRemoved bool // If true, probes should be nil
@@ -32,6 +33,7 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) {
 			component:         &v1alpha1.DynamoComponentDeploymentSharedSpec{},
 			multinodeDeployer: &GroveMultinodeDeployer{},
 			initialContainer:  &corev1.Container{Command: []string{"python3"}, Args: []string{"-m", "dynamo.vllm"}},
+			gpuCount:          0,
 			expectNotModified: true,
 		},
 		{
@@ -40,7 +42,8 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) {
 			role:                RoleLeader,
 			component:           &v1alpha1.DynamoComponentDeploymentSharedSpec{},
 			multinodeDeployer:   &GroveMultinodeDeployer{},
-			initialContainer:    &corev1.Container{Command: []string{"python3", "-m", "dynamo.vllm"}, Args: []string{"--model", "test", tensorParallelSizeFlag, "8"}, Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{corev1.ResourceName("nvidia.com/gpu"): resource.MustParse("4")}}},
+			initialContainer:    &corev1.Container{Command: []string{"python3", "-m", "dynamo.vllm"}, Args: []string{"--model", "test", tensorParallelSizeFlag, "8"}},
+			gpuCount:            4,
 			expectedArgs:        []string{fmt.Sprintf("ray start --head --port=%s && python3 -m dynamo.vllm --model test %s 8", VLLMPort, tensorParallelSizeFlag)},
 			expectProbesRemoved: true,
 		},
@@ -50,7 +53,8 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) {
 			role:                RoleWorker,
 			component:           &v1alpha1.DynamoComponentDeploymentSharedSpec{},
 			multinodeDeployer:   &GroveMultinodeDeployer{},
-			initialContainer:    &corev1.Container{Command: []string{"python3"}, Args: []string{"-m", "dynamo.vllm", "--model", "test", tensorParallelSizeFlag, "8"}, Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{corev1.ResourceName("nvidia.com/gpu"): resource.MustParse("4")}}},
+			initialContainer:    &corev1.Container{Command: []string{"python3"}, Args: []string{"-m", "dynamo.vllm", "--model", "test", tensorParallelSizeFlag, "8"}},
+			gpuCount:            4,
 			expectedArgs:        []string{"ray start --address=$(GROVE_PCSG_NAME)-$(GROVE_PCSG_INDEX)-test-service-ldr-0.$(GROVE_HEADLESS_SERVICE):6379 --block"},
 			expectProbesRemoved: true,
 		},
@@ -60,7 +64,8 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) {
 			role:                RoleWorker,
 			component:           &v1alpha1.DynamoComponentDeploymentSharedSpec{},
 			multinodeDeployer:   &LWSMultinodeDeployer{},
-			initialContainer:    &corev1.Container{Args: []string{"python3", "-m", "dynamo.vllm", tensorParallelSizeFlag, "8"}, Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{corev1.ResourceName("nvidia.com/gpu"): resource.MustParse("4")}}},
+			initialContainer:    &corev1.Container{Args: []string{"python3", "-m", "dynamo.vllm", tensorParallelSizeFlag, "8"}},
+			gpuCount:            4,
 			expectedArgs:        []string{"ray start --address=$(LWS_LEADER_ADDRESS):6379 --block"},
 			expectProbesRemoved: true,
 		},
@@ -71,6 +76,7 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) {
 			component:         &v1alpha1.DynamoComponentDeploymentSharedSpec{},
 			multinodeDeployer: &GroveMultinodeDeployer{},
 			initialContainer:  &corev1.Container{Args: []string{}},
+			gpuCount:          0,
 			expectNotModified: true, // Should not modify empty args
 		},
 		{
@@ -80,6 +86,7 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) {
 			component:         &v1alpha1.DynamoComponentDeploymentSharedSpec{},
 			multinodeDeployer: &GroveMultinodeDeployer{},
 			initialContainer:  &corev1.Container{Args: []string{"python3", "-m", "dynamo.frontend"}},
+			gpuCount:          0,
 			expectNotModified: true,
 		},
 	}
@@ -90,6 +97,15 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) {

 			initialContainerArgs := append([]string{}, tt.initialContainer.Args...)

+			// Create resources from GPU count and set in component
+			if tt.gpuCount > 0 {
+				tt.component.Resources = &v1alpha1.Resources{
+					Limits: &v1alpha1.ResourceItem{
+						GPU: strconv.FormatInt(tt.gpuCount, 10),
+					},
+				}
+			}
+
 			// Call UpdateContainer
 			backend.UpdateContainer(tt.initialContainer, tt.numberOfNodes, tt.role, tt.component, "test-service", tt.multinodeDeployer)

@@ -119,6 +135,7 @@ func TestVLLMBackend_ShellCommandInjection(t *testing.T) {
 		role              Role
 		multinodeDeployer MultinodeDeployer
 		initialContainer  *corev1.Container
+		gpuCount          int64 // GPU count for the test case
 		expectedArgs      []string
 		description       string
 	}{
@@ -128,6 +145,7 @@ func TestVLLMBackend_ShellCommandInjection(t *testing.T) {
 			role:              RoleMain,
 			multinodeDeployer: &GroveMultinodeDeployer{},
 			initialContainer:  &corev1.Container{Command: []string{"sh", "-c"}, Args: []string{"python3 -m dynamo.vllm"}},
+			gpuCount:          0,
 			expectedArgs:      []string{"python3 -m dynamo.vllm"},
 			description:       "Single node should not modify shell commands",
 		},
@@ -136,7 +154,8 @@ func TestVLLMBackend_ShellCommandInjection(t *testing.T) {
 			numberOfNodes:     2,
 			role:              RoleLeader,
 			multinodeDeployer: &GroveMultinodeDeployer{},
-			initialContainer:  &corev1.Container{Command: []string{"sh", "-c"}, Args: []string{fmt.Sprintf("python3 -m dynamo.vllm %s 8", dataParallelSizeFlag)}, Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{corev1.ResourceName("nvidia.com/gpu"): resource.MustParse("4")}}},
+			initialContainer:  &corev1.Container{Command: []string{"sh", "-c"}, Args: []string{fmt.Sprintf("python3 -m dynamo.vllm %s 8", dataParallelSizeFlag)}},
+			gpuCount:          4,
 			expectedArgs:      []string{"python3 -m dynamo.vllm --data-parallel-address $(GROVE_PCSG_NAME)-$(GROVE_PCSG_INDEX)-test-service-ldr-0.$(GROVE_HEADLESS_SERVICE) --data-parallel-size-local 4 --data-parallel-rpc-port 13445 --data-parallel-start-rank 0 --data-parallel-size 8"},
 			description:       "Shell commands should use regex injection for python commands",
 		},
@@ -145,7 +164,8 @@ func TestVLLMBackend_ShellCommandInjection(t *testing.T) {
 			numberOfNodes:     2,
 			role:              RoleLeader,
 			multinodeDeployer: &GroveMultinodeDeployer{},
-			initialContainer:  &corev1.Container{Command: []string{"sh", "-c"}, Args: []string{fmt.Sprintf("echo blah | wc -l && python3 -m dynamo.vllm %s 8 && ls -al", dataParallelSizeFlag)}, Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{corev1.ResourceName("nvidia.com/gpu"): resource.MustParse("4")}}},
+			initialContainer:  &corev1.Container{Command: []string{"sh", "-c"}, Args: []string{fmt.Sprintf("echo blah | wc -l && python3 -m dynamo.vllm %s 8 && ls -al", dataParallelSizeFlag)}},
+			gpuCount:          4,
 			expectedArgs:      []string{"echo blah | wc -l && python3 -m dynamo.vllm --data-parallel-address $(GROVE_PCSG_NAME)-$(GROVE_PCSG_INDEX)-test-service-ldr-0.$(GROVE_HEADLESS_SERVICE) --data-parallel-size-local 4 --data-parallel-rpc-port 13445 --data-parallel-start-rank 0 --data-parallel-size 8 && ls -al"},
 			description:       "Complex shell commands should inject flags only into python part",
 		},
@@ -154,7 +174,8 @@ func TestVLLMBackend_ShellCommandInjection(t *testing.T) {
 			numberOfNodes:     2,
 			role:              RoleLeader,
 			multinodeDeployer: &LWSMultinodeDeployer{},
-			initialContainer:  &corev1.Container{Command: []string{"sh", "-c"}, Args: []string{fmt.Sprintf("python3 -m dynamo.vllm %s 8", dataParallelSizeFlag)}, Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{corev1.ResourceName("nvidia.com/gpu"): resource.MustParse("4")}}},
+			initialContainer:  &corev1.Container{Command: []string{"sh", "-c"}, Args: []string{fmt.Sprintf("python3 -m dynamo.vllm %s 8", dataParallelSizeFlag)}},
+			gpuCount:          4,
 			expectedArgs:      []string{"python3 -m dynamo.vllm --data-parallel-address $(LWS_LEADER_ADDRESS) --data-parallel-size-local 4 --data-parallel-rpc-port 13445 --data-parallel-start-rank 0 --data-parallel-size 8"},
 			description:       "LWS shell commands should use LWS variables",
 		},
@@ -163,7 +184,8 @@ func TestVLLMBackend_ShellCommandInjection(t *testing.T) {
 			numberOfNodes:     2,
 			role:              RoleLeader,
 			multinodeDeployer: &GroveMultinodeDeployer{},
-			initialContainer:  &corev1.Container{Command: []string{"sh", "-c"}, Args: []string{fmt.Sprintf("python3 -m dynamo.vllm %s 8 | tee /tmp/log", dataParallelSizeFlag)}, Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{corev1.ResourceName("nvidia.com/gpu"): resource.MustParse("4")}}},
+			initialContainer:  &corev1.Container{Command: []string{"sh", "-c"}, Args: []string{fmt.Sprintf("python3 -m dynamo.vllm %s 8 | tee /tmp/log", dataParallelSizeFlag)}},
+			gpuCount:          4,
 			expectedArgs:      []string{"python3 -m dynamo.vllm --data-parallel-address $(GROVE_PCSG_NAME)-$(GROVE_PCSG_INDEX)-test-service-ldr-0.$(GROVE_HEADLESS_SERVICE) --data-parallel-size-local 4 --data-parallel-rpc-port 13445 --data-parallel-start-rank 0 --data-parallel-size 8 | tee /tmp/log"},
 			description:       "Shell commands with pipes should inject flags before pipe",
 		},
@@ -173,7 +195,17 @@ func TestVLLMBackend_ShellCommandInjection(t *testing.T) {
 		t.Run(tt.name, func(t *testing.T) {
 			expectedCommand := append([]string{}, tt.initialContainer.Command...)

-			backend.UpdateContainer(tt.initialContainer, tt.numberOfNodes, tt.role, &v1alpha1.DynamoComponentDeploymentSharedSpec{}, "test-service", tt.multinodeDeployer)
+			// Create component with resources from GPU count
+			component := &v1alpha1.DynamoComponentDeploymentSharedSpec{}
+			if tt.gpuCount > 0 {
+				component.Resources = &v1alpha1.Resources{
+					Limits: &v1alpha1.ResourceItem{
+						GPU: strconv.FormatInt(tt.gpuCount, 10),
+					},
+				}
+			}
+
+			backend.UpdateContainer(tt.initialContainer, tt.numberOfNodes, tt.role, component, "test-service", tt.multinodeDeployer)

 			if !reflect.DeepEqual(tt.initialContainer.Args, tt.expectedArgs) {
 				t.Errorf("UpdateContainer() args = %v, want %v", tt.initialContainer.Args, tt.expectedArgs)
@@ -296,6 +328,7 @@ func TestUpdateVLLMMultinodeArgs(t *testing.T) {
 		role              Role
 		multinodeDeployer MultinodeDeployer
 		initialContainer  *corev1.Container
+		gpuCount          int64 // GPU count for the test case
 		expectedArgs      []string
 		expectNotModified bool
 	}{
@@ -303,14 +336,16 @@ func TestUpdateVLLMMultinodeArgs(t *testing.T) {
 			name:              "leader prepends ray start --head",
 			role:              RoleLeader,
 			multinodeDeployer: &GroveMultinodeDeployer{},
-			initialContainer:  &corev1.Container{Command: []string{"python3"}, Args: []string{"-m", "dynamo.vllm", tensorParallelSizeFlag, "16"}, Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{corev1.ResourceName("nvidia.com/gpu"): resource.MustParse("8")}}},
+			initialContainer:  &corev1.Container{Command: []string{"python3"}, Args: []string{"-m", "dynamo.vllm", tensorParallelSizeFlag, "16"}},
+			gpuCount:          8,
 			expectedArgs:      []string{fmt.Sprintf("ray start --head --port=%s && python3 -m dynamo.vllm %s 16", VLLMPort, tensorParallelSizeFlag)},
 		},
 		{
 			name:              "leader prepends distributed data parallel flags",
 			role:              RoleLeader,
 			multinodeDeployer: &GroveMultinodeDeployer{},
-			initialContainer:  &corev1.Container{Command: []string{"python3"}, Args: []string{"-m", "dynamo.vllm", dataParallelSizeFlag, "16"}, Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{corev1.ResourceName("nvidia.com/gpu"): resource.MustParse("8")}}},
+			initialContainer:  &corev1.Container{Command: []string{"python3"}, Args: []string{"-m", "dynamo.vllm", dataParallelSizeFlag, "16"}},
+			gpuCount:          8,
 			expectedArgs:      []string{fmt.Sprintf("exec python3 -m dynamo.vllm %s 16 --data-parallel-address $(GROVE_PCSG_NAME)-$(GROVE_PCSG_INDEX)-test-service-ldr-0.$(GROVE_HEADLESS_SERVICE) --data-parallel-size-local 8 --data-parallel-rpc-port 13445 --data-parallel-start-rank 0", dataParallelSizeFlag)},
 		},
 		{
@@ -318,34 +353,39 @@ func TestUpdateVLLMMultinodeArgs(t *testing.T) {
 			role:              RoleLeader,
 			multinodeDeployer: &GroveMultinodeDeployer{},
 			initialContainer:  &corev1.Container{Args: []string{}},
+			gpuCount:          0,
 			expectNotModified: true,
 		},
 		{
 			name:              "worker with ray distributed launch Grove",
 			role:              RoleWorker,
 			multinodeDeployer: &GroveMultinodeDeployer{},
-			initialContainer:  &corev1.Container{Args: []string{"python3", "-m", "dynamo.vllm", tensorParallelSizeFlag, "16"}, Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{corev1.ResourceName("nvidia.com/gpu"): resource.MustParse("8")}}},
+			initialContainer:  &corev1.Container{Args: []string{"python3", "-m", "dynamo.vllm", tensorParallelSizeFlag, "16"}},
+			gpuCount:          8,
 			expectedArgs:      []string{"ray start --address=$(GROVE_PCSG_NAME)-$(GROVE_PCSG_INDEX)-test-service-ldr-0.$(GROVE_HEADLESS_SERVICE):6379 --block"},
 		},
 		{
 			name:              "worker with data parallel launch Grove",
 			role:              RoleWorker,
 			multinodeDeployer: &GroveMultinodeDeployer{},
-			initialContainer:  &corev1.Container{Command: []string{"python3"}, Args: []string{"-m", "dynamo.vllm", dataParallelSizeFlag, "16"}, Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{corev1.ResourceName("nvidia.com/gpu"): resource.MustParse("8")}}},
+			initialContainer:  &corev1.Container{Command: []string{"python3"}, Args: []string{"-m", "dynamo.vllm", dataParallelSizeFlag, "16"}},
+			gpuCount:          8,
 			expectedArgs:      []string{fmt.Sprintf("exec python3 -m dynamo.vllm %s 16 --data-parallel-address $(GROVE_PCSG_NAME)-$(GROVE_PCSG_INDEX)-test-service-ldr-0.$(GROVE_HEADLESS_SERVICE) --data-parallel-size-local 8 --data-parallel-rpc-port 13445 --data-parallel-start-rank $(( 8 * $((GROVE_PCLQ_POD_INDEX + 1)) ))", dataParallelSizeFlag)},
 		},
 		{
 			name:              "worker with data parallel launch Grove, tp > 1",
 			role:              RoleWorker,
 			multinodeDeployer: &GroveMultinodeDeployer{},
-			initialContainer:  &corev1.Container{Command: []string{"python3"}, Args: []string{"-m", "dynamo.vllm", dataParallelSizeFlag, "8", tensorParallelSizeFlag, "2"}, Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{corev1.ResourceName("nvidia.com/gpu"): resource.MustParse("8")}}},
+			initialContainer:  &corev1.Container{Command: []string{"python3"}, Args: []string{"-m", "dynamo.vllm", dataParallelSizeFlag, "8", tensorParallelSizeFlag, "2"}},
+			gpuCount:          8,
 			expectedArgs:      []string{fmt.Sprintf("exec python3 -m dynamo.vllm %s 8 %s 2 --data-parallel-address $(GROVE_PCSG_NAME)-$(GROVE_PCSG_INDEX)-test-service-ldr-0.$(GROVE_HEADLESS_SERVICE) --data-parallel-size-local 4 --data-parallel-rpc-port 13445 --data-parallel-start-rank $(( 4 * $((GROVE_PCLQ_POD_INDEX + 1)) ))", dataParallelSizeFlag, tensorParallelSizeFlag)},
 		},
 		{
 			name:              "worker with ray distributed launch LWS",
 			role:              RoleWorker,
 			multinodeDeployer: &LWSMultinodeDeployer{},
-			initialContainer:  &corev1.Container{Args: []string{"python3", "-m", "dynamo.vllm", tensorParallelSizeFlag, "16"}, Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{corev1.ResourceName("nvidia.com/gpu"): resource.MustParse("8")}}},
+			initialContainer:  &corev1.Container{Args: []string{"python3", "-m", "dynamo.vllm", tensorParallelSizeFlag, "16"}},
+			gpuCount:          8,
 			expectedArgs:      []string{"ray start --address=$(LWS_LEADER_ADDRESS):6379 --block"},
 		},
 		{
@@ -353,6 +393,7 @@ func TestUpdateVLLMMultinodeArgs(t *testing.T) {
 			role:              RoleMain,
 			multinodeDeployer: &GroveMultinodeDeployer{},
 			initialContainer:  &corev1.Container{Args: []string{"python3", "-m", "dynamo.frontend"}},
+			gpuCount:          0,
 			expectNotModified: true,
 		},
 	}
@@ -362,8 +403,19 @@ func TestUpdateVLLMMultinodeArgs(t *testing.T) {
 			g := gomega.NewGomegaWithT(t)

 			initialContainerArgs := append([]string{}, tt.initialContainer.Args...)
+
+			// Create resources from GPU count
+			var resources *v1alpha1.Resources
+			if tt.gpuCount > 0 {
+				resources = &v1alpha1.Resources{
+					Limits: &v1alpha1.ResourceItem{
+						GPU: strconv.FormatInt(tt.gpuCount, 10),
+					},
+				}
+			}
+
 			// Call updateVLLMMultinodeArgs
-			updateVLLMMultinodeArgs(tt.initialContainer, tt.role, "test-service", tt.multinodeDeployer)
+			updateVLLMMultinodeArgs(tt.initialContainer, tt.role, "test-service", tt.multinodeDeployer, resources)

 			if tt.expectNotModified {
 				// Args should not have changed