Unverified Commit 9caafa55 authored by Thomas Montfort's avatar Thomas Montfort Committed by GitHub
Browse files

feat: add custom gpu type to CRD (#4408)

parent 33d9ae78
...@@ -34,7 +34,6 @@ import ( ...@@ -34,7 +34,6 @@ import (
"k8s.io/utils/ptr" "k8s.io/utils/ptr"
grovev1alpha1 "github.com/NVIDIA/grove/operator/api/core/v1alpha1" grovev1alpha1 "github.com/NVIDIA/grove/operator/api/core/v1alpha1"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1" "github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
commonconsts "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts" commonconsts "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/controller_common" "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/controller_common"
...@@ -63,15 +62,15 @@ type Autoscaling struct { ...@@ -63,15 +62,15 @@ type Autoscaling struct {
} }
type Config struct { type Config struct {
Dynamo *DynamoConfig `yaml:"dynamo,omitempty"` Dynamo *DynamoConfig `yaml:"dynamo,omitempty"`
Resources *Resources `yaml:"resources,omitempty"` Resources *Resources `yaml:"resources,omitempty"`
Traffic *Traffic `yaml:"traffic,omitempty"` Traffic *Traffic `yaml:"traffic,omitempty"`
Autoscaling *Autoscaling `yaml:"autoscaling,omitempty"` Autoscaling *Autoscaling `yaml:"autoscaling,omitempty"`
HttpExposed bool `yaml:"http_exposed,omitempty"` HttpExposed bool `yaml:"http_exposed,omitempty"`
ApiEndpoints []string `yaml:"api_endpoints,omitempty"` ApiEndpoints []string `yaml:"api_endpoints,omitempty"`
Workers *int32 `yaml:"workers,omitempty"` Workers *int32 `yaml:"workers,omitempty"`
TotalGpus *int32 `yaml:"total_gpus,omitempty"` TotalGpus *int32 `yaml:"total_gpus,omitempty"`
ExtraPodSpec *common.ExtraPodSpec `yaml:"extraPodSpec,omitempty"` ExtraPodSpec *v1alpha1.ExtraPodSpec `yaml:"extraPodSpec,omitempty"`
} }
type ServiceConfig struct { type ServiceConfig struct {
...@@ -150,7 +149,7 @@ func GenerateDynamoComponentsDeployments(ctx context.Context, parentDynamoGraphD ...@@ -150,7 +149,7 @@ func GenerateDynamoComponentsDeployments(ctx context.Context, parentDynamoGraphD
if component.ComponentType == commonconsts.ComponentTypePlanner { if component.ComponentType == commonconsts.ComponentTypePlanner {
// ensure that the extraPodSpec is not nil // ensure that the extraPodSpec is not nil
if deployment.Spec.ExtraPodSpec == nil { if deployment.Spec.ExtraPodSpec == nil {
deployment.Spec.ExtraPodSpec = &common.ExtraPodSpec{} deployment.Spec.ExtraPodSpec = &v1alpha1.ExtraPodSpec{}
} }
// ensure that the embedded PodSpec struct is not nil // ensure that the embedded PodSpec struct is not nil
if deployment.Spec.ExtraPodSpec.PodSpec == nil { if deployment.Spec.ExtraPodSpec.PodSpec == nil {
...@@ -231,10 +230,10 @@ func overrideWithDynDeploymentConfig(ctx context.Context, dynamoDeploymentCompon ...@@ -231,10 +230,10 @@ func overrideWithDynDeploymentConfig(ctx context.Context, dynamoDeploymentCompon
dynamoDeploymentComponent.Spec.Replicas = componentDynConfig.ServiceArgs.Workers dynamoDeploymentComponent.Spec.Replicas = componentDynConfig.ServiceArgs.Workers
} }
if componentDynConfig.ServiceArgs != nil && componentDynConfig.ServiceArgs.Resources != nil { if componentDynConfig.ServiceArgs != nil && componentDynConfig.ServiceArgs.Resources != nil {
requests := &common.ResourceItem{} requests := &v1alpha1.ResourceItem{}
limits := &common.ResourceItem{} limits := &v1alpha1.ResourceItem{}
if dynamoDeploymentComponent.Spec.Resources == nil { if dynamoDeploymentComponent.Spec.Resources == nil {
dynamoDeploymentComponent.Spec.Resources = &common.Resources{ dynamoDeploymentComponent.Spec.Resources = &v1alpha1.Resources{
Requests: requests, Requests: requests,
Limits: limits, Limits: limits,
} }
......
...@@ -66,29 +66,47 @@ spec: ...@@ -66,29 +66,47 @@ spec:
- -m - -m
- dynamo.frontend - dynamo.frontend
{{- end }} {{- end }}
{{ if $serviceSpec.resources }} {{- if $serviceSpec.resources }}
{{- $hasResources := false }}
{{- if or $serviceSpec.resources.requests $serviceSpec.resources.limits }}
{{- $hasResources = true }}
{{- end }}
{{- if $hasResources }}
resources: resources:
{{- if $serviceSpec.resources.requests }}
{{- $requestsGpuResourceName := "nvidia.com/gpu" }}
{{- if $serviceSpec.resources.requests.gpuType }}
{{- $requestsGpuResourceName = $serviceSpec.resources.requests.gpuType }}
{{- end }}
requests: requests:
{{ if $serviceSpec.resources.cpu }} {{- if $serviceSpec.resources.requests.cpu }}
cpu: "{{ $serviceSpec.resources.cpu }}" cpu: "{{ $serviceSpec.resources.requests.cpu }}"
{{ end }} {{- end }}
{{ if $serviceSpec.resources.memory }} {{- if $serviceSpec.resources.requests.memory }}
memory: "{{ $serviceSpec.resources.memory }}" memory: "{{ $serviceSpec.resources.requests.memory }}"
{{ end }} {{- end }}
{{ if $serviceSpec.resources.gpu }} {{- if $serviceSpec.resources.requests.gpu }}
nvidia.com/gpu: "{{ $serviceSpec.resources.gpu }}" {{ $requestsGpuResourceName }}: "{{ $serviceSpec.resources.requests.gpu }}"
{{ end }} {{- end }}
{{- end }}
{{- if $serviceSpec.resources.limits }}
{{- $limitsGpuResourceName := "nvidia.com/gpu" }}
{{- if $serviceSpec.resources.limits.gpuType }}
{{- $limitsGpuResourceName = $serviceSpec.resources.limits.gpuType }}
{{- end }}
limits: limits:
{{ if $serviceSpec.resources.cpu }} {{- if $serviceSpec.resources.limits.cpu }}
cpu: "{{ $serviceSpec.resources.cpu }}" cpu: "{{ $serviceSpec.resources.limits.cpu }}"
{{ end }} {{- end }}
{{ if $serviceSpec.resources.memory }} {{- if $serviceSpec.resources.limits.memory }}
memory: "{{ $serviceSpec.resources.memory }}" memory: "{{ $serviceSpec.resources.limits.memory }}"
{{ end }} {{- end }}
{{ if $serviceSpec.resources.gpu }} {{- if $serviceSpec.resources.limits.gpu }}
nvidia.com/gpu: "{{ $serviceSpec.resources.gpu }}" {{ $limitsGpuResourceName }}: "{{ $serviceSpec.resources.limits.gpu }}"
{{ end }} {{- end }}
{{ end }} {{- end }}
{{- end }}
{{- end }}
{{- if $serviceSpec.envFromSecret }} {{- if $serviceSpec.envFromSecret }}
envFrom: envFrom:
- secretRef: - secretRef:
......
...@@ -39,27 +39,45 @@ spec: ...@@ -39,27 +39,45 @@ spec:
- name: main - name: main
image: {{ $serviceSpec.extraPodSpec.mainContainer.image }} image: {{ $serviceSpec.extraPodSpec.mainContainer.image }}
{{- if $serviceSpec.resources }} {{- if $serviceSpec.resources }}
{{- $hasResources := false }}
{{- if or $serviceSpec.resources.requests $serviceSpec.resources.limits }}
{{- $hasResources = true }}
{{- end }}
{{- if $hasResources }}
resources: resources:
{{- if $serviceSpec.resources.requests }}
{{- $requestsGpuResourceName := "nvidia.com/gpu" }}
{{- if $serviceSpec.resources.requests.gpuType }}
{{- $requestsGpuResourceName = $serviceSpec.resources.requests.gpuType }}
{{- end }}
requests: requests:
{{- if $serviceSpec.resources.cpu }} {{- if $serviceSpec.resources.requests.cpu }}
cpu: "{{ $serviceSpec.resources.cpu }}" cpu: "{{ $serviceSpec.resources.requests.cpu }}"
{{- end }} {{- end }}
{{- if $serviceSpec.resources.memory }} {{- if $serviceSpec.resources.requests.memory }}
memory: "{{ $serviceSpec.resources.memory }}" memory: "{{ $serviceSpec.resources.requests.memory }}"
{{- end }} {{- end }}
{{- if $serviceSpec.resources.gpu }} {{- if $serviceSpec.resources.requests.gpu }}
nvidia.com/gpu: "{{ $serviceSpec.resources.gpu }}" {{ $requestsGpuResourceName }}: "{{ $serviceSpec.resources.requests.gpu }}"
{{- end }} {{- end }}
{{- end }}
{{- if $serviceSpec.resources.limits }}
{{- $limitsGpuResourceName := "nvidia.com/gpu" }}
{{- if $serviceSpec.resources.limits.gpuType }}
{{- $limitsGpuResourceName = $serviceSpec.resources.limits.gpuType }}
{{- end }}
limits: limits:
{{- if $serviceSpec.resources.cpu }} {{- if $serviceSpec.resources.limits.cpu }}
cpu: "{{ $serviceSpec.resources.cpu }}" cpu: "{{ $serviceSpec.resources.limits.cpu }}"
{{- end }} {{- end }}
{{- if $serviceSpec.resources.memory }} {{- if $serviceSpec.resources.limits.memory }}
memory: "{{ $serviceSpec.resources.memory }}" memory: "{{ $serviceSpec.resources.limits.memory }}"
{{- end }} {{- end }}
{{- if $serviceSpec.resources.gpu }} {{- if $serviceSpec.resources.limits.gpu }}
nvidia.com/gpu: "{{ $serviceSpec.resources.gpu }}" {{ $limitsGpuResourceName }}: "{{ $serviceSpec.resources.limits.gpu }}"
{{- end }} {{- end }}
{{- end }}
{{- end }}
{{- end }} {{- end }}
workingDir: {{ $serviceSpec.extraPodSpec.mainContainer.workingDir }} workingDir: {{ $serviceSpec.extraPodSpec.mainContainer.workingDir }}
{{- if $serviceSpec.extraPodSpec.mainContainer.command }} {{- if $serviceSpec.extraPodSpec.mainContainer.command }}
......
...@@ -123,6 +123,8 @@ _Appears in:_ ...@@ -123,6 +123,8 @@ _Appears in:_
| `created` _boolean_ | Created indicates whether the DGD has been successfully created.<br />Used to prevent recreation if the DGD is manually deleted by users. | | | | `created` _boolean_ | Created indicates whether the DGD has been successfully created.<br />Used to prevent recreation if the DGD is manually deleted by users. | | |
#### DynamoComponentDeployment #### DynamoComponentDeployment
...@@ -423,6 +425,41 @@ _Appears in:_ ...@@ -423,6 +425,41 @@ _Appears in:_
| `ready` _boolean_ | Ready indicates whether the endpoint is ready to serve traffic<br />For LoRA models: true if the POST /loras request succeeded with a 2xx status code<br />For base models: always false (no probing performed) | | | | `ready` _boolean_ | Ready indicates whether the endpoint is ready to serve traffic<br />For LoRA models: true if the POST /loras request succeeded with a 2xx status code<br />For base models: always false (no probing performed) | | |
#### ExtraPodMetadata
_Appears in:_
- [DynamoComponentDeploymentSharedSpec](#dynamocomponentdeploymentsharedspec)
- [DynamoComponentDeploymentSpec](#dynamocomponentdeploymentspec)
| Field | Description | Default | Validation |
| --- | --- | --- | --- |
| `annotations` _object (keys:string, values:string)_ | | | |
| `labels` _object (keys:string, values:string)_ | | | |
#### ExtraPodSpec
_Appears in:_
- [DynamoComponentDeploymentSharedSpec](#dynamocomponentdeploymentsharedspec)
- [DynamoComponentDeploymentSpec](#dynamocomponentdeploymentspec)
| Field | Description | Default | Validation |
| --- | --- | --- | --- |
| `mainContainer` _[Container](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#container-v1-core)_ | | | |
#### IngressSpec #### IngressSpec
...@@ -465,6 +502,8 @@ _Appears in:_ ...@@ -465,6 +502,8 @@ _Appears in:_
| `secretName` _string_ | SecretName is the name of a Kubernetes Secret containing the TLS certificate and key. | | | | `secretName` _string_ | SecretName is the name of a Kubernetes Secret containing the TLS certificate and key. | | |
#### ModelReference #### ModelReference
...@@ -556,6 +595,46 @@ _Appears in:_ ...@@ -556,6 +595,46 @@ _Appears in:_
| `profilerImage` _string_ | ProfilerImage specifies the container image to use for profiling jobs.<br />This image contains the profiler code and dependencies needed for SLA-based profiling.<br />Example: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.6.1" | | Required: \{\} <br /> | | `profilerImage` _string_ | ProfilerImage specifies the container image to use for profiling jobs.<br />This image contains the profiler code and dependencies needed for SLA-based profiling.<br />Example: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.6.1" | | Required: \{\} <br /> |
#### ResourceItem
_Appears in:_
- [Resources](#resources)
| Field | Description | Default | Validation |
| --- | --- | --- | --- |
| `cpu` _string_ | CPU specifies the CPU resource request/limit (e.g., "1000m", "2") | | |
| `memory` _string_ | Memory specifies the memory resource request/limit (e.g., "4Gi", "8Gi") | | |
| `gpu` _string_ | GPU indicates the number of GPUs to request.<br />Total number of GPUs is NumberOfNodes * GPU in case of multinode deployment. | | |
| `gpuType` _string_ | GPUType specifies a custom GPU resource name, e.g. "gpu.intel.com/xe".<br />If not specified, the GPU type defaults to "nvidia.com/gpu". | | |
| `custom` _object (keys:string, values:string)_ | Custom specifies additional custom resource requests/limits | | |
#### Resources
Resources defines the resource requests and limits for a component, including CPU, memory,
GPUs/devices, and any runtime-specific resources.
_Appears in:_
- [DynamoComponentDeploymentSharedSpec](#dynamocomponentdeploymentsharedspec)
- [DynamoComponentDeploymentSpec](#dynamocomponentdeploymentspec)
| Field | Description | Default | Validation |
| --- | --- | --- | --- |
| `requests` _[ResourceItem](#resourceitem)_ | Requests specifies the minimum resources required by the component | | |
| `limits` _[ResourceItem](#resourceitem)_ | Limits specifies the maximum resources allowed for the component | | |
| `claims` _[ResourceClaim](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#resourceclaim-v1-core) array_ | Claims specifies resource claims for dynamic resource allocation | | |
#### SharedMemorySpec #### SharedMemorySpec
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment