Unverified commit 9caafa55, authored by Thomas Montfort and committed by GitHub
Browse files

feat: add custom gpu type to CRD (#4408)

parent 33d9ae78
......@@ -34,7 +34,6 @@ import (
"k8s.io/utils/ptr"
grovev1alpha1 "github.com/NVIDIA/grove/operator/api/core/v1alpha1"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
commonconsts "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/controller_common"
......@@ -63,15 +62,15 @@ type Autoscaling struct {
}
type Config struct {
Dynamo *DynamoConfig `yaml:"dynamo,omitempty"`
Resources *Resources `yaml:"resources,omitempty"`
Traffic *Traffic `yaml:"traffic,omitempty"`
Autoscaling *Autoscaling `yaml:"autoscaling,omitempty"`
HttpExposed bool `yaml:"http_exposed,omitempty"`
ApiEndpoints []string `yaml:"api_endpoints,omitempty"`
Workers *int32 `yaml:"workers,omitempty"`
TotalGpus *int32 `yaml:"total_gpus,omitempty"`
ExtraPodSpec *common.ExtraPodSpec `yaml:"extraPodSpec,omitempty"`
Dynamo *DynamoConfig `yaml:"dynamo,omitempty"`
Resources *Resources `yaml:"resources,omitempty"`
Traffic *Traffic `yaml:"traffic,omitempty"`
Autoscaling *Autoscaling `yaml:"autoscaling,omitempty"`
HttpExposed bool `yaml:"http_exposed,omitempty"`
ApiEndpoints []string `yaml:"api_endpoints,omitempty"`
Workers *int32 `yaml:"workers,omitempty"`
TotalGpus *int32 `yaml:"total_gpus,omitempty"`
ExtraPodSpec *v1alpha1.ExtraPodSpec `yaml:"extraPodSpec,omitempty"`
}
type ServiceConfig struct {
......@@ -150,7 +149,7 @@ func GenerateDynamoComponentsDeployments(ctx context.Context, parentDynamoGraphD
if component.ComponentType == commonconsts.ComponentTypePlanner {
// ensure that the extraPodSpec is not nil
if deployment.Spec.ExtraPodSpec == nil {
deployment.Spec.ExtraPodSpec = &common.ExtraPodSpec{}
deployment.Spec.ExtraPodSpec = &v1alpha1.ExtraPodSpec{}
}
// ensure that the embedded PodSpec struct is not nil
if deployment.Spec.ExtraPodSpec.PodSpec == nil {
......@@ -231,10 +230,10 @@ func overrideWithDynDeploymentConfig(ctx context.Context, dynamoDeploymentCompon
dynamoDeploymentComponent.Spec.Replicas = componentDynConfig.ServiceArgs.Workers
}
if componentDynConfig.ServiceArgs != nil && componentDynConfig.ServiceArgs.Resources != nil {
requests := &common.ResourceItem{}
limits := &common.ResourceItem{}
requests := &v1alpha1.ResourceItem{}
limits := &v1alpha1.ResourceItem{}
if dynamoDeploymentComponent.Spec.Resources == nil {
dynamoDeploymentComponent.Spec.Resources = &common.Resources{
dynamoDeploymentComponent.Spec.Resources = &v1alpha1.Resources{
Requests: requests,
Limits: limits,
}
......
......@@ -66,29 +66,47 @@ spec:
- -m
- dynamo.frontend
{{- end }}
{{ if $serviceSpec.resources }}
{{- if $serviceSpec.resources }}
{{- $hasResources := false }}
{{- if or $serviceSpec.resources.requests $serviceSpec.resources.limits }}
{{- $hasResources = true }}
{{- end }}
{{- if $hasResources }}
resources:
{{- if $serviceSpec.resources.requests }}
{{- $requestsGpuResourceName := "nvidia.com/gpu" }}
{{- if $serviceSpec.resources.requests.gpuType }}
{{- $requestsGpuResourceName = $serviceSpec.resources.requests.gpuType }}
{{- end }}
requests:
{{ if $serviceSpec.resources.cpu }}
cpu: "{{ $serviceSpec.resources.cpu }}"
{{ end }}
{{ if $serviceSpec.resources.memory }}
memory: "{{ $serviceSpec.resources.memory }}"
{{ end }}
{{ if $serviceSpec.resources.gpu }}
nvidia.com/gpu: "{{ $serviceSpec.resources.gpu }}"
{{ end }}
{{- if $serviceSpec.resources.requests.cpu }}
cpu: "{{ $serviceSpec.resources.requests.cpu }}"
{{- end }}
{{- if $serviceSpec.resources.requests.memory }}
memory: "{{ $serviceSpec.resources.requests.memory }}"
{{- end }}
{{- if $serviceSpec.resources.requests.gpu }}
{{ $requestsGpuResourceName }}: "{{ $serviceSpec.resources.requests.gpu }}"
{{- end }}
{{- end }}
{{- if $serviceSpec.resources.limits }}
{{- $limitsGpuResourceName := "nvidia.com/gpu" }}
{{- if $serviceSpec.resources.limits.gpuType }}
{{- $limitsGpuResourceName = $serviceSpec.resources.limits.gpuType }}
{{- end }}
limits:
{{ if $serviceSpec.resources.cpu }}
cpu: "{{ $serviceSpec.resources.cpu }}"
{{ end }}
{{ if $serviceSpec.resources.memory }}
memory: "{{ $serviceSpec.resources.memory }}"
{{ end }}
{{ if $serviceSpec.resources.gpu }}
nvidia.com/gpu: "{{ $serviceSpec.resources.gpu }}"
{{ end }}
{{ end }}
{{- if $serviceSpec.resources.limits.cpu }}
cpu: "{{ $serviceSpec.resources.limits.cpu }}"
{{- end }}
{{- if $serviceSpec.resources.limits.memory }}
memory: "{{ $serviceSpec.resources.limits.memory }}"
{{- end }}
{{- if $serviceSpec.resources.limits.gpu }}
{{ $limitsGpuResourceName }}: "{{ $serviceSpec.resources.limits.gpu }}"
{{- end }}
{{- end }}
{{- end }}
{{- end }}
{{- if $serviceSpec.envFromSecret }}
envFrom:
- secretRef:
......
......@@ -39,27 +39,45 @@ spec:
- name: main
image: {{ $serviceSpec.extraPodSpec.mainContainer.image }}
{{- if $serviceSpec.resources }}
{{- $hasResources := false }}
{{- if or $serviceSpec.resources.requests $serviceSpec.resources.limits }}
{{- $hasResources = true }}
{{- end }}
{{- if $hasResources }}
resources:
{{- if $serviceSpec.resources.requests }}
{{- $requestsGpuResourceName := "nvidia.com/gpu" }}
{{- if $serviceSpec.resources.requests.gpuType }}
{{- $requestsGpuResourceName = $serviceSpec.resources.requests.gpuType }}
{{- end }}
requests:
{{- if $serviceSpec.resources.cpu }}
cpu: "{{ $serviceSpec.resources.cpu }}"
{{- if $serviceSpec.resources.requests.cpu }}
cpu: "{{ $serviceSpec.resources.requests.cpu }}"
{{- end }}
{{- if $serviceSpec.resources.memory }}
memory: "{{ $serviceSpec.resources.memory }}"
{{- if $serviceSpec.resources.requests.memory }}
memory: "{{ $serviceSpec.resources.requests.memory }}"
{{- end }}
{{- if $serviceSpec.resources.gpu }}
nvidia.com/gpu: "{{ $serviceSpec.resources.gpu }}"
{{- if $serviceSpec.resources.requests.gpu }}
{{ $requestsGpuResourceName }}: "{{ $serviceSpec.resources.requests.gpu }}"
{{- end }}
{{- end }}
{{- if $serviceSpec.resources.limits }}
{{- $limitsGpuResourceName := "nvidia.com/gpu" }}
{{- if $serviceSpec.resources.limits.gpuType }}
{{- $limitsGpuResourceName = $serviceSpec.resources.limits.gpuType }}
{{- end }}
limits:
{{- if $serviceSpec.resources.cpu }}
cpu: "{{ $serviceSpec.resources.cpu }}"
{{- if $serviceSpec.resources.limits.cpu }}
cpu: "{{ $serviceSpec.resources.limits.cpu }}"
{{- end }}
{{- if $serviceSpec.resources.memory }}
memory: "{{ $serviceSpec.resources.memory }}"
{{- if $serviceSpec.resources.limits.memory }}
memory: "{{ $serviceSpec.resources.limits.memory }}"
{{- end }}
{{- if $serviceSpec.resources.gpu }}
nvidia.com/gpu: "{{ $serviceSpec.resources.gpu }}"
{{- if $serviceSpec.resources.limits.gpu }}
{{ $limitsGpuResourceName }}: "{{ $serviceSpec.resources.limits.gpu }}"
{{- end }}
{{- end }}
{{- end }}
{{- end }}
workingDir: {{ $serviceSpec.extraPodSpec.mainContainer.workingDir }}
{{- if $serviceSpec.extraPodSpec.mainContainer.command }}
......
......@@ -123,6 +123,8 @@ _Appears in:_
| `created` _boolean_ | Created indicates whether the DGD has been successfully created.<br />Used to prevent recreation if the DGD is manually deleted by users. | | |
#### DynamoComponentDeployment
......@@ -423,6 +425,41 @@ _Appears in:_
| `ready` _boolean_ | Ready indicates whether the endpoint is ready to serve traffic<br />For LoRA models: true if the POST /loras request succeeded with a 2xx status code<br />For base models: always false (no probing performed) | | |
#### ExtraPodMetadata
_Appears in:_
- [DynamoComponentDeploymentSharedSpec](#dynamocomponentdeploymentsharedspec)
- [DynamoComponentDeploymentSpec](#dynamocomponentdeploymentspec)
| Field | Description | Default | Validation |
| --- | --- | --- | --- |
| `annotations` _object (keys:string, values:string)_ | | | |
| `labels` _object (keys:string, values:string)_ | | | |
#### ExtraPodSpec
_Appears in:_
- [DynamoComponentDeploymentSharedSpec](#dynamocomponentdeploymentsharedspec)
- [DynamoComponentDeploymentSpec](#dynamocomponentdeploymentspec)
| Field | Description | Default | Validation |
| --- | --- | --- | --- |
| `mainContainer` _[Container](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#container-v1-core)_ | | | |
#### IngressSpec
......@@ -465,6 +502,8 @@ _Appears in:_
| `secretName` _string_ | SecretName is the name of a Kubernetes Secret containing the TLS certificate and key. | | |
#### ModelReference
......@@ -556,6 +595,46 @@ _Appears in:_
| `profilerImage` _string_ | ProfilerImage specifies the container image to use for profiling jobs.<br />This image contains the profiler code and dependencies needed for SLA-based profiling.<br />Example: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.6.1" | | Required: \{\} <br /> |
#### ResourceItem
_Appears in:_
- [Resources](#resources)
| Field | Description | Default | Validation |
| --- | --- | --- | --- |
| `cpu` _string_ | CPU specifies the CPU resource request/limit (e.g., "1000m", "2") | | |
| `memory` _string_ | Memory specifies the memory resource request/limit (e.g., "4Gi", "8Gi") | | |
| `gpu` _string_ | GPU indicates the number of GPUs to request.<br />Total number of GPUs is NumberOfNodes * GPU in case of multinode deployment. | | |
| `gpuType` _string_ | GPUType specifies a custom GPU resource name, e.g. "gpu.intel.com/xe".<br />If not specified, the GPU type defaults to "nvidia.com/gpu". | | |
| `custom` _object (keys:string, values:string)_ | Custom specifies additional custom resource requests/limits | | |
#### Resources
Resources defines the resource requests and limits for a component, including CPU, memory,
GPUs/devices, and any runtime-specific resources.
_Appears in:_
- [DynamoComponentDeploymentSharedSpec](#dynamocomponentdeploymentsharedspec)
- [DynamoComponentDeploymentSpec](#dynamocomponentdeploymentspec)
| Field | Description | Default | Validation |
| --- | --- | --- | --- |
| `requests` _[ResourceItem](#resourceitem)_ | Requests specifies the minimum resources required by the component | | |
| `limits` _[ResourceItem](#resourceitem)_ | Limits specifies the maximum resources allowed for the component | | |
| `claims` _[ResourceClaim](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#resourceclaim-v1-core) array_ | Claims specifies resource claims for dynamic resource allocation | | |
#### SharedMemorySpec
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment