# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

{{- /*
  Renders an apps/v1 Deployment for one component.

  The preamble below resolves every chart value into a local `$variable`
  (applying defaults and range checks) so that the YAML body only ever
  references validated locals.

  Required values (rendering fails without them):
    .component.name
    .image.name
    .distributed.requestPlane.etcdUrl
    .distributed.requestPlane.natsUrl

  Every action in the preamble uses `{{-` so that no whitespace leaks into the
  rendered manifest; the indentation of the actions is for readers only.
*/}}

{{- /* ---- Required: component identity -------------------------------- */}}
{{- /* `$component_namespace` must be declared before the `.component` block
       assigns to it; Go templates only allow `=` on an existing variable. */}}
{{- $component_name := "" }}
{{- $component_namespace := "default" }}
{{- with $.Values.component }}
  {{- $component_name = required "Property `.component.name` is required." .name }}
  {{- with .namespace }}
    {{- $component_namespace = (lower .) }}
  {{- end }}
{{- else }}
  {{- fail "Property `.component` is required." }}
{{- end }}

{{- /* ---- Required: container image ----------------------------------- */}}
{{- $container_image_name := "" }}
{{- with $.Values.image }}
  {{- $container_image_name = required "Property `.image.name` is required." .name }}
{{- else }}
  {{- fail "Property `.image` is required." }}
{{- end }}

{{- /* ---- Defaults for optional values -------------------------------- */}}
{{- $instance_count := 1 }}
{{- $kube_grace_period := 30 }}
{{- $kube_liveness_delay := 10 }}
{{- $kube_liveness_enabled := true }}
{{- $kube_liveness_fail := 15 }}
{{- $kube_liveness_period := 2 }}
{{- $kube_liveness_success := 1 }}
{{- $kube_readiness_delay := 10 }}
{{- $kube_readiness_enabled := true }}
{{- $kube_readiness_fail := 15 }}
{{- $kube_readiness_period := 2 }}
{{- $kube_readiness_success := 1 }}
{{- $kube_tolerations_count := 0 }}
{{- $model_repository_path := "/var/run/models" }}
{{- $parallel_pipeline := 1 }}
{{- $parallel_tensor := 1 }}
{{- $parallel_world := 1 }}
{{- $port_api := 443 }}
{{- $port_health := 8000 }}
{{- $port_metrics := 9347 }}
{{- $port_request := 9345 }}

{{- /* ---- Required: request plane endpoints --------------------------- */}}
{{- $request_plane_etcd_url := "" }}
{{- $request_plane_nats_url := "" }}
{{- with $.Values.distributed }}
  {{- with .requestPlane }}
    {{- $request_plane_etcd_url = required "Property `.distributed.requestPlane.etcdUrl` is required." .etcdUrl }}
    {{- $request_plane_nats_url = required "Property `.distributed.requestPlane.natsUrl` is required." .natsUrl }}
  {{- else }}
    {{- fail "Property `.distributed.requestPlane` is required." }}
  {{- end }}
{{- else }}
  {{- fail "Property `.distributed` is required." }}
{{- end }}

{{- $request_plane_timeout := 60 }}
{{- $resources_cpu := 4 }}
{{- $resources_ephemeral := "1Gi" }}
{{- $resources_gpu := 1 }}
{{- $resources_memory := "16Gi" }}
{{- $resources_shmem := "512Mi" }}
{{- $worker_count := 1 }}

{{- /* ---- Optional: .kubernetes (probes, grace period, tolerations) ---- */}}
{{- with $.Values.kubernetes }}
  {{- with .checks }}
    {{- with .liveness }}
      {{- /* Enabled unless `.enabled` is literally `false`. */}}
      {{- $kube_liveness_enabled = ne false .enabled }}
      {{- with .failureThreshold }}
        {{- $kube_liveness_fail = (int .) }}
        {{- if le $kube_liveness_fail 0 }}
          {{- fail "The value of property `.kubernetes.checks.liveness.failureThreshold` must be greater than zero." }}
        {{- end }}
      {{- end }}
      {{- with .initialDelaySeconds }}
        {{- $kube_liveness_delay = (int .) }}
        {{- if le $kube_liveness_delay 0 }}
          {{- fail "The value of property `.kubernetes.checks.liveness.initialDelaySeconds` must be greater than zero." }}
        {{- end }}
      {{- end }}
      {{- with .periodSeconds }}
        {{- $kube_liveness_period = (int .) }}
        {{- if le $kube_liveness_period 0 }}
          {{- fail "The value of property `.kubernetes.checks.liveness.periodSeconds` must be greater than zero." }}
        {{- end }}
      {{- end }}
      {{- with .successThreshold }}
        {{- $kube_liveness_success = (int .) }}
        {{- if le $kube_liveness_success 0 }}
          {{- fail "The value of property `.kubernetes.checks.liveness.successThreshold` must be greater than zero." }}
        {{- end }}
      {{- end }}
    {{- end }}
    {{- with .readiness }}
      {{- /* Enabled unless `.enabled` is literally `false`. */}}
      {{- $kube_readiness_enabled = ne false .enabled }}
      {{- with .failureThreshold }}
        {{- $kube_readiness_fail = (int .) }}
        {{- if le $kube_readiness_fail 0 }}
          {{- fail "The value of property `.kubernetes.checks.readiness.failureThreshold` must be greater than zero." }}
        {{- end }}
      {{- end }}
      {{- with .initialDelaySeconds }}
        {{- $kube_readiness_delay = (int .) }}
        {{- if le $kube_readiness_delay 0 }}
          {{- fail "The value of property `.kubernetes.checks.readiness.initialDelaySeconds` must be greater than zero." }}
        {{- end }}
      {{- end }}
      {{- with .periodSeconds }}
        {{- $kube_readiness_period = (int .) }}
        {{- if le $kube_readiness_period 0 }}
          {{- fail "The value of property `.kubernetes.checks.readiness.periodSeconds` must be greater than zero." }}
        {{- end }}
      {{- end }}
      {{- with .successThreshold }}
        {{- $kube_readiness_success = (int .) }}
        {{- if le $kube_readiness_success 0 }}
          {{- fail "The value of property `.kubernetes.checks.readiness.successThreshold` must be greater than zero." }}
        {{- end }}
      {{- end }}
    {{- end }}
  {{- end }}
  {{- with .terminationGracePeriod }}
    {{- $kube_grace_period = (int .) }}
    {{- if le $kube_grace_period 0 }}
      {{- fail "The value of property `.kubernetes.terminationGracePeriod` must be greater than zero." }}
    {{- end }}
  {{- end }}
  {{- with .tolerations }}
    {{- if gt (len .) 0 }}
      {{- $kube_tolerations_count = (len .) }}
    {{- end }}
  {{- end }}
{{- end }}

{{- /* ---- Optional: .distributed overrides ---------------------------- */}}
{{- with $.Values.distributed }}
  {{- with .requestPlane }}
    {{- with .timeout }}
      {{- $request_plane_timeout = (int .) }}
    {{- end }}
  {{- end }}
  {{- with .workerCount }}
    {{- $worker_count = (int .) }}
  {{- end }}
{{- end }}

{{- /* ---- Optional: .model (replicas, parallelism, repository path) --- */}}
{{- with $.Values.model }}
  {{- with .instance }}
    {{- with .count }}
      {{- $instance_count = (int .) }}
      {{- if le $instance_count 0 }}
        {{- fail "The value of property `.model.instance.count` must be greater than zero." }}
      {{- end }}
    {{- end }}
    {{- with .parallelism }}
      {{- with .pipeline }}
        {{- $parallel_pipeline = (int .) }}
        {{- if le $parallel_pipeline 0 }}
          {{- fail "The value of property `.model.instance.parallelism.pipeline` must be greater than zero." }}
        {{- end }}
      {{- end }}
      {{- with .tensor }}
        {{- $parallel_tensor = (int .) }}
        {{- if le $parallel_tensor 0 }}
          {{- fail "The value of property `.model.instance.parallelism.tensor` must be greater than zero." }}
        {{- end }}
      {{- end }}
      {{- /* World size is the product of both parallelism degrees. */}}
      {{- $parallel_world = mul $parallel_pipeline $parallel_tensor }}
    {{- end }}
  {{- end }}
  {{- with .repository }}
    {{- with .path }}
      {{- $model_repository_path = . }}
    {{- end }}
  {{- end }}
{{- end }}

{{- /* ---- Optional: .ports (each must be within 1..65535) ------------- */}}
{{- with $.Values.ports }}
  {{- with .api }}
    {{- $port_api = (int .) }}
    {{- if le $port_api 0 }}
      {{- fail "The value of property `.ports.api` must be greater than zero." }}
    {{- end }}
    {{- if gt $port_api 65535 }}
      {{- fail "The value of property `.ports.api` must be less than 65,536." }}
    {{- end }}
  {{- end }}
  {{- with .health }}
    {{- $port_health = (int .) }}
    {{- if le $port_health 0 }}
      {{- fail "The value of property `.ports.health` must be greater than zero." }}
    {{- end }}
    {{- if gt $port_health 65535 }}
      {{- fail "The value of property `.ports.health` must be less than 65,536." }}
    {{- end }}
  {{- end }}
  {{- with .metrics }}
    {{- $port_metrics = (int .) }}
    {{- if le $port_metrics 0 }}
      {{- fail "The value of property `.ports.metrics` must be greater than zero." }}
    {{- end }}
    {{- if gt $port_metrics 65535 }}
      {{- fail "The value of property `.ports.metrics` must be less than 65,536." }}
    {{- end }}
  {{- end }}
  {{- with .request }}
    {{- $port_request = (int .) }}
    {{- if le $port_request 0 }}
      {{- fail "The value of property `.ports.request` must be greater than zero." }}
    {{- end }}
    {{- if gt $port_request 65535 }}
      {{- fail "The value of property `.ports.request` must be less than 65,536." }}
    {{- end }}
  {{- end }}
{{- end }}

{{- /* ---- Optional: .resources ---------------------------------------- */}}
{{- with $.Values.resources }}
  {{- with .cpu }}
    {{- $resources_cpu = (int .) }}
    {{- if le $resources_cpu 0 }}
      {{- fail "The value of property `.resources.cpu` must be greater than zero." }}
    {{- end }}
  {{- end }}
  {{- with .gpu }}
    {{- /* Test for presence instead of truthiness: `with .count` would skip an
           explicit `count: 0`, making a GPU-less deployment impossible even
           though zero is a permitted value. */}}
    {{- if not (kindIs "invalid" .count) }}
      {{- $resources_gpu = (int .count) }}
      {{- if lt $resources_gpu 0 }}
        {{- fail "The value of property `.resources.gpu.count` must be greater than or equal to zero." }}
      {{- end }}
    {{- end }}
  {{- end }}
  {{- with .ephemeral }}
    {{- $resources_ephemeral = . }}
  {{- end }}
  {{- with .memory }}
    {{- $resources_memory = . }}
  {{- end }}
  {{- with .sharedMemory }}
    {{- $resources_shmem = . }}
  {{- end }}
{{- end }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ $.Release.Name }}
  namespace: {{ $.Release.Namespace | quote }}
  annotations:
{{- with $ }}
{{- include "nvidia.annotations.chart" . | indent 4 }}
{{- end }}
  labels:
    app: {{ $.Release.Name }}
    app.kubernetes.io/component: {{ $component_name }}
{{- with $ }}
{{- include "nvidia.labels.chart" . | indent 4 }}
{{- end }}
spec:
  selector:
    matchLabels:
      app: {{ $.Release.Name }}
      app.kubernetes.io/component: {{ $component_name }}
  replicas: {{ $instance_count }}
  template:
    metadata:
      annotations:
{{- with $ }}
{{- include "nvidia.annotations.chart" . | indent 8 }}
{{- end }}
      labels:
        app: {{ $.Release.Name }}
        app.kubernetes.io/component: {{ $component_name }}
{{- with $ }}
{{- include "nvidia.labels.chart" . | indent 8 }}
{{- end }}
    spec:
{{- /* GPU workloads are pinned to nodes labelled `nvidia.com/gpu`, optionally
       narrowed to the products listed in `.resources.gpu.product`. */}}
{{- if ne $resources_gpu 0 }}
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: nvidia.com/gpu
                operator: Exists
{{- with $.Values.resources }}
{{- with .gpu }}
{{- with .product }}
{{- if gt (len .) 0 }}
              - key: nvidia.com/gpu.product
                operator: In
                values:
{{ toYaml . | indent 16 }}
{{- end }}
{{- end }}
{{- end }}
{{- end }}
{{- end }}
      containers:
      - name: server
        args:
        - uv
        - run
        - dynamo
        - start
        - --service-name
        - {{ $component_name | quote }}
{{- /* NOTE(review): with `command: [sh, -c]` only the first arg (`uv`) is taken
       as the command string and the remaining args become positional
       parameters — confirm this is the intended invocation. */}}
        command:
        - sh
        - -c
        env:
{{- /* Environment variable values must be strings, so every numeric value is
       passed through `quote`. */}}
{{- if gt $parallel_world 1 }}
        - name: DYNEMO_LLM_PP
          value: {{ $parallel_pipeline | quote }}
        - name: DYNEMO_LLM_TP
          value: {{ $parallel_tensor | quote }}
{{- end }}
        - name: DYNEMO_NAME
          value: {{ $component_name | quote }}
{{- if gt (len $component_namespace) 0 }}
        - name: DYNEMO_NAMESPACE
          value: {{ $component_namespace | quote }}
{{- end }}
{{- /* Port variables are only emitted when they differ from the defaults
       declared in the preamble. */}}
{{- if ne $port_health 8000 }}
        - name: DYNEMO_PORT_HEALTH
          value: {{ $port_health | quote }}
{{- end }}
{{- if ne $port_metrics 9347 }}
        - name: DYNEMO_PORT_METRICS
          value: {{ $port_metrics | quote }}
{{- end }}
{{- if ne $port_request 9345 }}
        - name: DYNEMO_PORT_REQUEST
          value: {{ $port_request | quote }}
{{- end }}
        - name: DYNEMO_RP_ETCD_URL
          value: {{ $request_plane_etcd_url | quote }}
        - name: DYNEMO_RP_NATS_URL
          value: {{ $request_plane_nats_url | quote }}
        - name: DYNEMO_RP_TIMEOUT
          value: {{ $request_plane_timeout | quote }}
{{- if gt $worker_count 1 }}
        - name: DYNEMO_WORKER_COUNT
          value: {{ $worker_count | quote }}
{{- end }}
        image: {{ $container_image_name }}
        imagePullPolicy: IfNotPresent
{{- if $kube_liveness_enabled }}
        livenessProbe:
          failureThreshold: {{ $kube_liveness_fail }}
          httpGet:
            path: /v2/health/live
            port: {{ $port_health }}
          initialDelaySeconds: {{ $kube_liveness_delay }}
          periodSeconds: {{ $kube_liveness_period }}
          successThreshold: {{ $kube_liveness_success }}
{{- end }}
        ports:
        - containerPort: {{ $port_health }}
          name: health
        - containerPort: {{ $port_request }}
          name: request
        - containerPort: {{ $port_api }}
          name: api
        - containerPort: {{ $port_metrics }}
          name: metrics
{{- if $kube_readiness_enabled }}
        readinessProbe:
          failureThreshold: {{ $kube_readiness_fail }}
          httpGet:
            path: /v2/health/ready
            port: {{ $port_health }}
          initialDelaySeconds: {{ $kube_readiness_delay }}
          periodSeconds: {{ $kube_readiness_period }}
          successThreshold: {{ $kube_readiness_success }}
{{- end }}
        resources:
          limits:
            cpu: {{ $resources_cpu }}
            ephemeral-storage: {{ $resources_ephemeral }}
{{- if gt $resources_gpu 0 }}
            nvidia.com/gpu: {{ $resources_gpu }}
{{- end }}
            memory: {{ $resources_memory }}
          requests:
            cpu: {{ $resources_cpu }}
            ephemeral-storage: {{ $resources_ephemeral }}
{{- if gt $resources_gpu 0 }}
            nvidia.com/gpu: {{ $resources_gpu }}
{{- end }}
            memory: {{ $resources_memory }}
        volumeMounts:
{{- /* Each `.model.repository.volumeMounts` entry is mounted at the repository
       path, or at `<repository path>/<entry path>` when the entry has a path.
       Paths containing a `/..` segment are rejected. */}}
{{- with $.Values.model }}
{{- with .repository }}
{{- with .volumeMounts }}
{{- range . }}
{{- $mount_path := $model_repository_path }}
{{- $volume_name := required "Property `.model.repository.volumeMounts[*].name` is required." .name }}
{{- if eq "shared-memory" $volume_name }}
{{- fail "Property `.model.repository.volumeMounts[*].name` cannot be `shared-memory` because it is a reserved name." }}
{{- end }}
{{- with .path }}
{{- $mount_path = printf "%s/%s" $model_repository_path (trimPrefix "/" .) }}
{{- if regexMatch "/\\.\\./?" $mount_path }}
{{- fail (printf "Value of property `.model.repository.volumeMounts[*].path` '%s' is illegal because '%s' is not a sub-directory of '%s'." . (clean $mount_path) $model_repository_path) }}
{{- end }}
{{- end }}
        - mountPath: {{ $mount_path }}
          name: {{ $volume_name }}
{{- end }}
{{- end }}
{{- end }}
{{- end }}
        - mountPath: /dev/shm
          name: shared-memory
{{- with $.Values.image }}
{{- with .pullSecrets }}
{{- if len . }}
      imagePullSecrets:
{{ toYaml . | indent 6 }}
{{- end }}
{{- end }}
{{- end }}
      restartPolicy: Always
      terminationGracePeriodSeconds: {{ $kube_grace_period }}
{{- if or (gt $resources_gpu 0) (gt $kube_tolerations_count 0) }}
      tolerations:
{{- if gt $resources_gpu 0 }}
      - effect: NoSchedule
        key: nvidia.com/gpu
        operator: Exists
{{- end }}
{{- with $.Values.kubernetes }}
{{- with .tolerations }}
{{ toYaml . | indent 6 }}
{{- end }}
{{- end }}
{{- end }}
      volumes:
{{- with $.Values.model }}
{{- with .repository }}
{{- with .volumeMounts }}
{{- range . }}
      - name: {{ required "Property `.model.repository.volumeMounts[*].name` is required." .name }}
        persistentVolumeClaim:
          claimName: {{ required "Property `.model.repository.volumeMounts[*].persistentVolumeClaim` is required." .persistentVolumeClaim }}
{{- end }}
{{- end }}
{{- end }}
{{- end }}
      - name: shared-memory
        emptyDir:
          medium: Memory
          sizeLimit: {{ $resources_shmem }}