Unverified Commit a2874fdc authored by julienmancuso's avatar julienmancuso Committed by GitHub
Browse files

feat: add possibility to use grove in dynamo graph helm chart (#1954)

parent fe718fd2
...@@ -40,6 +40,9 @@ spec: ...@@ -40,6 +40,9 @@ spec:
mainContainer: mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17 image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
- -c
args: args:
- "python3 -m dynamo.frontend --http-port 8000" - "python3 -m dynamo.frontend --http-port 8000"
VllmDecodeWorker: VllmDecodeWorker:
......
...@@ -40,6 +40,9 @@ spec: ...@@ -40,6 +40,9 @@ spec:
mainContainer: mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17 image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
- -c
args: args:
- "python3 -m dynamo.frontend --http-port 8000 --router-mode kv" - "python3 -m dynamo.frontend --http-port 8000 --router-mode kv"
VllmDecodeWorker: VllmDecodeWorker:
......
...@@ -40,6 +40,9 @@ spec: ...@@ -40,6 +40,9 @@ spec:
mainContainer: mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17 image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
- -c
args: args:
- "python3 -m dynamo.frontend --http-port 8000" - "python3 -m dynamo.frontend --http-port 8000"
VllmDecodeWorker: VllmDecodeWorker:
......
...@@ -40,6 +40,9 @@ spec: ...@@ -40,6 +40,9 @@ spec:
mainContainer: mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17 image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
- -c
args: args:
- "python3 -m dynamo.frontend --http-port 8000" - "python3 -m dynamo.frontend --http-port 8000"
VllmDecodeWorker: VllmDecodeWorker:
...@@ -79,6 +82,9 @@ spec: ...@@ -79,6 +82,9 @@ spec:
mainContainer: mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17 image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
- -c
args: args:
- "python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --enforce-eager 2>&1 | tee /tmp/vllm.log" - "python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --enforce-eager 2>&1 | tee /tmp/vllm.log"
VllmPrefillWorker: VllmPrefillWorker:
......
...@@ -40,6 +40,9 @@ spec: ...@@ -40,6 +40,9 @@ spec:
mainContainer: mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17 image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
- -c
args: args:
- "python3 -m dynamo.frontend --http-port 8000 --router-mode kv" - "python3 -m dynamo.frontend --http-port 8000 --router-mode kv"
VllmDecodeWorker: VllmDecodeWorker:
......
...@@ -26,6 +26,7 @@ This approach allows you to install Dynamo directly using a DynamoGraphDeploymen ...@@ -26,6 +26,7 @@ This approach allows you to install Dynamo directly using a DynamoGraphDeploymen
- Kubernetes 1.16+ - Kubernetes 1.16+
- ETCD v3.5+ (without auth) - ETCD v3.5+ (without auth)
- NATS v2.10+ (with jetstream enabled) - NATS v2.10+ (with jetstream enabled)
- Grove v0.1.0+ (optional; required only when deploying with Grove)
### Basic Installation ### Basic Installation
...@@ -33,6 +34,13 @@ Here is how you would install a VLLM inference backend example. ...@@ -33,6 +34,13 @@ Here is how you would install a VLLM inference backend example.
```bash ```bash
helm upgrade --install dynamo-graph ./deploy/helm/chart -n dynamo-cloud -f ./components/backends/vllm/deploy/agg.yaml helm upgrade --install dynamo-graph ./deploy/helm/chart -n dynamo-cloud -f ./components/backends/vllm/deploy/agg.yaml
### Installation using Grove
Same example as above, but using Grove PodGangSet resources.
```bash
helm upgrade --install dynamo-graph ./deploy/helm/chart -n dynamo-cloud -f ./components/backends/vllm/deploy/agg.yaml --set deploymentType=grove
``` ```
### Customizable Properties ### Customizable Properties
...@@ -54,6 +62,7 @@ helm upgrade --install dynamo-graph ./deploy/helm/chart -n dynamo-cloud \ ...@@ -54,6 +62,7 @@ helm upgrade --install dynamo-graph ./deploy/helm/chart -n dynamo-cloud \
| `imagePullSecrets` | Array of image pull secrets for accessing private registries | `imagePullSecrets[0].name=docker-secret-1` | | `imagePullSecrets` | Array of image pull secrets for accessing private registries | `imagePullSecrets[0].name=docker-secret-1` |
| `etcdAddr` | Address of the etcd service | `dynamo-platform-etcd:2379` | | `etcdAddr` | Address of the etcd service | `dynamo-platform-etcd:2379` |
| `natsAddr` | Address of the NATS messaging service | `nats://dynamo-platform-nats:4222` | | `natsAddr` | Address of the NATS messaging service | `nats://dynamo-platform-nats:4222` |
| `deploymentType` | Type of deployment to use. Can be `basic` or `grove`. If not specified, `basic` is used. | `deploymentType=grove` |
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# Render the basic Deployment resources when deploymentType is unset or explicitly set to "basic" (the default).
{{- if or (not .Values.deploymentType) (eq .Values.deploymentType "basic") -}}
{{- range $serviceName, $serviceSpec := .Values.spec.services }} {{- range $serviceName, $serviceSpec := .Values.spec.services }}
--- ---
apiVersion: apps/v1 apiVersion: apps/v1
...@@ -117,3 +119,4 @@ spec: ...@@ -117,3 +119,4 @@ spec:
scheme: HTTP scheme: HTTP
{{- end }} {{- end }}
{{- end }} {{- end }}
{{- end }}
\ No newline at end of file
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
{{- /*
Grove deployment flavour of the Dynamo graph chart.

Rendered only when .Values.deploymentType is "grove". An unset value is treated
as "basic" so the comparison never receives a nil operand.

Emits a single PodGangSet named after the release. Every entry of
.Values.spec.services becomes one clique (name and roleName are the lower-cased
service key) running a single "main" container built from the service's
extraPodSpec.mainContainer settings.

Values read here:
  deploymentType, imagePullSecrets, dynamoPort (default 8000),
  healthPort (default 5000), etcdAddr, natsAddr,
  spec.services.<name>.{replicas, resources, envFromSecret, livenessProbe,
  readinessProbe, extraPodSpec.mainContainer.{image, workingDir, command, args}}
*/ -}}
{{- if eq (.Values.deploymentType | default "basic") "grove" }}
---
apiVersion: grove.io/v1alpha1
kind: PodGangSet
metadata:
  name: {{ $.Release.Name }}
  labels:
    app: {{ $.Release.Name }}
spec:
  replicas: 1
  template:
    cliques:
    {{- range $serviceName, $serviceSpec := .Values.spec.services }}
    - name: {{ $serviceName | lower }}
      spec:
        roleName: {{ $serviceName | lower }}
        replicas: {{ $serviceSpec.replicas }}
        podSpec:
          {{- if $.Values.imagePullSecrets }}
          imagePullSecrets:
            {{- $.Values.imagePullSecrets | toYaml | nindent 12 }}
          {{- end }}
          containers:
          - name: main
            # Quoted so that an unusual tag can never be re-typed by the YAML parser.
            image: {{ $serviceSpec.extraPodSpec.mainContainer.image | quote }}
            {{- if $serviceSpec.resources }}
            # The same cpu / memory / gpu figures are used for both requests and limits.
            resources:
              requests:
                {{- if $serviceSpec.resources.cpu }}
                cpu: "{{ $serviceSpec.resources.cpu }}"
                {{- end }}
                {{- if $serviceSpec.resources.memory }}
                memory: "{{ $serviceSpec.resources.memory }}"
                {{- end }}
                {{- if $serviceSpec.resources.gpu }}
                nvidia.com/gpu: "{{ $serviceSpec.resources.gpu }}"
                {{- end }}
              limits:
                {{- if $serviceSpec.resources.cpu }}
                cpu: "{{ $serviceSpec.resources.cpu }}"
                {{- end }}
                {{- if $serviceSpec.resources.memory }}
                memory: "{{ $serviceSpec.resources.memory }}"
                {{- end }}
                {{- if $serviceSpec.resources.gpu }}
                nvidia.com/gpu: "{{ $serviceSpec.resources.gpu }}"
                {{- end }}
            {{- end }}
            {{- /* Emit workingDir only when configured, instead of rendering a null value. */}}
            {{- if $serviceSpec.extraPodSpec.mainContainer.workingDir }}
            workingDir: {{ $serviceSpec.extraPodSpec.mainContainer.workingDir | quote }}
            {{- end }}
            {{- if $serviceSpec.extraPodSpec.mainContainer.command }}
            command:
              {{- $serviceSpec.extraPodSpec.mainContainer.command | toYaml | nindent 14 }}
            {{- end }}
            {{- if $serviceSpec.extraPodSpec.mainContainer.args }}
            args:
              {{- $serviceSpec.extraPodSpec.mainContainer.args | toYaml | nindent 14 }}
            {{- end }}
            env:
            - name: DYNAMO_PORT
              value: "{{ $.Values.dynamoPort | default 8000 }}"
            {{- if $.Values.etcdAddr }}
            - name: ETCD_ENDPOINTS
              value: "{{ $.Values.etcdAddr }}"
            {{- end }}
            {{- if $.Values.natsAddr }}
            - name: NATS_SERVER
              value: "{{ $.Values.natsAddr }}"
            {{- end }}
            {{- if $serviceSpec.envFromSecret }}
            envFrom:
            - secretRef:
                name: {{ $serviceSpec.envFromSecret | quote }}
            {{- end }}
            ports:
            - name: health
              containerPort: {{ $.Values.healthPort | default 5000 }}
            # A per-service probe replaces the default probe entirely; the two are not merged.
            livenessProbe:
              {{- if $serviceSpec.livenessProbe }}
              {{- $serviceSpec.livenessProbe | toYaml | nindent 14 }}
              {{- else }}
              initialDelaySeconds: 60
              periodSeconds: 60
              timeoutSeconds: 5
              failureThreshold: 10
              successThreshold: 1
              httpGet:
                path: /healthz
                port: health
                scheme: HTTP
              {{- end }}
            readinessProbe:
              {{- if $serviceSpec.readinessProbe }}
              {{- $serviceSpec.readinessProbe | toYaml | nindent 14 }}
              {{- else }}
              initialDelaySeconds: 60
              periodSeconds: 60
              timeoutSeconds: 5
              failureThreshold: 10
              successThreshold: 1
              httpGet:
                path: /readyz
                port: health
                scheme: HTTP
              {{- end }}
    {{- end }}
{{- end }}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment