Unverified Commit 84737baf authored by atchernych's avatar atchernych Committed by GitHub
Browse files

fix: rm dynamoNamespace from GAIE example (#4313)


Signed-off-by: Anna Tchernych <atchernych@nvidia.com>
Signed-off-by: atchernych <atchernych@nvidia.com>
parent 611c213b
...@@ -111,13 +111,12 @@ cd deploy/inference-gateway ...@@ -111,13 +111,12 @@ cd deploy/inference-gateway
# Export the Dynamo image you have used when deploying your model in Step 3. # Export the Dynamo image you have used when deploying your model in Step 3.
export DYNAMO_IMAGE=<the-dynamo-image-you-have-used-when-deploying-the-model> export DYNAMO_IMAGE=<the-dynamo-image-you-have-used-when-deploying-the-model>
# Export the image tag provided by Dynamo (nvcr.io/nvstaging/ai-dynamo/epp-inference-extension-dynamo:v0.6.0-1) or you can build the Dynamo EPP image by following the commands later in this README. # Export the FrontEnd image tag provided by Dynamo (recommended) or build the Dynamo EPP image by following the commands later in this README.
export EPP_IMAGE=<the-epp-image-you-built> export EPP_IMAGE=<the-epp-image-you-built>
``` ```
```bash ```bash
helm upgrade --install dynamo-gaie ./helm/dynamo-gaie -n my-model -f ./vllm_agg_qwen.yaml --set-string extension.image=$EPP_IMAGE helm upgrade --install dynamo-gaie ./helm/dynamo-gaie -n my-model -f ./vllm_agg_qwen.yaml --set-string extension.image=$EPP_IMAGE
# do not include --set-string extension.image=$EPP_IMAGE to use the default images
``` ```
Key configurations include: Key configurations include:
...@@ -126,7 +125,7 @@ Key configurations include: ...@@ -126,7 +125,7 @@ Key configurations include:
- A service for the inference gateway - A service for the inference gateway
- Required RBAC roles and bindings - Required RBAC roles and bindings
- RBAC permissions - RBAC permissions
- values-dynamo-epp.yaml sets epp.dynamo.namespace=vllm-agg for the bundled example. Point it at your actual Dynamo namespace by editing that file or adding --set epp.dynamo.namespace=<namespace> (and likewise for epp.dynamo.component, epp.dynamo.kvBlockSize if they differ). - dynamoGraphDeploymentName - the name of the Dynamo Graph where your model is deployed.
**Configuration** **Configuration**
...@@ -147,7 +146,7 @@ You can configure the plugin by setting environment vars in your [values-dynamo- ...@@ -147,7 +146,7 @@ You can configure the plugin by setting environment vars in your [values-dynamo-
Dynamo provides a custom routing plugin `pkg/epp/scheduling/plugins/dynamo_kv_scorer/plugin.go` to perform efficient kv routing. Dynamo provides a custom routing plugin `pkg/epp/scheduling/plugins/dynamo_kv_scorer/plugin.go` to perform efficient kv routing.
The Dynamo router is built as a static library that the EPP router calls to provide fast inference. The Dynamo router is built as a static library that the EPP router calls to provide fast inference.
You can either use the image `nvcr.io/nvstaging/ai-dynamo/epp-inference-extension-dynamo:v0.6.0-1` for the EPP_IMAGE in the Helm deployment command and proceed to step 2, or you can build the image yourself following the steps below. You can either use the special FrontEnd image for the EPP_IMAGE in the Helm deployment command and proceed to step 2, or you can build the image yourself following the steps below.
##### 1. Build the custom EPP image ##### ##### 1. Build the custom EPP image #####
......
...@@ -65,6 +65,21 @@ app.kubernetes.io/name: {{ include "dynamo-gaie.name" . }} ...@@ -65,6 +65,21 @@ app.kubernetes.io/name: {{ include "dynamo-gaie.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }} app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }} {{- end }}
{{/*
dynamo-gaie.dynamoNamespace

Renders the Dynamo namespace string "<release namespace>-<dynamoGraphDeploymentName>".
Each part is whitespace-trimmed before use. Rendering aborts via `fail` when
.Values.dynamoGraphDeploymentName or .Release.Namespace is empty after trimming,
so callers never receive a partially built value.
*/}}
{{- define "dynamo-gaie.dynamoNamespace" -}}
{{- $graph := trim (default "" .Values.dynamoGraphDeploymentName) -}}
{{- if empty $graph -}}
{{- fail "set dynamoGraphDeploymentName to derive the Dynamo namespace" -}}
{{- end -}}
{{- $relNs := trim (default "" .Release.Namespace) -}}
{{- if empty $relNs -}}
{{- fail "Release.Namespace must be set to derive the Dynamo namespace" -}}
{{- end -}}
{{- list $relNs $graph | join "-" -}}
{{- end }}
{{/* {{/*
Create the name of the service account to use Create the name of the service account to use
*/}} */}}
......
...@@ -16,8 +16,8 @@ ...@@ -16,8 +16,8 @@
{{- $platformNs := default .Release.Namespace .Values.platformNamespace -}} {{- $platformNs := default .Release.Namespace .Values.platformNamespace -}}
{{- $platformName := default "dynamo-platform" .Values.platformReleaseName -}} {{- $platformName := default "dynamo-platform" .Values.platformReleaseName -}}
{{- $useDynamo := default false .Values.epp.useDynamo -}} {{- $useDynamo := default false .Values.epp.useDynamo -}}
{{- $dynNsAll := default .Values.dynamoNamespace .Values.epp.dynamo.namespace -}} {{- $resolvedDynNs := (include "dynamo-gaie.dynamoNamespace" .) | trim -}}
{{- $ns := ternary (required "set epp.dynamo.namespace (or top-level dynamoNamespace) when epp.useDynamo=true" $dynNsAll) "" $useDynamo -}} {{- $ns := ternary (required "set dynamoGraphDeploymentName when epp.useDynamo=true" $resolvedDynNs) "" $useDynamo -}}
{{- $kv := default "16" .Values.epp.dynamo.kvBlockSize -}} {{- $kv := default "16" .Values.epp.dynamo.kvBlockSize -}}
{{- $std := .Values.extension.standardImage -}} {{- $std := .Values.extension.standardImage -}}
{{- $dyn := .Values.extension.dynamoImage -}} {{- $dyn := .Values.extension.dynamoImage -}}
...@@ -91,7 +91,7 @@ spec: ...@@ -91,7 +91,7 @@ spec:
value: "{{ $platformName }}-etcd.{{ $platformNs }}:2379" value: "{{ $platformName }}-etcd.{{ $platformNs }}:2379"
- name: NATS_SERVER - name: NATS_SERVER
value: "nats://{{ $platformName }}-nats.{{ $platformNs }}:4222" value: "nats://{{ $platformName }}-nats.{{ $platformNs }}:4222"
- name: DYN_NAMESPACE - name: DYNAMO_NAMESPACE
value: "{{ $ns }}" value: "{{ $ns }}"
- name: DYNAMO_KV_BLOCK_SIZE - name: DYNAMO_KV_BLOCK_SIZE
value: "{{ $kv }}" value: "{{ $kv }}"
......
...@@ -21,7 +21,7 @@ spec: ...@@ -21,7 +21,7 @@ spec:
targetPortNumber: {{ .Values.inferencePool.port }} targetPortNumber: {{ .Values.inferencePool.port }}
selector: selector:
nvidia.com/dynamo-component: Frontend nvidia.com/dynamo-component: Frontend
nvidia.com/dynamo-namespace: {{ .Values.dynamoNamespace }} nvidia.com/dynamo-namespace: {{ include "dynamo-gaie.dynamoNamespace" . }}
extensionRef: extensionRef:
failureMode: FailOpen failureMode: FailOpen
group: "" group: ""
......
...@@ -17,8 +17,8 @@ ...@@ -17,8 +17,8 @@
# This is a YAML-formatted file. # This is a YAML-formatted file.
# Declare variables to be passed into your templates. # Declare variables to be passed into your templates.
# This is the Dynamo namespace where the dynamo model is deployed # Name of the DynamoGraphDeployment (DGD) that backs this gateway integration
dynamoNamespace: "vllm-agg" dynamoGraphDeploymentName: "vllm-agg"
# This is the port on which the model is exposed # This is the port on which the model is exposed
model: model:
...@@ -72,7 +72,6 @@ epp: ...@@ -72,7 +72,6 @@ epp:
# Dynamo-specific settings (only used when useDynamo: true) # Dynamo-specific settings (only used when useDynamo: true)
configFile: "/etc/epp/epp-config-dynamo.yaml" configFile: "/etc/epp/epp-config-dynamo.yaml"
dynamo: dynamo:
namespace: "vllm-agg" # Required when useDynamo: true.
kvBlockSize: "16" kvBlockSize: "16"
# Platform configuration (for Dynamo mode) # Platform configuration (for Dynamo mode)
......
...@@ -5,8 +5,8 @@ ...@@ -5,8 +5,8 @@
# This is a YAML-formatted file. # This is a YAML-formatted file.
# Declare variables to be passed into your templates. # Declare variables to be passed into your templates.
# This is the Dynamo namespace where the dynamo model is deployed # Name of the DynamoGraphDeployment (DGD) backing this model deployment.
dynamoNamespace: "my-model-vllm-agg" dynamoGraphDeploymentName: "vllm-agg"
# This is the port on which the model is exposed # This is the port on which the model is exposed
model: model:
......
...@@ -13,7 +13,7 @@ spec: ...@@ -13,7 +13,7 @@ spec:
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: gitlab-master.nvidia.com:5005/dl/ai-dynamo/dynamo/dynamo:vllm-1029 image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
VllmDecodeWorker: VllmDecodeWorker:
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
dynamoNamespace: vllm-agg dynamoNamespace: vllm-agg
...@@ -24,7 +24,7 @@ spec: ...@@ -24,7 +24,7 @@ spec:
gpu: "1" gpu: "1"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: gitlab-master.nvidia.com:5005/dl/ai-dynamo/dynamo/dynamo:vllm-1029 image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/examples/backends/vllm workingDir: /workspace/examples/backends/vllm
command: command:
- python3 - python3
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment