Unverified Commit d9e8d334, authored by atchernych and committed by GitHub
Browse files

fix: adjust kgateway setup (#3963)


Signed-off-by: Anna Tchernych <atchernych@nvidia.com>
parent 3fe5653b
...@@ -13,7 +13,7 @@ spec: ...@@ -13,7 +13,7 @@ spec:
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag image: gitlab-master.nvidia.com:5005/dl/ai-dynamo/dynamo/dynamo:vllm-1029
VllmDecodeWorker: VllmDecodeWorker:
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
dynamoNamespace: vllm-agg dynamoNamespace: vllm-agg
...@@ -24,12 +24,12 @@ spec: ...@@ -24,12 +24,12 @@ spec:
gpu: "1" gpu: "1"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag image: gitlab-master.nvidia.com:5005/dl/ai-dynamo/dynamo/dynamo:vllm-1029
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
command: command:
- python3 - python3
- -m - -m
- dynamo.vllm - dynamo.vllm
args: args:
- --model - --model
- Qwen/Qwen3-0.6B - Qwen/Qwen3-0.6B
...@@ -56,25 +56,6 @@ INFERENCE_EXTENSION_VERSION=v0.5.1 ...@@ -56,25 +56,6 @@ INFERENCE_EXTENSION_VERSION=v0.5.1
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/$INFERENCE_EXTENSION_VERSION/manifests.yaml kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/$INFERENCE_EXTENSION_VERSION/manifests.yaml
``` ```
c. Install `kgateway` CRDs and kgateway.
```bash
KGATEWAY_VERSION=v2.0.3
# Install the Kgateway CRDs
helm upgrade -i --create-namespace --namespace kgateway-system --version $KGATEWAY_VERSION kgateway-crds oci://cr.kgateway.dev/kgateway-dev/charts/kgateway-crds
# Install Kgateway
helm upgrade -i --namespace kgateway-system --version $KGATEWAY_VERSION kgateway oci://cr.kgateway.dev/kgateway-dev/charts/kgateway --set inferenceExtension.enabled=true
```
d. Deploy the Gateway Instance
```bash
kubectl create namespace my-model
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/kgateway/gateway.yaml -n my-model
```
```bash ```bash
kubectl get gateway inference-gateway -n my-model kubectl get gateway inference-gateway -n my-model
......
...@@ -32,13 +32,15 @@ INFERENCE_EXTENSION_VERSION=v0.5.1 ...@@ -32,13 +32,15 @@ INFERENCE_EXTENSION_VERSION=v0.5.1
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/$INFERENCE_EXTENSION_VERSION/manifests.yaml -n $MODEL_NAMESPACE kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/$INFERENCE_EXTENSION_VERSION/manifests.yaml -n $MODEL_NAMESPACE
# Install the Kgateway CRDs and Kgateway # Install and upgrade Kgateway (includes CRDs)
KGATEWAY_VERSION=v2.0.3 KGATEWAY_VERSION=v2.0.3
KGATEWAY_SYSTEM_NAMESPACE=kgateway-system KGATEWAY_SYSTEM_NAMESPACE=kgateway-system
helm repo add kgateway-dev oci://cr.kgateway.dev/kgateway-dev || true kubectl create namespace $KGATEWAY_SYSTEM_NAMESPACE || true
helm upgrade -i --create-namespace --namespace $KGATEWAY_SYSTEM_NAMESPACE --version $KGATEWAY_VERSION kgateway-crds oci://cr.kgateway.dev/kgateway-dev/charts/kgateway-crds helm upgrade -i --create-namespace --namespace $KGATEWAY_SYSTEM_NAMESPACE --version $KGATEWAY_VERSION kgateway-crds oci://cr.kgateway.dev/kgateway-dev/charts/kgateway-crds
helm upgrade -i --namespace $KGATEWAY_SYSTEM_NAMESPACE --version $KGATEWAY_VERSION kgateway oci://cr.kgateway.dev/kgateway-dev/charts/kgateway --set inferenceExtension.enabled=true helm upgrade -i --namespace $KGATEWAY_SYSTEM_NAMESPACE --version $KGATEWAY_VERSION kgateway oci://cr.kgateway.dev/kgateway-dev/charts/kgateway --set inferenceExtension.enabled=true
# Deploy the Gateway Instance # Deploy the Gateway Instance
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/kgateway/gateway.yaml -n $MODEL_NAMESPACE kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/v1.0.0/config/manifests/gateway/kgateway/gateway.yaml -n $MODEL_NAMESPACE
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment