Unverified commit d9e8d334, authored by atchernych, committed by GitHub
Browse files

fix: adjust kgateway setup (#3963)


Signed-off-by: Anna Tchernych <atchernych@nvidia.com>
parent 3fe5653b
......@@ -13,7 +13,7 @@ spec:
replicas: 1
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
image: gitlab-master.nvidia.com:5005/dl/ai-dynamo/dynamo/dynamo:vllm-1029
VllmDecodeWorker:
envFromSecret: hf-token-secret
dynamoNamespace: vllm-agg
......@@ -24,7 +24,7 @@ spec:
gpu: "1"
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
image: gitlab-master.nvidia.com:5005/dl/ai-dynamo/dynamo/dynamo:vllm-1029
workingDir: /workspace/components/backends/vllm
command:
- python3
......
......@@ -56,25 +56,6 @@ INFERENCE_EXTENSION_VERSION=v0.5.1
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/$INFERENCE_EXTENSION_VERSION/manifests.yaml
```
c. Install `kgateway` CRDs and kgateway.
```bash
KGATEWAY_VERSION=v2.0.3
# Install the Kgateway CRDs
helm upgrade -i --create-namespace --namespace kgateway-system --version $KGATEWAY_VERSION kgateway-crds oci://cr.kgateway.dev/kgateway-dev/charts/kgateway-crds
# Install Kgateway
helm upgrade -i --namespace kgateway-system --version $KGATEWAY_VERSION kgateway oci://cr.kgateway.dev/kgateway-dev/charts/kgateway --set inferenceExtension.enabled=true
```
d. Deploy the Gateway Instance
```bash
kubectl create namespace my-model
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/kgateway/gateway.yaml -n my-model
```
```bash
kubectl get gateway inference-gateway -n my-model
......
......@@ -32,13 +32,15 @@ INFERENCE_EXTENSION_VERSION=v0.5.1
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/$INFERENCE_EXTENSION_VERSION/manifests.yaml -n $MODEL_NAMESPACE
# Install the Kgateway CRDs and Kgateway
# Install and upgrade Kgateway (includes CRDs)
KGATEWAY_VERSION=v2.0.3
KGATEWAY_SYSTEM_NAMESPACE=kgateway-system
helm repo add kgateway-dev oci://cr.kgateway.dev/kgateway-dev || true
kubectl create namespace $KGATEWAY_SYSTEM_NAMESPACE || true
helm upgrade -i --create-namespace --namespace $KGATEWAY_SYSTEM_NAMESPACE --version $KGATEWAY_VERSION kgateway-crds oci://cr.kgateway.dev/kgateway-dev/charts/kgateway-crds
helm upgrade -i --namespace $KGATEWAY_SYSTEM_NAMESPACE --version $KGATEWAY_VERSION kgateway oci://cr.kgateway.dev/kgateway-dev/charts/kgateway --set inferenceExtension.enabled=true
# Deploy the Gateway Instance
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/kgateway/gateway.yaml -n $MODEL_NAMESPACE
\ No newline at end of file
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/v1.0.0/config/manifests/gateway/kgateway/gateway.yaml -n $MODEL_NAMESPACE
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment