Unverified Commit 1c03404f authored by Biswa Panda's avatar Biswa Panda Committed by GitHub
Browse files

fix: update inference gateway deployment instructions (#1940)

parent 5ca570f9
......@@ -19,24 +19,35 @@ Follow the instructions in [deploy/cloud/README.md](../../deploy/cloud/README.md
Deploy 2 Dynamo aggregated graphs following the instructions in [examples/llm/README.md](../../examples/llm/README.md):
### Build Dynamo Graph
```bash
export DYNAMO_IMAGE=<your-registry>/<your-image-name>:<your-tag>
### Deploy Dynamo Graphs
# Build the service
cd $PROJECT_ROOT/examples/llm
export DYNAMO_TAG=$(dynamo build graphs.agg:Frontend | grep "Successfully built" | awk '{ print $NF }' | sed 's/\.$//')
```
Run the following commands to deploy two Dynamo graphs:
### Deploy Dynamo Graphs
```bash
# Set pre-built vLLM dynamo base container image
export VLLM_RUNTIME_IMAGE=<dynamo-vllm-base-image>
# for example:
# export VLLM_RUNTIME_IMAGE=nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.3.1
# Run the following commands from the root folder of the Dynamo repo
# Deploy first graph
export DEPLOYMENT_NAME=llm-agg1
# TODO: Deploy your service using a DynamoGraphDeployment CR.
yq eval '
.metadata.name = env(DEPLOYMENT_NAME) |
.spec.services[].extraPodSpec.mainContainer.image = env(VLLM_RUNTIME_IMAGE)
' examples/vllm_v0/deploy/agg.yaml > examples/vllm_v0/deploy/agg1.yaml
kubectl apply -f examples/vllm_v0/deploy/agg1.yaml
# Deploy second graph
export DEPLOYMENT_NAME=llm-agg2
# TODO: Deploy your service using a DynamoGraphDeployment CR.
yq eval '
.metadata.name = env(DEPLOYMENT_NAME) |
.spec.services[].extraPodSpec.mainContainer.image = env(VLLM_RUNTIME_IMAGE)
' examples/vllm_v0/deploy/agg.yaml > examples/vllm_v0/deploy/agg2.yaml
kubectl apply -f examples/vllm_v0/deploy/agg2.yaml
```
3. **Deploy Inference Gateway**
......
......@@ -33,8 +33,8 @@ spec:
terminationGracePeriodSeconds: 130
containers:
- name: epp
image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:main
imagePullPolicy: Always
image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:v0.4.0
imagePullPolicy: IfNotPresent
args:
- -poolName
- "dynamo-deepseek"
......
......@@ -18,11 +18,11 @@ metadata:
name: dynamo-deepseek
namespace: default
spec:
targetPortNumber: 3000
targetPortNumber: 8000
selector:
nvidia.com/dynamo-component-type: Frontend
nvidia.com/dynamo-component: Frontend
extensionRef:
failureMode: FailClose
failureMode: FailOpen
group: ""
kind: Service
name: dynamo-deepseek-epp
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment