Unverified Commit 1c03404f authored by Biswa Panda's avatar Biswa Panda Committed by GitHub
Browse files

fix: update inference gateway deployment instructions (#1940)

parent 5ca570f9
...@@ -19,24 +19,35 @@ Follow the instructions in [deploy/cloud/README.md](../../deploy/cloud/README.md ...@@ -19,24 +19,35 @@ Follow the instructions in [deploy/cloud/README.md](../../deploy/cloud/README.md
Deploy 2 Dynamo aggregated graphs following the instructions in [examples/llm/README.md](../../examples/llm/README.md): Deploy 2 Dynamo aggregated graphs following the instructions in [examples/llm/README.md](../../examples/llm/README.md):
### Build Dynamo Graph ### Deploy Dynamo Graphs
```bash
export DYNAMO_IMAGE=<your-registry>/<your-image-name>:<your-tag>
# Build the service Follow the commands below to deploy 2 Dynamo graphs:
cd $PROJECT_ROOT/examples/llm
export DYNAMO_TAG=$(dynamo build graphs.agg:Frontend | grep "Successfully built" | awk '{ print $NF }' | sed 's/\.$//')
```
### Deploy Dynamo Graphs
```bash ```bash
# Set pre-built vLLM dynamo base container image
export VLLM_RUNTIME_IMAGE=<dynamo-vllm-base-image>
# for example:
# export VLLM_RUNTIME_IMAGE=nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.3.1
# Run the following commands from the Dynamo repo's root folder
# Deploy first graph # Deploy first graph
export DEPLOYMENT_NAME=llm-agg1 export DEPLOYMENT_NAME=llm-agg1
# TODO: Deploy your service using a DynamoGraphDeployment CR. yq eval '
.metadata.name = env(DEPLOYMENT_NAME) |
.spec.services[].extraPodSpec.mainContainer.image = env(VLLM_RUNTIME_IMAGE)
' examples/vllm_v0/deploy/agg.yaml > examples/vllm_v0/deploy/agg1.yaml
kubectl apply -f examples/vllm_v0/deploy/agg1.yaml
# Deploy second graph # Deploy second graph
export DEPLOYMENT_NAME=llm-agg2 export DEPLOYMENT_NAME=llm-agg2
# TODO: Deploy your service using a DynamoGraphDeployment CR. yq eval '
.metadata.name = env(DEPLOYMENT_NAME) |
.spec.services[].extraPodSpec.mainContainer.image = env(VLLM_RUNTIME_IMAGE)
' examples/vllm_v0/deploy/agg.yaml > examples/vllm_v0/deploy/agg2.yaml
kubectl apply -f examples/vllm_v0/deploy/agg2.yaml
``` ```
3. **Deploy Inference Gateway** 3. **Deploy Inference Gateway**
......
...@@ -33,8 +33,8 @@ spec: ...@@ -33,8 +33,8 @@ spec:
terminationGracePeriodSeconds: 130 terminationGracePeriodSeconds: 130
containers: containers:
- name: epp - name: epp
image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:main image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:v0.4.0
imagePullPolicy: Always imagePullPolicy: IfNotPresent
args: args:
- -poolName - -poolName
- "dynamo-deepseek" - "dynamo-deepseek"
......
...@@ -18,11 +18,11 @@ metadata: ...@@ -18,11 +18,11 @@ metadata:
name: dynamo-deepseek name: dynamo-deepseek
namespace: default namespace: default
spec: spec:
targetPortNumber: 3000 targetPortNumber: 8000
selector: selector:
nvidia.com/dynamo-component-type: Frontend nvidia.com/dynamo-component: Frontend
extensionRef: extensionRef:
failureMode: FailClose failureMode: FailOpen
group: "" group: ""
kind: Service kind: Service
name: dynamo-deepseek-epp name: dynamo-deepseek-epp
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment