Unverified Commit c3195612 authored by atchernych's avatar atchernych Committed by GitHub
Browse files

fix: Recipe namespace fix (#4445)


Signed-off-by: default avatarAnna Tchernych <atchernych@nvidia.com>
parent 164b0c29
...@@ -147,16 +147,6 @@ kubectl logs -f job/<benchmark-job-name> -n ${NAMESPACE} ...@@ -147,16 +147,6 @@ kubectl logs -f job/<benchmark-job-name> -n ${NAMESPACE}
kubectl logs job/<benchmark-job-name> -n ${NAMESPACE} | tail -50 kubectl logs job/<benchmark-job-name> -n ${NAMESPACE} | tail -50
``` ```
** Inference Gateway (GAIE) Integration (Optional)**
For Llama-3-70B with vLLM (Aggregated), an example of integration with the Inference Gateway is provided.
Follow to Follow [Deploy Inference Gateway Section 2](../deploy/inference-gateway/README.md#2-deploy-inference-gateway) to install GAIE. Then apply manifests.
```bash
export DEPLOY_PATH=llama-3-70b/vllm/agg/
#DEPLOY_PATH=<model>/<framework>/<mode>/
kubectl apply -R -f "$DEPLOY_PATH/gaie/k8s-manifests" -n "$NAMESPACE"
## Example Deployments ## Example Deployments
...@@ -180,6 +170,19 @@ kubectl apply -f llama-3-70b/vllm/agg/deploy.yaml -n ${NAMESPACE} ...@@ -180,6 +170,19 @@ kubectl apply -f llama-3-70b/vllm/agg/deploy.yaml -n ${NAMESPACE}
kubectl port-forward svc/llama3-70b-agg-frontend 8000:8000 -n ${NAMESPACE} kubectl port-forward svc/llama3-70b-agg-frontend 8000:8000 -n ${NAMESPACE}
``` ```
### Inference Gateway (GAIE) Integration (Optional)
For Llama-3-70B with vLLM (Aggregated), an example of integration with the Inference Gateway is provided.
Follow [Deploy Inference Gateway Section 2](../deploy/inference-gateway/README.md#2-deploy-inference-gateway) to install GAIE. Then apply the manifests.
Update the `containers.epp.image` field in the deployment file, i.e. `llama-3-70b/vllm/agg/gaie/k8s-manifests/epp/deployment.yaml`.
```bash
export DEPLOY_PATH=llama-3-70b/vllm/agg/
#DEPLOY_PATH=<model>/<framework>/<mode>/
kubectl apply -R -f "$DEPLOY_PATH/gaie/k8s-manifests" -n "$NAMESPACE"
```
### DeepSeek-R1 on GB200 (Multi-node) ### DeepSeek-R1 on GB200 (Multi-node)
See [deepseek-r1/trtllm/disagg/wide_ep/gb200/deploy.yaml](deepseek-r1/trtllm/disagg/wide_ep/gb200/deploy.yaml) for the complete multi-node WideEP configuration. See [deepseek-r1/trtllm/disagg/wide_ep/gb200/deploy.yaml](deepseek-r1/trtllm/disagg/wide_ep/gb200/deploy.yaml) for the complete multi-node WideEP configuration.
......
...@@ -20,7 +20,6 @@ metadata: ...@@ -20,7 +20,6 @@ metadata:
name: epp-config name: epp-config
labels: labels:
app.kubernetes.io/name: dynamo-gaie app.kubernetes.io/name: dynamo-gaie
app.kubernetes.io/instance: llama3-70b-agg
data: data:
epp-config-dynamo.yaml: | epp-config-dynamo.yaml: |
apiVersion: inference.networking.x-k8s.io/v1alpha1 apiVersion: inference.networking.x-k8s.io/v1alpha1
......
...@@ -38,7 +38,7 @@ spec: ...@@ -38,7 +38,7 @@ spec:
containers: containers:
- name: epp - name: epp
image: nvcr.io/nvstaging/ai-dynamo/epp-inference-extension-dynamo:v0.6.0-1 image: nvcr.io/nvstaging/ai-dynamo/dynamo-frontend:0.7.0rc2-amd64
imagePullPolicy: IfNotPresent imagePullPolicy: IfNotPresent
resources: resources:
requests: requests:
...@@ -73,8 +73,8 @@ spec: ...@@ -73,8 +73,8 @@ spec:
value: "dynamo-platform-etcd.$(PLATFORM_NAMESPACE):2379" # update dynamo-platform to appropriate namespace value: "dynamo-platform-etcd.$(PLATFORM_NAMESPACE):2379" # update dynamo-platform to appropriate namespace
- name: NATS_SERVER - name: NATS_SERVER
value: "nats://dynamo-platform-nats.$(PLATFORM_NAMESPACE):4222" # update dynamo-platform to appropriate namespace value: "nats://dynamo-platform-nats.$(PLATFORM_NAMESPACE):4222" # update dynamo-platform to appropriate namespace
- name: DYN_NAMESPACE - name: DYNAMO_NAMESPACE
value: "llama3-70b-agg" value: "$(POD_NAMESPACE)-llama3-70b-agg"
- name: DYNAMO_KV_BLOCK_SIZE - name: DYNAMO_KV_BLOCK_SIZE
value: "128" # UPDATE to match the --block-size in your deploy.yaml engine command value: "128" # UPDATE to match the --block-size in your deploy.yaml engine command
- name: USE_STREAMING - name: USE_STREAMING
......
...@@ -20,11 +20,10 @@ metadata: ...@@ -20,11 +20,10 @@ metadata:
name: llama3-70b-agg-epp name: llama3-70b-agg-epp
spec: spec:
selector: selector:
app: llama3-70b-agg app: llama3-70b-agg-epp
ports: ports:
- protocol: TCP - protocol: TCP
port: 9002 port: 9002
targetPort: 9002 targetPort: 9002
appProtocol: http2 appProtocol: http2
type: ClusterIP type: ClusterIP
...@@ -25,4 +25,3 @@ spec: ...@@ -25,4 +25,3 @@ spec:
group: inference.networking.x-k8s.io group: inference.networking.x-k8s.io
kind: InferencePool kind: InferencePool
name: llama3-70b-agg-pool name: llama3-70b-agg-pool
...@@ -22,7 +22,6 @@ spec: ...@@ -22,7 +22,6 @@ spec:
targetPortNumber: 8000 targetPortNumber: 8000
selector: selector:
nvidia.com/dynamo-component: Frontend nvidia.com/dynamo-component: Frontend
nvidia.com/dynamo-namespace: llama3-70b-agg # # This is the Dynamo namespace where the model is deployed
extensionRef: extensionRef:
failureMode: FailOpen failureMode: FailOpen
group: "" group: ""
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment