fix: Recipe namespace fix (#4445)

Signed-off-by: Anna Tchernych <atchernych@nvidia.com>

fix: Recipe namespace fix (#4445)
Signed-off-by: Anna Tchernych <atchernych@nvidia.com>
c3195612 · atchernych · GitHub · 164b0c29 · c3195612 · c3195612
Unverified Commit c3195612 authored Nov 18, 2025 by atchernych Committed by GitHub Nov 18, 2025
6 changed files
--- a/recipes/README.md
+++ b/recipes/README.md
@@ -147,16 +147,6 @@ kubectl logs -f job/<benchmark-job-name> -n ${NAMESPACE}
 kubectl logs job/<benchmark-job-name> -n ${NAMESPACE} | tail -50
 ```
-** Inference Gateway (GAIE) Integration (Optional)**
-For Llama-3-70B with vLLM (Aggregated), an example of integration with the Inference Gateway is provided.
-Follow to Follow [Deploy Inference Gateway Section 2](../deploy/inference-gateway/README.md#2-deploy-inference-gateway) to install GAIE. Then apply manifests.
-```bash
-export DEPLOY_PATH=llama-3-70b/vllm/agg/
-#DEPLOY_PATH=<model>/<framework>/<mode>/
-kubectl apply -R -f "$DEPLOY_PATH/gaie/k8s-manifests" -n "$NAMESPACE"
 ## Example Deployments
@@ -180,6 +170,19 @@ kubectl apply -f llama-3-70b/vllm/agg/deploy.yaml -n ${NAMESPACE}
 kubectl port-forward svc/llama3-70b-agg-frontend 8000:8000 -n ${NAMESPACE}
 ```
+### Inference Gateway (GAIE) Integration (Optional)
+For Llama-3-70B with vLLM (Aggregated), an example of integration with the Inference Gateway is provided.
+Follow [Deploy Inference Gateway Section 2](../deploy/inference-gateway/README.md#2-deploy-inference-gateway) to install GAIE. Then apply the manifests.
+Update the image of the `epp` container in the deployment file, i.e. `llama-3-70b/vllm/agg/gaie/k8s-manifests/epp/deployment.yaml`.
+```bash
+export DEPLOY_PATH=llama-3-70b/vllm/agg/
+#DEPLOY_PATH=<model>/<framework>/<mode>/
+kubectl apply -R -f "$DEPLOY_PATH/gaie/k8s-manifests" -n "$NAMESPACE"
 ### DeepSeek-R1 on GB200 (Multi-node)
 See [deepseek-r1/trtllm/disagg/wide_ep/gb200/deploy.yaml](deepseek-r1/trtllm/disagg/wide_ep/gb200/deploy.yaml) for the complete multi-node WideEP configuration.

--- a/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/epp/configmap.yaml
+++ b/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/epp/configmap.yaml
@@ -20,7 +20,6 @@ metadata:
  name: epp-config
  labels:
    app.kubernetes.io/name: dynamo-gaie
-    app.kubernetes.io/instance: llama3-70b-agg
 data:
  epp-config-dynamo.yaml: |
    apiVersion: inference.networking.x-k8s.io/v1alpha1

--- a/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/epp/deployment.yaml
+++ b/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/epp/deployment.yaml
@@ -38,7 +38,7 @@ spec:
      containers:
        - name: epp
-          image: nvcr.io/nvstaging/ai-dynamo/epp-inference-extension-dynamo:v0.6.0-1
+          image: nvcr.io/nvstaging/ai-dynamo/dynamo-frontend:0.7.0rc2-amd64
          imagePullPolicy: IfNotPresent
          resources:
            requests:
@@ -73,8 +73,8 @@ spec:
              value: "dynamo-platform-etcd.$(PLATFORM_NAMESPACE):2379" #  update dynamo-platform to appropriate namespace
            - name: NATS_SERVER
              value: "nats://dynamo-platform-nats.$(PLATFORM_NAMESPACE):4222" #  update dynamo-platform to appropriate namespace
-            - name: DYN_NAMESPACE
+            - name: DYNAMO_NAMESPACE
-              value: "llama3-70b-agg"
+              value: "$(POD_NAMESPACE)-llama3-70b-agg"
            - name: DYNAMO_KV_BLOCK_SIZE
              value: "128" # UPDATE to match the --block-size in your deploy.yaml engine command
            - name: USE_STREAMING

--- a/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/epp/service.yaml
+++ b/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/epp/service.yaml
@@ -20,11 +20,10 @@ metadata:
  name: llama3-70b-agg-epp
 spec:
  selector:
-    app: llama3-70b-agg
+    app: llama3-70b-agg-epp
  ports:
    - protocol: TCP
      port: 9002
      targetPort: 9002
      appProtocol: http2
  type: ClusterIP
--- a/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/model/inference-model.yaml
+++ b/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/model/inference-model.yaml
@@ -25,4 +25,3 @@ spec:
    group: inference.networking.x-k8s.io
    kind: InferencePool
    name: llama3-70b-agg-pool
--- a/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/model/inference-pool.yaml
+++ b/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/model/inference-pool.yaml
@@ -22,7 +22,6 @@ spec:
  targetPortNumber: 8000
  selector:
    nvidia.com/dynamo-component: Frontend
-    nvidia.com/dynamo-namespace: llama3-70b-agg # # This is the Dynamo namespace where the model is deployed
  extensionRef:
    failureMode: FailOpen
    group: ""