feat: Streamline GAIE recipe (#3829)

Signed-off-by: Anna Tchernych <atchernych@nvidia.com>

feat: Streamline GAIE recipe (#3829)
Signed-off-by: Anna Tchernych <atchernych@nvidia.com>
e64d2f09 · atchernych · GitHub · 3d7b4525 · e64d2f09 · e64d2f09
Unverified Commit e64d2f09 authored Oct 28, 2025 by atchernych Committed by GitHub Oct 28, 2025
13 changed files
--- a/deploy/inference-gateway/README.md
+++ b/deploy/inference-gateway/README.md
@@ -53,7 +53,7 @@ b. Install the Inference Extension CRDs (Inference Model and Inference Pool CRDs

 ```bash
 INFERENCE_EXTENSION_VERSION=v0.5.1
-kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/$INFERENCE_EXTENSION_VERSION/manifests.yaml -n  my-model
+kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/$INFERENCE_EXTENSION_VERSION/manifests.yaml
 ```

 c. Install `kgateway` CRDs and kgateway.

--- a/recipes/README.md
+++ b/recipes/README.md
@@ -13,14 +13,14 @@ This repository contains production-ready recipes for deploying large language m

 ## Available Models

-| Model Family    | Framework | Deployment Mode      | GPU Requirements | Status | Benchmark |
-|-----------------|-----------|---------------------|------------------|--------|-----------|
-| llama-3-70b     | vllm      | agg                 | 4x H100/H200     | ✅     | ✅        |
-| llama-3-70b     | vllm      | disagg (1 node)      | 8x H100/H200    | ✅     | ✅        |
-| llama-3-70b     | vllm      | disagg (multi-node)     | 16x H100/H200    | ✅     | ✅        |
-| deepseek-r1     | sglang    | disagg (1 node, wide-ep)     | 8x H200          | ✅     | 🚧        |
-| deepseek-r1     | sglang    | disagg (multi-node, wide-ep)     | 16x H200        | ✅     | 🚧        |
-| gpt-oss-120b    | trtllm    | agg                 | 4x GB200         | ✅     | ✅        |
+| Model Family    | Framework | Deployment Mode      | GPU Requirements | Status | Benchmark |GAIE-integration |
+|-----------------|-----------|---------------------|------------------|--------|-----------|------------------|
+| llama-3-70b     | vllm      | agg                 | 4x H100/H200     | ✅     | ✅        |✅                |
+| llama-3-70b     | vllm      | disagg (1 node)      | 8x H100/H200    | ✅     | ✅        | 🚧               |
+| llama-3-70b     | vllm      | disagg (multi-node)     | 16x H100/H200    | ✅     | ✅        |🚧               |
+| deepseek-r1     | sglang    | disagg (1 node, wide-ep)     | 8x H200          | ✅     | 🚧        |🚧               |
+| deepseek-r1     | sglang    | disagg (multi-node, wide-ep)     | 16x H200        | ✅     | 🚧        |🚧               |
+| gpt-oss-120b    | trtllm    | agg                 | 4x GB200         | ✅     | ✅        |🚧               |

 **Legend:**
 - ✅ Functional
@@ -89,9 +89,7 @@ vim hf_hub_secret/hf_hub_secret.yaml
 kubectl apply -f hf_hub_secret/hf_hub_secret.yaml -n ${NAMESPACE}
 ```

-### 6. Configure Storage Class
-
-Configure persistent storage for model caching:
+6. Configure Storage Class

 ```bash
 # Check available storage classes
@@ -160,6 +158,20 @@ kubectl apply -f hf_hub_secret/hf_hub_secret.yaml -n ${NAMESPACE}
 ./run.sh --dry-run --model llama-3-70b --framework vllm --deployment agg
 ```

+## Deploying with the Gateway API Inference Extension (GAIE)
+
+1. Follow [Deploy Inference Gateway Section 2](../deploy/inference-gateway/README.md#2-deploy-inference-gateway) to install GAIE.
+
+2. Apply manifests by running a script.
+
+```bash
+# Match the block size to the cli value in your deployment file deploy.yaml: - "python3 -m dynamo.vllm ... --block-size 128"
+export DYNAMO_KV_BLOCK_SIZE=128
+export EPP_IMAGE=nvcr.io/you/epp:tag
+# Add the --gaie flag to the usual run.sh invocation, e.g.:
+./run.sh --model llama-3-70b --framework vllm --deployment agg --gaie
+```
+The script will perform gateway checks and apply the manifests.

 ## Option 2: Manual Deployment


--- a/recipes/gaie_checks.sh
+++ b/recipes/gaie_checks.sh
+#!/usr/bin/env bash
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#!/usr/bin/env bash
+set -Eeuo pipefail
+
+# ===== Namespace ensure =====
+if ! kubectl get ns "$NAMESPACE" >/dev/null 2>&1; then
+  kubectl create namespace "$NAMESPACE"
+fi
+
+KGW_NS="${KGW_NS:-kgateway-system}"
+
+ok()  { printf "✅ %s\n" "$*"; }
+fail(){ printf "❌ %s\n" "$*" >&2; exit 1; }
+info(){ printf "ℹ️  %s\n" "$*"; }
+
+need() { command -v "$1" >/dev/null 2>&1 || fail "'$1' is required"; }
+
+need kubectl
+
+# ===== Config (env overridable) =====
+: "${NAMESPACE:=dynamo}"
+
+# ===== Pre-flight checks =====
+command -v helm >/dev/null 2>&1 || { echo "ERROR: helm not found"; exit 1; }
+command -v kubectl >/dev/null 2>&1 || { echo "ERROR: kubectl not found"; exit 1; }
+
+GATEWAY_CRDS=(
+  gateways.gateway.networking.k8s.io
+  gatewayclasses.gateway.networking.k8s.io
+  httproutes.gateway.networking.k8s.io
+  referencegrants.gateway.networking.k8s.io
+)
+info "Checking Gateway API CRDs…"
+for c in "${GATEWAY_CRDS[@]}"; do
+  kubectl get crd "$c" >/dev/null 2>&1 || fail "Missing CRD: $c (run step a)"
+  kubectl wait --for=condition=Established "crd/$c" --timeout=60s >/dev/null || fail "CRD not Established: $c"
+done
+ok "Gateway API CRDs present & Established"
+
+GAIE_CRDS=(
+  inferencemodels.inference.networking.x-k8s.io
+  inferencepools.inference.networking.x-k8s.io
+)
+
+info "Checking GAIE (Inference Extension) CRDs…"
+for c in "${GAIE_CRDS[@]}"; do
+  kubectl get crd "$c" >/dev/null 2>&1 || fail "Missing CRD: $c (run step b install of inference extension)"
+  kubectl wait --for=condition=Established "crd/$c" --timeout=60s >/dev/null || fail "CRD not Established: $c"
+done
+ok "GAIE CRDs present & Established"
+
+info "Checking Kgateway controller in namespace '$KGW_NS'…"
+# namespace must exist
+kubectl get ns "$KGW_NS" >/dev/null 2>&1 || fail "Namespace '$KGW_NS' not found (run step c Helm installs)"
+
+# pods should be running
+if ! kubectl get pods -n "$KGW_NS" -l app.kubernetes.io/name=kgateway >/dev/null 2>&1; then
+  # fallback label (charts sometimes label differently)
+  PODS=$(kubectl get pods -n "$KGW_NS" -o name | grep -E 'kgateway|gateway' || true)
+  [[ -z "${PODS:-}" ]] && fail "Kgateway pods not found in '$KGW_NS'"
+else
+  PODS=$(kubectl get pods -n "$KGW_NS" -l app.kubernetes.io/name=kgateway -o name)
+fi
+for p in $PODS; do
+  kubectl wait -n "$KGW_NS" --for=condition=Ready "$p" --timeout=180s >/dev/null || fail "Pod not Ready: $p"
+done
+ok "Kgateway controller pods Ready"
+
+kubectl get gateway.gateway.networking.k8s.io inference-gateway -n "$NAMESPACE" >/dev/null 2>&1 || fail "Gateway 'inference-gateway' not found in $NAMESPACE (apply step d manifest)"
+
+ok "GAIE is installed and the gateway is up in namespace '$NAMESPACE'."
+
+
--- a/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/epp/configmap.yaml
+++ b/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/epp/configmap.yaml
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# NOTE: metadata.namespace is not set here; supply it with kubectl apply -n <namespace>
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: epp-config
+  labels:
+    app.kubernetes.io/name: dynamo-gaie
+    app.kubernetes.io/instance: llama3-70b-agg
+data:
+  epp-config-dynamo.yaml: |
+    apiVersion: inference.networking.x-k8s.io/v1alpha1
+    kind: EndpointPickerConfig
+    plugins:
+      # Required: tells EPP which profile to use (even if you only have one)
+      - type: single-profile-handler
+
+      # Picker: chooses the final endpoint after scoring
+      - name: picker
+        type: max-score-picker
+      - name: dyn-pre
+        type: dynamo-inject-workerid
+        parameters: {}
+      - name: dyn-kv
+        type: kv-aware-scorer
+        parameters:
+          frontendURL: http://127.0.0.1:8000/v1/chat/completions
+          timeoutMS: 10000
+    schedulingProfiles:
+      - name: default
+        plugins:
+          - pluginRef: dyn-kv
+            weight: 1
+          - pluginRef: picker
--- a/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/epp/deployment.yaml
+++ b/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/epp/deployment.yaml
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# NOTE: metadata.namespace is not set here; supply it with kubectl apply -n <namespace>
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: llama3-70b-agg-epp
+  labels:
+    app: llama3-70b-agg-epp
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: llama3-70b-agg-epp
+  template:
+    metadata:
+      labels:
+        app: llama3-70b-agg-epp
+    spec:
+      serviceAccountName: epp-sa
+      terminationGracePeriodSeconds: 130
+
+      imagePullSecrets:
+        - name: docker-imagepullsecret
+
+      containers:
+        - name: epp
+          image: nvcr.io/nvstaging/ai-dynamo/epp-inference-extension-dynamo:v0.6.0-1
+          imagePullPolicy: IfNotPresent
+          resources:
+            requests:
+              memory: "1Gi"
+              cpu: "1"
+            limits:
+              memory: "2Gi"
+              cpu: "2"
+          command: ["/bin/sh", "-c"]
+          args:
+            - >
+              exec /epp
+              -poolName "llama3-70b-agg-pool"
+              -poolNamespace "$POD_NAMESPACE"
+              -v 4 --zap-encoder json
+              -grpcPort 9002 -grpcHealthPort 9003
+              -configFile /etc/epp/epp-config-dynamo.yaml
+
+          volumeMounts:
+            - name: epp-config
+              mountPath: /etc/epp
+              readOnly: true
+
+          env:
+            - name: POD_NAMESPACE
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.namespace
+            - name: PLATFORM_NAMESPACE
+              value: "$(POD_NAMESPACE)" # set to your dynamo platform namespace if different
+            - name: ETCD_ENDPOINTS
+              value: "dynamo-platform-etcd.$(PLATFORM_NAMESPACE):2379" #  update dynamo-platform to appropriate namespace
+            - name: NATS_SERVER
+              value: "nats://dynamo-platform-nats.$(PLATFORM_NAMESPACE):4222" #  update dynamo-platform to appropriate namespace
+            - name: DYN_NAMESPACE
+              value: "llama3-70b-agg"
+            - name: DYNAMO_KV_BLOCK_SIZE
+              value: "128" # UPDATE to match the --block-size in your deploy.yaml engine command
+            - name: USE_STREAMING
+              value: "true"
+
+          ports:
+            - containerPort: 9002
+            - containerPort: 9003
+            - name: metrics
+              containerPort: 9090
+          livenessProbe:
+            grpc:
+              port: 9003
+              service: inference-extension
+            initialDelaySeconds: 5
+            periodSeconds: 10
+          readinessProbe:
+            grpc:
+              port: 9003
+              service: inference-extension
+            initialDelaySeconds: 5
+            periodSeconds: 10
+
+      volumes:
+        - name: epp-config
+          configMap:
+            name: epp-config
+            items:
+              - key: epp-config-dynamo.yaml
+                path: epp-config-dynamo.yaml
--- a/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/epp/http-route.yaml
+++ b/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/epp/http-route.yaml
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# NOTE: metadata.namespace is not set here; supply it with kubectl apply -n <namespace>
+# backendRefs carries no namespace, so the InferencePool is looked up in the route's own namespace
+apiVersion: gateway.networking.k8s.io/v1
+kind: HTTPRoute
+metadata:
+  name: llama3-70b-agg-route
+spec:
+  parentRefs:
+    - group: gateway.networking.k8s.io
+      kind: Gateway
+      name: inference-gateway
+  rules:
+    - backendRefs:
+        - group: inference.networking.x-k8s.io
+          kind: InferencePool
+          name: llama3-70b-agg-pool
+          port: 8000
+          weight: 1
+      matches:
+        - path:
+            type: PathPrefix
+            value: /
+      timeouts:
+        request: 300s
--- a/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/epp/service.yaml
+++ b/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/epp/service.yaml
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# NOTE: metadata.namespace is not set here; supply it with kubectl apply -n <namespace>
+apiVersion: v1
+kind: Service
+metadata:
+  name: llama3-70b-agg-epp
+spec:
+  # Must equal the pod template label of the llama3-70b-agg-epp Deployment
+  # (app: llama3-70b-agg-epp); any other value leaves the Service without endpoints.
+  selector:
+    app: llama3-70b-agg-epp
+  ports:
+    - protocol: TCP
+      port: 9002
+      targetPort: 9002
+      appProtocol: http2
+  type: ClusterIP
+
--- a/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/model/inference-model.yaml
+++ b/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/model/inference-model.yaml
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# NOTE: metadata.namespace is not set here; supply it with kubectl apply -n <namespace>
+apiVersion: inference.networking.x-k8s.io/v1alpha2
+kind: InferenceModel
+metadata:
+  name: llama3-70b-agg-model
+spec:
+  criticality: Critical
+  modelName: RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic
+  poolRef:
+    group: inference.networking.x-k8s.io
+    kind: InferencePool
+    name: llama3-70b-agg-pool
+
--- a/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/model/inference-pool.yaml
+++ b/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/model/inference-pool.yaml
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# NOTE: metadata.namespace is not set here; supply it with kubectl apply -n <namespace>
+apiVersion: inference.networking.x-k8s.io/v1alpha2
+kind: InferencePool
+metadata:
+  name: llama3-70b-agg-pool
+spec:
+  targetPortNumber: 8000
+  selector:
+    nvidia.com/dynamo-component: Frontend
+    nvidia.com/dynamo-namespace: llama3-70b-agg # This is the Dynamo namespace where the model is deployed
+  extensionRef:
+    failureMode: FailOpen
+    group: ""
+    kind: Service
+    name: llama3-70b-agg-epp
--- a/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/rbac/cluster-role.yaml
+++ b/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/rbac/cluster-role.yaml
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+kind: ClusterRole
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: pod-read
+rules:
+- apiGroups: ["inference.networking.x-k8s.io"]
+  resources: ["inferencepools"]
+  verbs: ["get", "watch", "list"]
+- apiGroups: ["inference.networking.x-k8s.io"]
+  resources: ["inferencemodels"]
+  verbs: ["get", "watch", "list"]
+- apiGroups: [""]
+  resources: ["pods"]
+  verbs: ["get", "watch", "list"]
+- apiGroups:
+  - authentication.k8s.io
+  resources:
+  - tokenreviews
+  verbs:
+  - create
+- apiGroups:
+  - authorization.k8s.io
+  resources:
+  - subjectaccessreviews
+  verbs:
+  - create
+
--- a/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/rbac/role-binding.yaml
+++ b/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/rbac/role-binding.yaml
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# NOTE: This is a namespaced RoleBinding (not a ClusterRoleBinding). It grants the
+# 'pod-read' ClusterRole to the 'epp-sa' ServiceAccount only inside the namespace it is applied to.
+# subjects[].namespace is omitted; apply this manifest with kubectl apply -n <namespace>.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: pod-read-binding
+  # no metadata.namespace - kubectl -n sets it
+subjects:
+  - kind: ServiceAccount
+    name: epp-sa
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: pod-read
--- a/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/rbac/service-account.yaml
+++ b/recipes/llama-3-70b/vllm/agg/gaie/k8s-manifests/rbac/service-account.yaml
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: epp-sa
+# no metadata.namespace (kubectl -n sets it)
--- a/recipes/run.sh
+++ b/recipes/run.sh
@@ -14,9 +14,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+set -euo pipefail
+IFS=$'\n\t'
+
 RECIPES_DIR="$( cd "$( dirname "$0" )" && pwd )"
 # Default values
 NAMESPACE="${NAMESPACE:-dynamo}"
+DEPLOY_TYPE=""
+GAIE="${GAIE:-false}"
 DEPLOYMENT=""
 MODEL=""
 FRAMEWORK=""
@@ -38,6 +43,7 @@ usage() {
    echo "Optional:"
    echo "  --namespace <ns>   Kubernetes namespace (default: dynamo)"
    echo "  --dry-run          Print commands without executing them"
+    echo "  --gaie[=true|false] Enable GAIE integration subfolder (applies GAIE manifests skips benchmark) (default: ${GAIE})"
    echo "  -h, --help         Show this help message"
    echo ""
    echo "Environment Variables:"
@@ -98,6 +104,22 @@ while [[ $# -gt 0 ]]; do
                missing_requirement "$1"
            fi
            ;;
+        --gaie)
+            GAIE=true
+            shift
+            ;;
+        --gaie=false)
+            GAIE=false
+            shift
+            ;;
+        --gaie=*)
+            GAIE="${1#*=}"
+            case "${GAIE,,}" in
+              true|false) GAIE="${GAIE,,}";;
+              *) echo "ERROR: --gaie must be true or false"; exit 1;;
+            esac
+            shift
+            ;;
        -h|--help)
            usage
            ;;
@@ -137,6 +159,7 @@ fi
 MODEL_DIR="$RECIPES_DIR/$MODEL"
 FRAMEWORK_DIR="$MODEL_DIR/${FRAMEWORK,,}"
 DEPLOY_PATH="$FRAMEWORK_DIR/$DEPLOYMENT"
+INTEGRATION="$([[ "${GAIE,,}" == "true" ]] && echo gaie || echo "")"

 # Check if model directory exists
 if [[ ! -d "$MODEL_DIR" ]]; then
@@ -188,6 +211,7 @@ echo "Model: $MODEL"
 echo "Framework: ${FRAMEWORK,,}"
 echo "Deployment Type: $DEPLOYMENT"
 echo "Namespace: $NAMESPACE"
+echo "GAIE integration: $GAIE"
 echo "======================================"

 # Handle model downloading
@@ -205,6 +229,15 @@ $DRY_RUN kubectl wait --for=condition=Complete job/$MODEL_DOWNLOAD_JOB_NAME -n $
 echo "Deploying $MODEL ${FRAMEWORK,,} $DEPLOYMENT configuration..."
 $DRY_RUN kubectl apply -n $NAMESPACE -f $DEPLOY_FILE

+if [[ "$INTEGRATION" == "gaie" ]]; then
+    # GAIE path: verify the gateway stack, apply the GAIE manifests, skip the benchmark.
+    # NAMESPACE may not be exported (e.g. when set via --namespace), so pass it explicitly.
+    $DRY_RUN env NAMESPACE="$NAMESPACE" "$RECIPES_DIR/gaie_checks.sh"
+    # -R is required: the manifests live in the epp/, model/ and rbac/ subdirectories.
+    $DRY_RUN kubectl apply -R -f "$DEPLOY_PATH/gaie/k8s-manifests" -n "$NAMESPACE"
+    exit 0
+fi
+
 # Launch the benchmark job (if available)
 if [[ "$PERF_AVAILABLE" == "true" ]]; then
    echo "Launching benchmark job..."