Unverified commit 4810ad34, authored by atchernych and committed by GitHub
Browse files

feat: update GAIE to release version with hints in headers (#5503)


Signed-off-by: Anna Tchernych <atchernych@nvidia.com>
parent b31b5b56
...@@ -12,6 +12,30 @@ ...@@ -12,6 +12,30 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# InferencePool for kGateway (stable API - inference.networking.k8s.io/v1)
# This is used by kGateway to resolve HTTPRoute backends
apiVersion: inference.networking.k8s.io/v1
kind: InferencePool
metadata:
  # Resource name is derived from the model short name in values.yaml.
  name: {{ .Values.model.shortName }}-pool
  namespace: {{ .Release.Namespace }}
spec:
  # Port(s) on the selected pods that receive inference traffic.
  targetPorts:
    - number: {{ .Values.inferencePool.port }}
  # Pods are selected by these two labels.
  selector:
    matchLabels:
      nvidia.com/dynamo-component: Frontend
      nvidia.com/dynamo-namespace: {{ include "dynamo-gaie.dynamoNamespace" . }}
  # Reference to the endpoint picker (EPP) Service; group "" is the core API group.
  endpointPickerRef:
    group: ""
    kind: Service
    name: {{ .Values.model.shortName }}-epp
    port:
      number: 9002
---
# InferencePool for EPP (experimental API - inference.networking.x-k8s.io/v1alpha2)
# This is used by the EPP to discover and route to backend pods
apiVersion: inference.networking.x-k8s.io/v1alpha2 apiVersion: inference.networking.x-k8s.io/v1alpha2
kind: InferencePool kind: InferencePool
metadata: metadata:
......
...@@ -27,8 +27,6 @@ model: ...@@ -27,8 +27,6 @@ model:
identifier: "Qwen/Qwen3-0.6B" identifier: "Qwen/Qwen3-0.6B"
# This is the short name of the model that will be used to generate the resource names # This is the short name of the model that will be used to generate the resource names
shortName: "qwen" shortName: "qwen"
# Criticality level for the inference model
criticality: "Critical"
# InferencePool configuration # InferencePool configuration
inferencePool: inferencePool:
...@@ -41,6 +39,9 @@ httpRoute: ...@@ -41,6 +39,9 @@ httpRoute:
enabled: true enabled: true
# Gateway parent reference configuration # Gateway parent reference configuration
gatewayName: "inference-gateway" gatewayName: "inference-gateway"
# Namespace where the Gateway resource is deployed
# Leave empty ("") to use the release namespace, or set explicitly (e.g., "default")
gatewayNamespace: ""
# Path matching configuration # Path matching configuration
path: path:
prefix: "/" prefix: "/"
...@@ -51,8 +52,8 @@ httpRoute: ...@@ -51,8 +52,8 @@ httpRoute:
extension: extension:
# EPP image for the GAIE extension (Dynamo EPP image by default) # EPP image for the GAIE extension (Dynamo EPP image by default)
image: "" # leave empty to use defaults below image: "" # leave empty to use defaults below
standardImage: us-central1-docker.pkg.dev/k8s-artifacts-prod/images/gateway-api-inference-extension/epp:v0.4.0 standardImage: us-central1-docker.pkg.dev/k8s-artifacts-prod/images/gateway-api-inference-extension/epp:v1.2.1
dynamoImage: nvcr.io/nvstaging/ai-dynamo/gaie-epp-dynamo:v0.6.0-1 dynamoImage: gitlab-master.nvidia.com:5005/dl/ai-dynamo/dynamo/epp-inference-extension-dynamo:new-build-1
# generic knobs you may want in both modes # generic knobs you may want in both modes
imagePullSecrets: imagePullSecrets:
......
...@@ -18,29 +18,26 @@ ...@@ -18,29 +18,26 @@
set -euo pipefail set -euo pipefail
trap 'echo "Error at line $LINENO. Exiting."' ERR trap 'echo "Error at line $LINENO. Exiting."' ERR
MODEL_NAMESPACE=my-model
kubectl create namespace $MODEL_NAMESPACE || true
# Install the Gateway API # Install the Gateway API
GATEWAY_API_VERSION=v1.3.0 GATEWAY_API_VERSION=v1.4.1
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api/releases/download/$GATEWAY_API_VERSION/standard-install.yaml kubectl apply -f https://github.com/kubernetes-sigs/gateway-api/releases/download/$GATEWAY_API_VERSION/standard-install.yaml
# Install the Inference Extension CRDs # Install the Inference Extension CRDs
INFERENCE_EXTENSION_VERSION=v0.5.1 IGW_LATEST_RELEASE=v1.2.1
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/$INFERENCE_EXTENSION_VERSION/manifests.yaml -n $MODEL_NAMESPACE kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/${IGW_LATEST_RELEASE}/manifests.yaml
# Install and upgrade Kgateway (includes CRDs) # Install and upgrade Kgateway (includes CRDs)
KGATEWAY_VERSION=v2.0.3 KGTW_VERSION=v2.1.1
KGATEWAY_SYSTEM_NAMESPACE=kgateway-system helm upgrade -i --create-namespace --namespace kgateway-system --version $KGTW_VERSION \
kubectl create namespace $KGATEWAY_SYSTEM_NAMESPACE || true kgateway-crds oci://cr.kgateway.dev/kgateway-dev/charts/kgateway-crds
helm upgrade -i --create-namespace --namespace $KGATEWAY_SYSTEM_NAMESPACE --version $KGATEWAY_VERSION kgateway-crds oci://cr.kgateway.dev/kgateway-dev/charts/kgateway-crds
helm upgrade -i --namespace $KGATEWAY_SYSTEM_NAMESPACE --version $KGATEWAY_VERSION kgateway oci://cr.kgateway.dev/kgateway-dev/charts/kgateway --set inferenceExtension.enabled=true helm upgrade -i --namespace kgateway-system --version $KGTW_VERSION kgateway \
oci://cr.kgateway.dev/kgateway-dev/charts/kgateway \
--set inferenceExtension.enabled=true
kubectl apply -f https://raw.githubusercontent.com/kubernetes-sigs/gateway-api-inference-extension/refs/tags/${IGW_LATEST_RELEASE}/config/manifests/gateway/kgateway/gateway.yaml
# Deploy the Gateway Instance kubectl patch gateway inference-gateway --type='json' \
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/v1.0.0/config/manifests/gateway/kgateway/gateway.yaml -n $MODEL_NAMESPACE -p='[{"op": "replace", "path": "/spec/gatewayClassName", "value": "kgateway"}]'
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment