Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
4810ad34
Unverified
Commit
4810ad34
authored
Jan 22, 2026
by
atchernych
Committed by
GitHub
Jan 22, 2026
Browse files
feat: update GAIE to release version with hints in headers (#5503)
Signed-off-by:
Anna Tchernych
<
atchernych@nvidia.com
>
parent
b31b5b56
Changes
23
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
41 additions
and
19 deletions
+41
-19
deploy/inference-gateway/helm/dynamo-gaie/templates/inference-pool.yaml
...ce-gateway/helm/dynamo-gaie/templates/inference-pool.yaml
+24
-0
deploy/inference-gateway/helm/dynamo-gaie/values.yaml
deploy/inference-gateway/helm/dynamo-gaie/values.yaml
+5
-4
deploy/inference-gateway/scripts/install_gaie_crd_kgateway.sh
...oy/inference-gateway/scripts/install_gaie_crd_kgateway.sh
+12
-15
No files found.
deploy/inference-gateway/helm/dynamo-gaie/templates/inference-pool.yaml
View file @
4810ad34
...
...
@@ -12,6 +12,30 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# InferencePool for kGateway (stable API - inference.networking.k8s.io/v1)
# This is used by kGateway to resolve HTTPRoute backends
apiVersion
:
inference.networking.k8s.io/v1
kind
:
InferencePool
metadata
:
name
:
{{
.Values.model.shortName
}}
-pool
namespace
:
{{
.Release.Namespace
}}
spec
:
targetPorts
:
-
number
:
{{
.Values.inferencePool.port
}}
selector
:
matchLabels
:
nvidia.com/dynamo-component
:
Frontend
nvidia.com/dynamo-namespace
:
{{
include "dynamo-gaie.dynamoNamespace" .
}}
endpointPickerRef
:
group
:
"
"
kind
:
Service
name
:
{{
.Values.model.shortName
}}
-epp
port
:
number
:
9002
---
# InferencePool for EPP (experimental API - inference.networking.x-k8s.io/v1alpha2)
# This is used by the EPP to discover and route to backend pods
apiVersion
:
inference.networking.x-k8s.io/v1alpha2
kind
:
InferencePool
metadata
:
...
...
deploy/inference-gateway/helm/dynamo-gaie/values.yaml
View file @
4810ad34
...
...
@@ -27,8 +27,6 @@ model:
identifier
:
"
Qwen/Qwen3-0.6B"
# This is the short name of the model that will be used to generate the resource names
shortName
:
"
qwen"
# Criticality level for the inference model
criticality
:
"
Critical"
# InferencePool configuration
inferencePool
:
...
...
@@ -41,6 +39,9 @@ httpRoute:
enabled
:
true
# Gateway parent reference configuration
gatewayName
:
"
inference-gateway"
# Namespace where the Gateway resource is deployed
# Leave empty ("") to use the release namespace, or set explicitly (e.g., "default")
gatewayNamespace
:
"
"
# Path matching configuration
path
:
prefix
:
"
/"
...
...
@@ -51,8 +52,8 @@ httpRoute:
extension
:
# EPP image for the GAIE extension (Dynamo EPP image by default)
image
:
"
"
# leave empty to use defaults below
standardImage
:
us-central1-docker.pkg.dev/k8s-artifacts-prod/images/gateway-api-inference-extension/epp:v
0.4.0
dynamoImage
:
nvcr.io/nvstaging/ai-dynamo/gaie-epp-dynamo:v0.6.0
-1
standardImage
:
us-central1-docker.pkg.dev/k8s-artifacts-prod/images/gateway-api-inference-extension/epp:v
1.2.1
dynamoImage
:
gitlab-master.nvidia.com:5005/dl/ai-dynamo/dynamo/epp-inference-extension-dynamo:new-build
-1
# generic knobs you may want in both modes
imagePullSecrets
:
...
...
deploy/inference-gateway/install_gaie_crd_kgateway.sh
→
deploy/inference-gateway/
scripts/
install_gaie_crd_kgateway.sh
View file @
4810ad34
...
...
@@ -18,29 +18,26 @@
set
-euo
pipefail
trap
'echo "Error at line $LINENO. Exiting."'
ERR
MODEL_NAMESPACE
=
my-model
kubectl create namespace
$MODEL_NAMESPACE
||
true
# Install the Gateway API
GATEWAY_API_VERSION
=
v1.
3.0
GATEWAY_API_VERSION
=
v1.
4.1
kubectl apply
-f
https://github.com/kubernetes-sigs/gateway-api/releases/download/
$GATEWAY_API_VERSION
/standard-install.yaml
# Install the Inference Extension CRDs
I
NFERENCE_EXTENSION_VERSION
=
v0.5
.1
kubectl apply
-f
https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/
$
INFERENCE_EXTENSION_VERSION
/manifests.yaml
-n
$MODEL_NAMESPACE
I
GW_LATEST_RELEASE
=
v1.2
.1
kubectl apply
-f
https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/
$
{
IGW_LATEST_RELEASE
}
/manifests.yaml
# Install and upgrade Kgateway (includes CRDs)
KGATEWAY_VERSION
=
v2.0.3
KGATEWAY_SYSTEM_NAMESPACE
=
kgateway-system
kubectl create namespace
$KGATEWAY_SYSTEM_NAMESPACE
||
true
helm upgrade
-i
--create-namespace
--namespace
$KGATEWAY_SYSTEM_NAMESPACE
--version
$KGATEWAY_VERSION
kgateway-crds oci://cr.kgateway.dev/kgateway-dev/charts/kgateway-crds
KGTW_VERSION
=
v2.1.1
helm upgrade
-i
--create-namespace
--namespace
kgateway-system
--version
$KGTW_VERSION
\
kgateway-crds oci://cr.kgateway.dev/kgateway-dev/charts/kgateway-crds
helm upgrade
-i
--namespace
$KGATEWAY_SYSTEM_NAMESPACE
--version
$KGATEWAY_VERSION
kgateway oci://cr.kgateway.dev/kgateway-dev/charts/kgateway
--set
inferenceExtension.enabled
=
true
helm upgrade
-i
--namespace
kgateway-system
--version
$KGTW_VERSION
kgateway
\
oci://cr.kgateway.dev/kgateway-dev/charts/kgateway
\
--set
inferenceExtension.enabled
=
true
kubectl apply
-f
https://raw.githubusercontent.com/kubernetes-sigs/gateway-api-inference-extension/refs/tags/
${
IGW_LATEST_RELEASE
}
/config/manifests/gateway/kgateway/gateway.yaml
# Deploy the Gateway Instance
kubectl apply
-f
https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/v1.0.0/config/manifests/gateway/kgateway/gateway.yaml
-n
$MODEL_NAMESPACE
kubectl patch gateway inference-gateway
--type
=
'json'
\
-p
=
'[{"op": "replace", "path": "/spec/gatewayClassName", "value": "kgateway"}]'
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment