Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
8c2a4681
Unverified
Commit
8c2a4681
authored
Mar 24, 2026
by
atchernych
Committed by
GitHub
Mar 24, 2026
Browse files
feat: Add GAIE nightly integration test [DEP-423] (#7458)
Signed-off-by:
Anna Tchernych
<
atchernych@nvidia.com
>
parent
16bca7b6
Changes
13
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
520 additions
and
19 deletions
+520
-19
.github/actions/dynamo-deploy-test/action.yml
.github/actions/dynamo-deploy-test/action.yml
+52
-9
.github/workflows/post-merge-ci.yml
.github/workflows/post-merge-ci.yml
+42
-4
.github/workflows/shared-deploy-test-framework.yml
.github/workflows/shared-deploy-test-framework.yml
+1
-0
docs/kubernetes/inference-gateway.md
docs/kubernetes/inference-gateway.md
+4
-0
examples/backends/vllm/deploy/gaie/agg.yaml
examples/backends/vllm/deploy/gaie/agg.yaml
+1
-1
examples/backends/vllm/deploy/gaie/disagg.yaml
examples/backends/vllm/deploy/gaie/disagg.yaml
+159
-0
examples/backends/vllm/deploy/gaie/http-route.yaml
examples/backends/vllm/deploy/gaie/http-route.yaml
+3
-3
pyproject.toml
pyproject.toml
+2
-0
recipes/llama-3-70b/vllm/disagg-single-node/gaie/deploy.yaml
recipes/llama-3-70b/vllm/disagg-single-node/gaie/deploy.yaml
+10
-0
tests/deploy/conftest.py
tests/deploy/conftest.py
+6
-0
tests/deploy/test_deploy.py
tests/deploy/test_deploy.py
+212
-1
tests/utils/client.py
tests/utils/client.py
+4
-1
tests/utils/managed_deployment.py
tests/utils/managed_deployment.py
+24
-0
No files found.
.github/actions/dynamo-deploy-test/action.yml
View file @
8c2a4681
...
@@ -31,17 +31,28 @@ inputs:
...
@@ -31,17 +31,28 @@ inputs:
framework
:
framework
:
description
:
'
Framework
name
(vllm,
sglang,
trtllm)'
description
:
'
Framework
name
(vllm,
sglang,
trtllm)'
required
:
true
required
:
false
default
:
'
'
profile
:
profile
:
description
:
'
Deployment
profile
(e.g.,
disagg_router,
agg)'
description
:
'
Deployment
profile
(e.g.,
disagg_router,
agg)'
required
:
true
required
:
false
default
:
'
'
image
:
image
:
description
:
'
Full
container
image
reference
for
the
framework
runtime'
description
:
'
Full
container
image
reference
for
the
framework
runtime'
required
:
true
required
:
false
default
:
'
'
platform_arch
:
platform_arch
:
description
:
'
Platform
architecture
(amd64,
arm64)'
description
:
'
Platform
architecture
(amd64,
arm64)'
required
:
false
required
:
false
default
:
'
amd64'
default
:
'
amd64'
test_name
:
description
:
'
Name
for
artifact
naming.
Defaults
to
{framework}_{profile}.'
required
:
false
default
:
'
'
extra_pytest_args
:
description
:
'
Additional
pytest
arguments
(e.g.,
--frontend-image=...)'
required
:
false
default
:
'
'
runs
:
runs
:
using
:
"
composite"
using
:
"
composite"
...
@@ -82,16 +93,33 @@ runs:
...
@@ -82,16 +93,33 @@ runs:
FRAMEWORK
:
${{ inputs.framework }}
FRAMEWORK
:
${{ inputs.framework }}
PROFILE
:
${{ inputs.profile }}
PROFILE
:
${{ inputs.profile }}
IMAGE
:
${{ inputs.image }}
IMAGE
:
${{ inputs.image }}
TEST_NAME
:
${{ inputs.test_name }}
EXTRA_PYTEST_ARGS
:
${{ inputs.extra_pytest_args }}
run
:
|
run
:
|
mkdir -p test-results
mkdir -p test-results
PYTEST_ARGS=""
if [ -n "${FRAMEWORK}" ]; then
PYTEST_ARGS+=" --framework=${FRAMEWORK}"
fi
if [ -n "${PROFILE}" ]; then
PYTEST_ARGS+=" --profile=${PROFILE}"
fi
if [ -n "${IMAGE}" ]; then
PYTEST_ARGS+=" --image=${IMAGE}"
fi
if [ -z "${TEST_NAME}" ]; then
TEST_NAME="${FRAMEWORK:-unknown}_${PROFILE:-unknown}"
fi
pytest tests/deploy/test_deploy.py \
pytest tests/deploy/test_deploy.py \
--framework="${FRAMEWORK}" \
--profile="${PROFILE}" \
--image="${IMAGE}" \
--namespace="${NAMESPACE}" \
--namespace="${NAMESPACE}" \
${PYTEST_ARGS} \
${EXTRA_PYTEST_ARGS} \
-v -s \
-v -s \
--durations=10 \
--durations=10 \
--junitxml=test-results/pytest_deploy_${
FRAMEWORK}_${PROFIL
E}_${{ inputs.platform_arch }}_${{ github.run_id }}_${{ job.check_run_id }}.xml \
--junitxml=test-results/pytest_deploy_${
TEST_NAM
E}_${{ inputs.platform_arch }}_${{ github.run_id }}_${{ job.check_run_id }}.xml \
--log-cli-level=INFO
--log-cli-level=INFO
-
name
:
Cleanup Deployment
-
name
:
Cleanup Deployment
...
@@ -124,10 +152,25 @@ runs:
...
@@ -124,10 +152,25 @@ runs:
fi
fi
echo "::endgroup::"
echo "::endgroup::"
-
name
:
Determine test name for artifacts
id
:
test-name
if
:
always()
shell
:
bash
env
:
TEST_NAME
:
${{ inputs.test_name }}
FRAMEWORK
:
${{ inputs.framework }}
PROFILE
:
${{ inputs.profile }}
run
:
|
if [ -z "${TEST_NAME}" ]; then
TEST_NAME="${FRAMEWORK:-unknown}_${PROFILE:-unknown}"
fi
echo "name=${TEST_NAME}" >> $GITHUB_OUTPUT
-
name
:
Upload Test Results
-
name
:
Upload Test Results
uses
:
actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
#v6
uses
:
actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
#v6
if
:
always()
if
:
always()
with
:
with
:
name
:
test-results-${{
inputs.framework }}-${{ inputs.profil
e }}-${{ inputs.platform_arch }}-${{ github.run_id }}-${{ job.check_run_id }}
name
:
test-results-${{
steps.test-name.outputs.nam
e }}-${{ inputs.platform_arch }}-${{ github.run_id }}-${{ job.check_run_id }}
path
:
test-results/pytest_deploy_${{
inputs.framework }}_${{ inputs.profil
e }}_${{ inputs.platform_arch }}_${{ github.run_id }}_${{ job.check_run_id }}.xml
path
:
test-results/pytest_deploy_${{
steps.test-name.outputs.nam
e }}_${{ inputs.platform_arch }}_${{ github.run_id }}_${{ job.check_run_id }}.xml
retention-days
:
7
retention-days
:
7
.github/workflows/post-merge-ci.yml
View file @
8c2a4681
...
@@ -190,6 +190,16 @@ jobs:
...
@@ -190,6 +190,16 @@ jobs:
copy_to_acr
:
false
copy_to_acr
:
false
secrets
:
inherit
secrets
:
inherit
# ============================================================================
# FRONTEND IMAGE BUILD
# ============================================================================
frontend-image
:
name
:
Frontend Image
uses
:
./.github/workflows/build-frontend-image.yaml
with
:
skip_change_detection
:
true
secrets
:
inherit
# ============================================================================
# ============================================================================
# Operator
# Operator
# ============================================================================
# ============================================================================
...
@@ -354,7 +364,7 @@ jobs:
...
@@ -354,7 +364,7 @@ jobs:
deploy-cleanup
:
deploy-cleanup
:
if
:
always()
if
:
always()
needs
:
[
deploy-operator
,
deploy-test-vllm
,
deploy-test-sglang
,
deploy-test-trtllm
]
needs
:
[
deploy-operator
,
deploy-test-vllm
,
deploy-test-sglang
,
deploy-test-trtllm
,
deploy-test-gaie
]
runs-on
:
prod-default-small-v2
runs-on
:
prod-default-small-v2
steps
:
steps
:
-
uses
:
actions/checkout@v4
-
uses
:
actions/checkout@v4
...
@@ -366,9 +376,37 @@ jobs:
...
@@ -366,9 +376,37 @@ jobs:
vcluster_name
:
${{ needs.deploy-operator.outputs.vcluster_name }}
vcluster_name
:
${{ needs.deploy-operator.outputs.vcluster_name }}
vcluster_namespace
:
${{ needs.deploy-operator.outputs.namespace }}
vcluster_namespace
:
${{ needs.deploy-operator.outputs.namespace }}
# ============================================================================
# GAIE DEPLOY TEST
# ============================================================================
deploy-test-gaie
:
name
:
GAIE Deploy Test
runs-on
:
prod-default-small-v2
needs
:
[
deploy-operator
,
frontend-image
,
vllm-pipeline
]
timeout-minutes
:
30
permissions
:
contents
:
read
steps
:
-
name
:
Checkout code
uses
:
actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955
# v4.3.0
-
name
:
Run GAIE Deploy Test
id
:
deploy-test
uses
:
./.github/actions/dynamo-deploy-test
with
:
kubeconfig_base64
:
${{ secrets.AZURE_AKS_CI_KUBECONFIG_B64 }}
namespace
:
${{ needs.deploy-operator.outputs.namespace }}
registry
:
${{ secrets.AZURE_ACR_HOSTNAME }}
operator_tag
:
${{ needs.deploy-operator.outputs.operator_tag }}
hf_token
:
${{ secrets.HF_TOKEN }}
test_name
:
gaie
extra_pytest_args
:
>-
-m framework_with_gaie
--frontend-image=${{ secrets.AZURE_ACR_HOSTNAME }}/ai-dynamo/dynamo:${{ github.sha }}-frontend
--image=${{ secrets.AZURE_ACR_HOSTNAME }}/ai-dynamo/dynamo:${{ github.sha }}-vllm-runtime-cuda12-amd64
deploy-status-check
:
deploy-status-check
:
runs-on
:
ubuntu-latest
runs-on
:
ubuntu-latest
needs
:
[
deploy-operator
,
deploy-test-vllm
,
deploy-test-sglang
,
deploy-test-trtllm
]
needs
:
[
deploy-operator
,
deploy-test-vllm
,
deploy-test-sglang
,
deploy-test-trtllm
,
deploy-test-gaie
]
if
:
always()
if
:
always()
steps
:
steps
:
-
name
:
"
Check
all
deploy
test
jobs"
-
name
:
"
Check
all
deploy
test
jobs"
...
@@ -383,7 +421,7 @@ jobs:
...
@@ -383,7 +421,7 @@ jobs:
name
:
Clean K8s builder if exists
name
:
Clean K8s builder if exists
runs-on
:
prod-default-small-v2
runs-on
:
prod-default-small-v2
if
:
always()
if
:
always()
needs
:
[
vllm-pipeline
,
sglang-pipeline
,
trtllm-pipeline
,
vllm-dev-pipeline
,
sglang-dev-pipeline
,
trtllm-dev-pipeline
,
vllm-efa-pipeline
,
trtllm-efa-pipeline
,
operator
]
needs
:
[
vllm-pipeline
,
sglang-pipeline
,
trtllm-pipeline
,
vllm-dev-pipeline
,
sglang-dev-pipeline
,
trtllm-dev-pipeline
,
vllm-efa-pipeline
,
trtllm-efa-pipeline
,
operator
,
frontend-image
]
steps
:
steps
:
-
name
:
Checkout repository
-
name
:
Checkout repository
uses
:
actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955
# v4.3.0
uses
:
actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955
# v4.3.0
...
@@ -404,7 +442,7 @@ jobs:
...
@@ -404,7 +442,7 @@ jobs:
name
:
Notify Slack
name
:
Notify Slack
runs-on
:
prod-builder-amd-v1
runs-on
:
prod-builder-amd-v1
if
:
always() && failure()
if
:
always() && failure()
needs
:
[
vllm-pipeline
,
sglang-pipeline
,
trtllm-pipeline
,
vllm-dev-pipeline
,
sglang-dev-pipeline
,
trtllm-dev-pipeline
,
vllm-efa-pipeline
,
trtllm-efa-pipeline
,
operator
,
deploy-operator
,
deploy-test-vllm
,
deploy-test-sglang
,
deploy-test-trtllm
]
needs
:
[
vllm-pipeline
,
sglang-pipeline
,
trtllm-pipeline
,
vllm-dev-pipeline
,
sglang-dev-pipeline
,
trtllm-dev-pipeline
,
vllm-efa-pipeline
,
trtllm-efa-pipeline
,
operator
,
frontend-image
,
deploy-operator
,
deploy-test-vllm
,
deploy-test-sglang
,
deploy-test-trtllm
,
deploy-test-gaie
]
permissions
:
permissions
:
contents
:
read
contents
:
read
steps
:
steps
:
...
...
.github/workflows/shared-deploy-test-framework.yml
View file @
8c2a4681
...
@@ -85,3 +85,4 @@ jobs:
...
@@ -85,3 +85,4 @@ jobs:
profile
:
${{ matrix.profile }}
profile
:
${{ matrix.profile }}
image
:
${{ secrets.AZURE_ACR_HOSTNAME }}/ai-dynamo/dynamo:${{ github.sha }}-${{ inputs.image_suffix }}
image
:
${{ secrets.AZURE_ACR_HOSTNAME }}/ai-dynamo/dynamo:${{ github.sha }}-${{ inputs.image_suffix }}
platform_arch
:
amd64
platform_arch
:
amd64
extra_pytest_args
:
-m framework_only
docs/kubernetes/inference-gateway.md
View file @
8c2a4681
...
@@ -122,7 +122,11 @@ For the HttpRoute service make sure to specify the namespace where your gateway
...
@@ -122,7 +122,11 @@ For the HttpRoute service make sure to specify the namespace where your gateway
```
bash
```
bash
cd
<dynamo-source-root>
cd
<dynamo-source-root>
# kubectl get httproutes -n my-model # Make sure you do not have an incompatible HttpRoute running, delete if so.
# kubectl get httproutes -n my-model # Make sure you do not have an incompatible HttpRoute running, delete if so.
# Choose disagg or agg example
kubectl apply
-f
examples/backends/vllm/deploy/gaie/disagg.yaml
-n
my-model
# or
kubectl apply
-f
examples/backends/vllm/deploy/gaie/agg.yaml
-n
my-model
kubectl apply
-f
examples/backends/vllm/deploy/gaie/agg.yaml
-n
my-model
# make sure to apply the route
kubectl apply
-f
examples/backends/vllm/deploy/gaie/http-route.yaml
-n
my-model
kubectl apply
-f
examples/backends/vllm/deploy/gaie/http-route.yaml
-n
my-model
```
```
...
...
examples/backends/vllm/deploy/gaie/agg.yaml
View file @
8c2a4681
...
@@ -3,7 +3,7 @@
...
@@ -3,7 +3,7 @@
apiVersion
:
nvidia.com/v1alpha1
apiVersion
:
nvidia.com/v1alpha1
kind
:
DynamoGraphDeployment
kind
:
DynamoGraphDeployment
metadata
:
metadata
:
name
:
qwen
-agg
name
:
qwen
spec
:
spec
:
backendFramework
:
vllm
backendFramework
:
vllm
services
:
services
:
...
...
examples/backends/vllm/deploy/gaie/disagg.yaml
0 → 100644
View file @
8c2a4681
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
apiVersion
:
nvidia.com/v1alpha1
kind
:
DynamoGraphDeployment
metadata
:
name
:
qwen
spec
:
backendFramework
:
vllm
services
:
Epp
:
envFromSecret
:
hf-token-secret
componentType
:
epp
replicas
:
1
extraPodSpec
:
mainContainer
:
image
:
nvcr.io/nvidia/ai-dynamo/epp-image:my-tag
imagePullPolicy
:
IfNotPresent
env
:
-
name
:
DYN_KV_CACHE_BLOCK_SIZE
value
:
"
16"
-
name
:
DYN_MODEL_NAME
value
:
"
Qwen/Qwen3-0.6B"
-
name
:
DYN_ENFORCE_DISAGG
value
:
"
true"
eppConfig
:
config
:
plugins
:
-
type
:
disagg-profile-handler
-
name
:
prefill-filter
type
:
label-filter
parameters
:
label
:
"
nvidia.com/dynamo-sub-component-type"
validValues
:
-
"
prefill"
allowsNoLabel
:
false
-
name
:
decode-filter
type
:
label-filter
parameters
:
label
:
"
nvidia.com/dynamo-sub-component-type"
validValues
:
-
"
decode"
allowsNoLabel
:
false
-
name
:
picker
type
:
max-score-picker
-
name
:
dyn-prefill
type
:
dyn-prefill-scorer
-
name
:
dyn-decode
type
:
dyn-decode-scorer
schedulingProfiles
:
-
name
:
prefill
plugins
:
-
pluginRef
:
prefill-filter
weight
:
1
-
pluginRef
:
dyn-prefill
weight
:
1
-
pluginRef
:
picker
weight
:
1
-
name
:
decode
plugins
:
-
pluginRef
:
decode-filter
weight
:
1
-
pluginRef
:
dyn-decode
weight
:
1
-
pluginRef
:
picker
weight
:
1
VllmPrefillWorker
:
componentType
:
worker
subComponentType
:
prefill
envFromSecret
:
hf-token-secret
sharedMemory
:
size
:
2Gi
frontendSidecar
:
image
:
docker.io/lambda108/dynamo:post-rebase
args
:
-
-m
-
dynamo.frontend
-
--router-mode
-
direct
envFromSecret
:
hf-token-secret
extraPodSpec
:
affinity
:
podAffinity
:
preferredDuringSchedulingIgnoredDuringExecution
:
-
weight
:
100
podAffinityTerm
:
labelSelector
:
matchExpressions
:
-
key
:
nvidia.com/dynamo-component-type
operator
:
In
values
:
-
worker
topologyKey
:
kubernetes.io/hostname
mainContainer
:
env
:
-
name
:
SERVED_MODEL_NAME
value
:
"
Qwen/Qwen3-0.6B"
-
name
:
MODEL_PATH
value
:
"
Qwen/Qwen3-0.6B"
args
:
-
"
python3
-m
dynamo.vllm
--model
$MODEL_PATH
--served-model-name
$SERVED_MODEL_NAME
--tensor-parallel-size
1
--data-parallel-size
1
--disaggregation-mode
prefill
--kv-transfer-config
'{
\"
kv_connector
\"
:
\"
NixlConnector
\"
,
\"
kv_role
\"
:
\"
kv_both
\"
}'
--gpu-memory-utilization
0.90
--enable-prefix-caching
--block-size
16
--kv-events-config
'{
\"
enable_kv_cache_events
\"
:true}'"
command
:
-
/bin/sh
-
-c
image
:
docker.io/lambda108/dynamo:post-rebase
imagePullPolicy
:
IfNotPresent
workingDir
:
/workspace/examples/backends/vllm
replicas
:
1
resources
:
limits
:
gpu
:
"
1"
requests
:
gpu
:
"
1"
VllmDecodeWorker
:
componentType
:
worker
subComponentType
:
decode
envFromSecret
:
hf-token-secret
sharedMemory
:
size
:
2Gi
frontendSidecar
:
image
:
docker.io/lambda108/dynamo:post-rebase
args
:
-
-m
-
dynamo.frontend
-
--router-mode
-
direct
envFromSecret
:
hf-token-secret
extraPodSpec
:
affinity
:
podAffinity
:
preferredDuringSchedulingIgnoredDuringExecution
:
-
weight
:
100
podAffinityTerm
:
labelSelector
:
matchExpressions
:
-
key
:
nvidia.com/dynamo-component-type
operator
:
In
values
:
-
worker
topologyKey
:
kubernetes.io/hostname
mainContainer
:
env
:
-
name
:
SERVED_MODEL_NAME
value
:
"
Qwen/Qwen3-0.6B"
-
name
:
MODEL_PATH
value
:
"
Qwen/Qwen3-0.6B"
args
:
-
"
python3
-m
dynamo.vllm
--model
$MODEL_PATH
--served-model-name
$SERVED_MODEL_NAME
--tensor-parallel-size
1
--data-parallel-size
1
--kv-transfer-config
'{
\"
kv_connector
\"
:
\"
NixlConnector
\"
,
\"
kv_role
\"
:
\"
kv_both
\"
}'
--gpu-memory-utilization
0.90
--enable-prefix-caching
--block-size
16
--kv-events-config
'{
\"
enable_kv_cache_events
\"
:true}'"
command
:
-
/bin/sh
-
-c
image
:
docker.io/lambda108/dynamo:post-rebase
imagePullPolicy
:
IfNotPresent
workingDir
:
/workspace/examples/backends/vllm
replicas
:
1
resources
:
limits
:
gpu
:
"
1"
requests
:
gpu
:
"
1"
\ No newline at end of file
examples/backends/vllm/deploy/gaie/http-route.yaml
View file @
8c2a4681
...
@@ -18,18 +18,18 @@
...
@@ -18,18 +18,18 @@
apiVersion
:
gateway.networking.k8s.io/v1
apiVersion
:
gateway.networking.k8s.io/v1
kind
:
HTTPRoute
kind
:
HTTPRoute
metadata
:
metadata
:
name
:
qwen-
agg-
route
name
:
qwen-route
spec
:
spec
:
parentRefs
:
parentRefs
:
-
group
:
gateway.networking.k8s.io
-
group
:
gateway.networking.k8s.io
kind
:
Gateway
kind
:
Gateway
name
:
inference-gateway
name
:
inference-gateway
namespace
:
my-model
# the namespace where your gateway is deployed.
namespace
:
default
# the namespace where your gateway is deployed.
rules
:
rules
:
-
backendRefs
:
-
backendRefs
:
-
group
:
inference.networking.k8s.io
-
group
:
inference.networking.k8s.io
kind
:
InferencePool
kind
:
InferencePool
name
:
qwen-
agg-
pool
name
:
qwen-pool
port
:
8000
port
:
8000
weight
:
1
weight
:
1
matches
:
matches
:
...
...
pyproject.toml
View file @
8c2a4681
...
@@ -254,6 +254,8 @@ markers = [
...
@@ -254,6 +254,8 @@ markers = [
"k8s: marks tests as requiring Kubernetes"
,
"k8s: marks tests as requiring Kubernetes"
,
"fault_tolerance: marks tests as fault tolerance tests"
,
"fault_tolerance: marks tests as fault tolerance tests"
,
"deploy: marks tests as deployment tests"
,
"deploy: marks tests as deployment tests"
,
"framework_only: marks standard framework deployment tests (vllm, sglang, trtllm)"
,
"framework_with_gaie: marks tests for GAIE (Gateway API Inference Extension) deployment"
,
# Built-in markers
# Built-in markers
"skip: skip this test"
,
"skip: skip this test"
,
"skipif: skip if condition is true"
,
"skipif: skip if condition is true"
,
...
...
recipes/llama-3-70b/vllm/disagg-single-node/gaie/deploy.yaml
View file @
8c2a4681
...
@@ -22,6 +22,8 @@ spec:
...
@@ -22,6 +22,8 @@ spec:
value
:
"
128"
value
:
"
128"
-
name
:
DYN_MODEL_NAME
-
name
:
DYN_MODEL_NAME
value
:
"
RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic"
value
:
"
RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic"
-
name
:
DYN_ENFORCE_DISAGG
value
:
"
true"
eppConfig
:
eppConfig
:
config
:
config
:
plugins
:
plugins
:
...
@@ -81,6 +83,10 @@ spec:
...
@@ -81,6 +83,10 @@ spec:
-
direct
-
direct
envFromSecret
:
hf-token-secret
envFromSecret
:
hf-token-secret
extraPodSpec
:
extraPodSpec
:
tolerations
:
-
key
:
nvidia.com/gpu
operator
:
Exists
effect
:
NoSchedule
affinity
:
affinity
:
podAffinity
:
podAffinity
:
preferredDuringSchedulingIgnoredDuringExecution
:
preferredDuringSchedulingIgnoredDuringExecution
:
...
@@ -132,6 +138,10 @@ spec:
...
@@ -132,6 +138,10 @@ spec:
-
direct
-
direct
envFromSecret
:
hf-token-secret
envFromSecret
:
hf-token-secret
extraPodSpec
:
extraPodSpec
:
tolerations
:
-
key
:
nvidia.com/gpu
operator
:
Exists
effect
:
NoSchedule
affinity
:
affinity
:
podAffinity
:
podAffinity
:
preferredDuringSchedulingIgnoredDuringExecution
:
preferredDuringSchedulingIgnoredDuringExecution
:
...
...
tests/deploy/conftest.py
View file @
8c2a4681
...
@@ -40,6 +40,12 @@ def pytest_addoption(parser: pytest.Parser) -> None:
...
@@ -40,6 +40,12 @@ def pytest_addoption(parser: pytest.Parser) -> None:
help
=
"Deployment profile to test (e.g., agg, disagg, disagg_router). "
help
=
"Deployment profile to test (e.g., agg, disagg, disagg_router). "
"If not specified, runs all profiles for the selected framework."
,
"If not specified, runs all profiles for the selected framework."
,
)
)
parser
.
addoption
(
"--frontend-image"
,
type
=
str
,
default
=
None
,
help
=
"Frontend container image (used by GAIE deploy tests)."
,
)
@
dataclass
(
frozen
=
True
)
@
dataclass
(
frozen
=
True
)
...
...
tests/deploy/test_deploy.py
View file @
8c2a4681
...
@@ -9,14 +9,23 @@ to chat completion requests correctly.
...
@@ -9,14 +9,23 @@ to chat completion requests correctly.
"""
"""
import
logging
import
logging
import
os
import
subprocess
import
time
from
typing
import
Any
,
Dict
from
typing
import
Any
,
Dict
import
kr8s
import
pytest
import
pytest
import
requests
import
requests
import
yaml
from
tests.deploy.conftest
import
DeploymentTarget
from
tests.deploy.conftest
import
DeploymentTarget
from
tests.utils.client
import
send_request
,
wait_for_model_availability
from
tests.utils.client
import
send_request
,
wait_for_model_availability
from
tests.utils.managed_deployment
import
DeploymentSpec
,
ManagedDeployment
from
tests.utils.managed_deployment
import
(
DeploymentSpec
,
ManagedDeployment
,
_get_workspace_dir
,
)
logger
=
logging
.
getLogger
(
__name__
)
logger
=
logging
.
getLogger
(
__name__
)
...
@@ -38,6 +47,7 @@ DEFAULT_REQUEST_TIMEOUT = 120
...
@@ -38,6 +47,7 @@ DEFAULT_REQUEST_TIMEOUT = 120
# Minimum response content length to validate that the model is generating meaningful output.
# Minimum response content length to validate that the model is generating meaningful output.
# This matches the validation threshold from the original shell-based deployment tests.
# This matches the validation threshold from the original shell-based deployment tests.
MIN_RESPONSE_CONTENT_LENGTH
=
100
MIN_RESPONSE_CONTENT_LENGTH
=
100
GAIE_MODEL_NAME
=
"Qwen/Qwen3-0.6B"
def
validate_chat_response
(
def
validate_chat_response
(
...
@@ -100,6 +110,7 @@ def validate_chat_response(
...
@@ -100,6 +110,7 @@ def validate_chat_response(
return
data
return
data
@
pytest
.
mark
.
framework_only
@
pytest
.
mark
.
k8s
@
pytest
.
mark
.
k8s
@
pytest
.
mark
.
deploy
@
pytest
.
mark
.
deploy
@
pytest
.
mark
.
post_merge
@
pytest
.
mark
.
post_merge
...
@@ -213,3 +224,203 @@ async def test_deployment(
...
@@ -213,3 +224,203 @@ async def test_deployment(
f
"Deployment test PASSED for
{
deployment_target
.
test_id
}
"
f
"Deployment test PASSED for
{
deployment_target
.
test_id
}
"
f
"(source:
{
deployment_target
.
source
}
, model:
{
model
}
, namespace:
{
namespace
}
)"
f
"(source:
{
deployment_target
.
source
}
, model:
{
model
}
, namespace:
{
namespace
}
)"
)
)
# GAIE (Gateway API Inference Extension) deployment test


def _log_kubectl_output(label: str, kubectl_args: list, failure_label: str = "") -> None:
    """Run a read-only ``kubectl`` command and log what it printed.

    Purely diagnostic: a non-zero exit status is logged at ERROR level (only
    when ``failure_label`` is given) and is never raised.

    Args:
        label: Prefix for the INFO line carrying the command's stdout.
        kubectl_args: Arguments placed after ``kubectl`` on the command line.
        failure_label: Prefix for the ERROR line carrying stderr when the
            command exits non-zero. Empty string disables the error log.
    """
    completed = subprocess.run(
        ["kubectl", *kubectl_args],
        capture_output=True,
        text=True,
    )
    logger.info(f"{label}: {completed.stdout.strip()}")
    if failure_label and completed.returncode != 0:
        logger.error(f"{failure_label}: {completed.stderr}")


def _delete_httproute(namespace: str, manifest: str) -> None:
    """Best-effort removal of the HTTPRoute manifest applied by the GAIE test.

    Failures are only logged so that cleanup never masks the real test result.
    """
    outcome = subprocess.run(
        ["kubectl", "delete", "-n", namespace, "--ignore-not-found", "-f", "-"],
        input=manifest,
        capture_output=True,
        text=True,
    )
    if outcome.returncode != 0:
        logger.warning(f"HTTPRoute cleanup failed: {outcome.stderr}")


@pytest.mark.framework_with_gaie
@pytest.mark.k8s
@pytest.mark.deploy
@pytest.mark.post_merge
@pytest.mark.e2e
@pytest.mark.timeout(900)
async def test_gaie_deployment(
    image: str,
    namespace: str,
    skip_service_restart: bool,
    request,
) -> None:
    """Test GAIE disaggregated deployment with vLLM workers.

    Applies the GAIE DynamoGraphDeployment (with CI-built images) and the
    companion HTTPRoute, then verifies inference works end-to-end through
    the full Gateway path. The HTTPRoute is removed again when the test
    finishes, whether it passed or failed.

    Args:
        image: Worker container image (``--image``); applied to both vLLM
            worker services.
        namespace: Kubernetes namespace (``--namespace``) that receives the
            HTTPRoute and the deployment.
        skip_service_restart: Forwarded unchanged to ``ManagedDeployment``.
        request: pytest request object; supplies ``--frontend-image`` and the
            test node name used as the log directory.
    """
    # Imported locally because the module-level imports do not include asyncio.
    import asyncio

    frontend_image = request.config.getoption("--frontend-image")
    worker_image = image

    assert frontend_image, "--frontend-image is required for GAIE deploy test"
    assert worker_image, "--image is required for GAIE deploy test"
    assert namespace, "--namespace is required for GAIE deploy test"

    workspace = _get_workspace_dir()
    gaie_dir = os.path.join(
        workspace, "examples", "backends", "vllm", "deploy", "gaie"
    )
    disagg_path = os.path.join(gaie_dir, "disagg.yaml")
    httproute_path = os.path.join(gaie_dir, "http-route.yaml")

    assert os.path.exists(disagg_path), f"disagg.yaml not found: {disagg_path}"
    assert os.path.exists(
        httproute_path
    ), f"http-route.yaml not found: {httproute_path}"

    deployment_spec = DeploymentSpec(disagg_path)
    deployment_spec.namespace = namespace

    logger.info(f"Frontend image: {frontend_image}")
    logger.info(f"Worker image: {worker_image}")

    # The EPP service and every worker's frontend sidecar run the frontend
    # image; the workers' main containers run the worker image.
    deployment_spec.set_image(frontend_image, service_name="Epp")
    for worker in ("VllmPrefillWorker", "VllmDecodeWorker"):
        deployment_spec.set_image(worker_image, service_name=worker)
        deployment_spec.set_frontend_sidecar_image(
            frontend_image, service_name=worker
        )

    # A per-namespace hostname is written into the route and later sent as
    # the Host header of every request.
    route_hostname = f"{namespace}.example.com"
    logger.info(f"HTTPRoute hostname: {route_hostname}")

    with open(httproute_path) as f:
        httproute_spec = yaml.safe_load(f)
    httproute_spec["spec"]["hostnames"] = [route_hostname]
    httproute_yaml = yaml.safe_dump(httproute_spec)

    logger.info("Applying GAIE HTTPRoute...")
    result = subprocess.run(
        ["kubectl", "apply", "-n", namespace, "-f", "-"],
        input=httproute_yaml,
        capture_output=True,
        text=True,
    )
    logger.info(f"HTTPRoute apply stdout: {result.stdout}")
    if result.stderr:
        logger.warning(f"HTTPRoute apply stderr: {result.stderr}")
    assert result.returncode == 0, f"Failed to apply HTTPRoute: {result.stderr}"

    try:
        # Debug: verify namespace state before creating DGD
        logger.info(f"Namespace: {namespace}")
        _log_kubectl_output(
            "Namespace check",
            ["get", "namespace", namespace],
            failure_label="Namespace not found",
        )

        # Debug: check if operator CRD is registered
        _log_kubectl_output(
            "CRD check",
            ["get", "crd", "dynamographdeployments.nvidia.com"],
            failure_label="CRD not found",
        )

        # Debug: check operator pod status
        _log_kubectl_output(
            "Operator pods",
            [
                "get",
                "pods",
                "-n",
                namespace,
                "-l",
                "app.kubernetes.io/name=dynamo-operator",
            ],
        )

        # Debug: log the full deployment spec being submitted
        logger.info(f"DGD name: {deployment_spec.name}")
        logger.info(f"DGD namespace: {deployment_spec.namespace}")
        logger.info(f"DGD services: {[s.name for s in deployment_spec.services]}")

        async with ManagedDeployment(
            log_dir=request.node.name,
            deployment_spec=deployment_spec,
            namespace=namespace,
            skip_service_restart=skip_service_restart,
            frontend_service_name="Epp",
        ) as deployment:
            # Debug: check what DGDs exist after creation
            _log_kubectl_output(
                "DGDs after creation",
                ["get", "dynamographdeployments", "-n", namespace],
            )
            _log_kubectl_output(
                "Pods after creation",
                ["get", "pods", "-n", namespace, "-o", "wide"],
            )

            epp_pods = deployment.get_pods(["Epp"])
            epp_pod_list = epp_pods.get("Epp", [])
            assert epp_pod_list, "No EPP pods found for GAIE deployment"
            logger.info(f"Found EPP pod: {epp_pod_list[0].name}")

            gateway_svcs = list(
                kr8s.get("services", "inference-gateway", namespace=namespace)
            )
            assert (
                gateway_svcs
            ), f"inference-gateway service not found in namespace {namespace}"

            gateway_pf = gateway_svcs[0].portforward(remote_port=80, local_port=0)
            gateway_pf.start()
            try:
                # Presumably gives the port-forward a moment to come up.
                # Awaited (not time.sleep) so the event loop keeps running.
                await asyncio.sleep(2)

                gateway_url = f"http://localhost:{gateway_pf.local_port}"
                logger.info(f"Gateway port-forward established: {gateway_url}")

                endpoint = deployment_spec.endpoint
                headers = {"Host": route_hostname}
                logger.info(f"Using Host header: {route_hostname}")

                model_ready = wait_for_model_availability(
                    url=gateway_url,
                    endpoint=endpoint,
                    model=GAIE_MODEL_NAME,
                    logger=logger,
                    max_attempts=30,
                    headers=headers,
                )
                assert model_ready, (
                    f"Model '{GAIE_MODEL_NAME}' did not become available "
                    f"within the timeout period"
                )

                url = f"{gateway_url}{endpoint}"
                payload = {
                    "model": GAIE_MODEL_NAME,
                    "messages": [{"role": "user", "content": TEST_PROMPT}],
                    "max_tokens": DEFAULT_MAX_TOKENS,
                    "temperature": DEFAULT_TEMPERATURE,
                    "stream": False,
                }

                logger.info(f"Sending inference request to {url}")
                response = requests.post(
                    url,
                    json=payload,
                    headers=headers,
                    timeout=DEFAULT_REQUEST_TIMEOUT,
                )

                validate_chat_response(
                    response=response,
                    expected_model=GAIE_MODEL_NAME,
                    min_content_length=MIN_RESPONSE_CONTENT_LENGTH,
                )

                data = response.json()
                content = data["choices"][0]["message"]["content"]
                logger.info(
                    f"GAIE deployment test PASSED | "
                    f"model={data['model']}, status={response.status_code}, "
                    f"response_length={len(content)} chars\n"
                    f"Model response: {content}"
                )
            finally:
                gateway_pf.stop()
    finally:
        # Remove the route applied above so it does not outlive the test.
        _delete_httproute(namespace, httproute_yaml)
tests/utils/client.py
View file @
8c2a4681
...
@@ -159,6 +159,7 @@ def wait_for_model_availability(
...
@@ -159,6 +159,7 @@ def wait_for_model_availability(
logger
:
logging
.
Logger
,
logger
:
logging
.
Logger
,
max_attempts
:
int
=
15
,
max_attempts
:
int
=
15
,
attempt_timeouts
:
list
[
float
]
|
None
=
None
,
attempt_timeouts
:
list
[
float
]
|
None
=
None
,
headers
:
dict
[
str
,
str
]
|
None
=
None
,
)
->
bool
:
)
->
bool
:
"""
"""
Wait for model to be available by sending test requests.
Wait for model to be available by sending test requests.
...
@@ -197,7 +198,9 @@ def wait_for_model_availability(
...
@@ -197,7 +198,9 @@ def wait_for_model_availability(
logger
.
debug
(
logger
.
debug
(
f
"Testing model availability at
{
test_url
}
(attempt
{
attempt
+
1
}
/
{
max_attempts
}
, timeout=
{
timeout_val
}
s)"
f
"Testing model availability at
{
test_url
}
(attempt
{
attempt
+
1
}
/
{
max_attempts
}
, timeout=
{
timeout_val
}
s)"
)
)
response
=
requests
.
post
(
test_url
,
json
=
test_payload
,
timeout
=
timeout_val
)
response
=
requests
.
post
(
test_url
,
json
=
test_payload
,
timeout
=
timeout_val
,
headers
=
headers
)
if
response
.
status_code
==
200
:
if
response
.
status_code
==
200
:
logger
.
info
(
f
"Model '
{
model
}
' is available and responding"
)
logger
.
info
(
f
"Model '
{
model
}
' is available and responding"
)
...
...
tests/utils/managed_deployment.py
View file @
8c2a4681
...
@@ -66,6 +66,20 @@ class ServiceSpec:
...
@@ -66,6 +66,20 @@ class ServiceSpec:
self
.
_spec
[
"extraPodSpec"
][
"mainContainer"
]
=
{}
self
.
_spec
[
"extraPodSpec"
][
"mainContainer"
]
=
{}
self
.
_spec
[
"extraPodSpec"
][
"mainContainer"
][
"image"
]
=
value
self
.
_spec
[
"extraPodSpec"
][
"mainContainer"
][
"image"
]
=
value
@property
def frontend_sidecar_image(self) -> Optional[str]:
    """Image configured under ``frontendSidecar``, or ``None`` if unset.

    ``None`` is returned both when the spec has no ``frontendSidecar``
    section and when that section has no ``image`` key.
    """
    try:
        sidecar_section = self._spec["frontendSidecar"]
        sidecar_image = sidecar_section["image"]
    except KeyError:
        return None
    else:
        return sidecar_image


@frontend_sidecar_image.setter
def frontend_sidecar_image(self, value: str):
    # Create the ``frontendSidecar`` section on first use, then set its image.
    self._spec.setdefault("frontendSidecar", {})["image"] = value
@
property
@
property
def
envs
(
self
)
->
list
[
dict
[
str
,
str
]]:
def
envs
(
self
)
->
list
[
dict
[
str
,
str
]]:
"""Environment variables for the service"""
"""Environment variables for the service"""
...
@@ -230,6 +244,16 @@ class DeploymentSpec:
...
@@ -230,6 +244,16 @@ class DeploymentSpec:
for
service
in
services
:
for
service
in
services
:
service
.
image
=
image
service
.
image
=
image
def set_frontend_sidecar_image(self, image: str, service_name: Optional[str] = None):
    """Assign ``image`` as the frontend sidecar image of one or all services.

    Args:
        image: Container image reference to assign.
        service_name: Service to update, looked up via ``self[service_name]``.
            When ``None``, every entry of ``self.services`` is updated.
    """
    targets = self.services if service_name is None else [self[service_name]]
    for target in targets:
        target.frontend_sidecar_image = image
def
set_tensor_parallel
(
self
,
tp_size
:
int
,
service_names
:
Optional
[
list
]
=
None
):
def
set_tensor_parallel
(
self
,
tp_size
:
int
,
service_names
:
Optional
[
list
]
=
None
):
"""Scale deployment for different tensor parallel configurations
"""Scale deployment for different tensor parallel configurations
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment