Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
7e499b5c
Unverified
Commit
7e499b5c
authored
Dec 02, 2025
by
Yan Ru Pei
Committed by
GitHub
Dec 02, 2025
Browse files
test: bring back the framework 1 gpu pre-merge tests + clean up pytest markers (#4698)
Signed-off-by:
PeaBrane
<
yanrpei@gmail.com
>
parent
3cad926e
Changes
18
Hide whitespace changes
Inline
Side-by-side
Showing
18 changed files
with
78 additions
and
179 deletions
+78
-179
.github/workflows/container-validation-backends.yml
.github/workflows/container-validation-backends.yml
+9
-36
.github/workflows/container-validation-dynamo.yml
.github/workflows/container-validation-dynamo.yml
+2
-2
components/src/dynamo/common/utils/prometheus.py
components/src/dynamo/common/utils/prometheus.py
+1
-1
components/src/dynamo/sglang/tests/test_sglang_prometheus_utils.py
...s/src/dynamo/sglang/tests/test_sglang_prometheus_utils.py
+2
-1
components/src/dynamo/trtllm/tests/test_trtllm_autodeploy.py
components/src/dynamo/trtllm/tests/test_trtllm_autodeploy.py
+1
-0
components/src/dynamo/trtllm/tests/test_trtllm_prometheus_utils.py
...s/src/dynamo/trtllm/tests/test_trtllm_prometheus_utils.py
+1
-0
components/src/dynamo/trtllm/tests/test_trtllm_unit.py
components/src/dynamo/trtllm/tests/test_trtllm_unit.py
+1
-0
components/src/dynamo/vllm/tests/test_vllm_prometheus_utils.py
...nents/src/dynamo/vllm/tests/test_vllm_prometheus_utils.py
+2
-1
tests/README.md
tests/README.md
+2
-1
tests/fault_tolerance/cancellation/test_trtllm.py
tests/fault_tolerance/cancellation/test_trtllm.py
+7
-16
tests/fault_tolerance/migration/test_vllm.py
tests/fault_tolerance/migration/test_vllm.py
+8
-20
tests/frontend/test_completion_mocker_engine.py
tests/frontend/test_completion_mocker_engine.py
+7
-16
tests/frontend/test_vllm.py
tests/frontend/test_vllm.py
+7
-16
tests/router/test_router_e2e_with_mockers.py
tests/router/test_router_e2e_with_mockers.py
+6
-49
tests/router/test_router_e2e_with_vllm.py
tests/router/test_router_e2e_with_vllm.py
+7
-9
tests/serve/test_sglang.py
tests/serve/test_sglang.py
+8
-4
tests/serve/test_trtllm.py
tests/serve/test_trtllm.py
+3
-3
tests/serve/test_vllm.py
tests/serve/test_vllm.py
+4
-4
No files found.
.github/workflows/container-validation-backends.yml
View file @
7e499b5c
...
...
@@ -179,23 +179,14 @@ jobs:
azure_acr_user
:
${{ secrets.AZURE_ACR_USER }}
azure_acr_password
:
${{ secrets.AZURE_ACR_PASSWORD }}
-
name
:
Run
unit
tests
-
name
:
Run tests
if
:
${{ matrix.platform.arch != 'arm64' }}
uses
:
./.github/actions/pytest
with
:
image_tag
:
${{ steps.build-image.outputs.image_tag }}
pytest_marks
:
"
unit
and
vllm
and
gpu_1
"
pytest_marks
:
"
pre_merge
and
vllm
"
framework
:
"
vllm"
test_type
:
"
unit"
platform_arch
:
${{ matrix.platform.arch }}
-
name
:
Run e2e tests
if
:
${{ matrix.platform.arch != 'arm64' }}
uses
:
./.github/actions/pytest
with
:
image_tag
:
${{ steps.build-image.outputs.image_tag }}
pytest_marks
:
"
e2e
and
vllm
and
gpu_1
and
not
slow"
framework
:
"
vllm"
test_type
:
"
e2e,
gpu_1"
test_type
:
"
pre_merge"
platform_arch
:
${{ matrix.platform.arch }}
sglang
:
...
...
@@ -246,23 +237,14 @@ jobs:
azure_acr_user
:
${{ secrets.AZURE_ACR_USER }}
azure_acr_password
:
${{ secrets.AZURE_ACR_PASSWORD }}
-
name
:
Run unit tests
if
:
${{ matrix.platform.arch != 'arm64' }}
uses
:
./.github/actions/pytest
with
:
image_tag
:
${{ steps.build-image.outputs.image_tag }}
pytest_marks
:
"
unit
and
sglang
and
gpu_1"
framework
:
"
sglang"
test_type
:
"
unit"
platform_arch
:
${{ matrix.platform.arch }}
-
name
:
Run e2e tests
-
name
:
Run tests
if
:
${{ matrix.platform.arch != 'arm64' }}
uses
:
./.github/actions/pytest
with
:
image_tag
:
${{ steps.build-image.outputs.image_tag }}
pytest_marks
:
"
e2
e
and
sglang
and
gpu_1
"
pytest_marks
:
"
pre_merg
e
and
sglang"
framework
:
"
sglang"
test_type
:
"
e2e,
gpu_1
"
test_type
:
"
pre_merge
"
platform_arch
:
${{ matrix.platform.arch }}
trtllm
:
...
...
@@ -313,23 +295,14 @@ jobs:
azure_acr_user
:
${{ secrets.AZURE_ACR_USER }}
azure_acr_password
:
${{ secrets.AZURE_ACR_PASSWORD }}
-
name
:
Run unit tests
if
:
${{ matrix.platform.arch != 'arm64' }}
uses
:
./.github/actions/pytest
with
:
image_tag
:
${{ steps.build-image.outputs.image_tag }}
pytest_marks
:
"
unit
and
trtllm
and
gpu_1"
framework
:
"
trtllm"
test_type
:
"
unit"
platform_arch
:
${{ matrix.platform.arch }}
-
name
:
Run e2e tests
-
name
:
Run tests
if
:
${{ matrix.platform.arch != 'arm64' }}
uses
:
./.github/actions/pytest
with
:
image_tag
:
${{ steps.build-image.outputs.image_tag }}
pytest_marks
:
"
e2
e
and
trtllm
and
gpu_1
and
not
slow
"
pytest_marks
:
"
pre_merg
e
and
trtllm"
framework
:
"
trtllm"
test_type
:
"
e2e,
gpu_1
"
test_type
:
"
pre_merge
"
platform_arch
:
${{ matrix.platform.arch }}
deploy-test-fault-tolerance
:
...
...
.github/workflows/container-validation-dynamo.yml
View file @
7e499b5c
...
...
@@ -65,7 +65,7 @@ jobs:
docker compose down
-
name
:
Run pytest (parallel tests with xdist)
env
:
PYTEST_MARKS
:
"
pre_merge
and
parallel"
PYTEST_MARKS
:
"
pre_merge
and
parallel
and
not
(vllm
or
sglang
or
trtllm)
"
run
:
|
docker run -w /workspace \
--name ${{ env.CONTAINER_ID }}_pytest_parallel \
...
...
@@ -77,7 +77,7 @@ jobs:
docker cp ${{ env.CONTAINER_ID }}_pytest_parallel:/workspace/${{ env.PYTEST_PARALLEL_XML_FILE }} . || echo "No parallel test report found"
-
name
:
Run pytest (sequential tests)
env
:
PYTEST_MARKS
:
"
(pre_merge
and
not
parallel)
or
mypy"
PYTEST_MARKS
:
"
(
(
pre_merge
and
not
parallel)
or
mypy
)
and
not
(vllm
or
sglang
or
trtllm)
"
run
:
|
docker run -w /workspace \
--name ${{ env.CONTAINER_ID }}_pytest \
...
...
components/src/dynamo/common/utils/prometheus.py
View file @
7e499b5c
...
...
@@ -55,7 +55,7 @@ def register_engine_metrics_callback(
# Include multiple metric prefixes
register_engine_metrics_callback(
generate_endpoint, REGISTRY, metric_prefix_filter=["vllm:", "lmcache:"]
generate_endpoint, REGISTRY, metric_prefix_filter
s
=["vllm:", "lmcache:"]
)
# With filtering and prefixing for TensorRT-LLM
...
...
components/src/dynamo/sglang/tests/test_sglang_prometheus_utils.py
View file @
7e499b5c
...
...
@@ -13,6 +13,7 @@ pytestmark = [
pytest
.
mark
.
unit
,
pytest
.
mark
.
sglang
,
pytest
.
mark
.
gpu_0
,
pytest
.
mark
.
pre_merge
,
pytest
.
mark
.
post_merge
,
]
...
...
@@ -58,7 +59,7 @@ sglang:cache_hit_rate{model_name="meta-llama/Llama-3.1-8B-Instruct"} 0.0075
"""Test SGLang use case: filter to sglang: metrics and exclude python_/process_."""
result
=
get_prometheus_expfmt
(
sglang_registry
,
metric_prefix_filter
=
"sglang:"
,
metric_prefix_filter
s
=
[
"sglang:"
]
,
exclude_prefixes
=
[
"python_"
,
"process_"
],
)
...
...
components/src/dynamo/trtllm/tests/test_trtllm_autodeploy.py
View file @
7e499b5c
...
...
@@ -19,6 +19,7 @@ pytestmark = [
# `.github/workflows/container-validation-backends.yml` does not make use of
# the `gpu_0` marker.
pytest
.
mark
.
gpu_1
,
pytest
.
mark
.
pre_merge
,
]
_PYTORCH_LLM_CLS_NAME
=
"dynamo.trtllm.engine.LLM"
_AUTODEPLOY_LLM_CLS_NAME
=
"tensorrt_llm._torch.auto_deploy.LLM"
...
...
components/src/dynamo/trtllm/tests/test_trtllm_prometheus_utils.py
View file @
7e499b5c
...
...
@@ -13,6 +13,7 @@ pytestmark = [
pytest
.
mark
.
unit
,
pytest
.
mark
.
trtllm
,
pytest
.
mark
.
gpu_0
,
pytest
.
mark
.
pre_merge
,
pytest
.
mark
.
post_merge
,
]
...
...
components/src/dynamo/trtllm/tests/test_trtllm_unit.py
View file @
7e499b5c
...
...
@@ -23,6 +23,7 @@ pytestmark = [
pytest
.
mark
.
unit
,
pytest
.
mark
.
trtllm
,
pytest
.
mark
.
gpu_1
,
pytest
.
mark
.
pre_merge
,
]
...
...
components/src/dynamo/vllm/tests/test_vllm_prometheus_utils.py
View file @
7e499b5c
...
...
@@ -13,6 +13,7 @@ pytestmark = [
pytest
.
mark
.
unit
,
pytest
.
mark
.
vllm
,
pytest
.
mark
.
gpu_0
,
pytest
.
mark
.
pre_merge
,
pytest
.
mark
.
post_merge
,
]
...
...
@@ -56,7 +57,7 @@ vllm:time_to_first_token_seconds_count{model_name="meta-llama/Llama-3.1-8B"} 165
"""Test vLLM use case: filter to vllm: metrics and exclude python_/process_."""
result
=
get_prometheus_expfmt
(
vllm_registry
,
metric_prefix_filter
=
"vllm:"
,
metric_prefix_filter
s
=
[
"vllm:"
]
,
exclude_prefixes
=
[
"python_"
,
"process_"
],
)
...
...
tests/README.md
View file @
7e499b5c
...
...
@@ -71,7 +71,8 @@ Markers are required for all tests. They are used for test selection in CI and l
| Test Type [required] | unit, integration, e2e, benchmark, stress, multimodal | Nature of the test |
| Hardware [required] | gpu_0, gpu_1, gpu_2, gpu_4, gpu_8, h100 | Number/type of GPUs required |
| Component/Framework | vllm, trtllm, sglang, kvbm, planner, router | Backend or component specificity |
| Other | slow, skip, xfail | Special handling |
| Execution | parallel | Test can run in parallel with pytest-xdist |
| Other | slow, skip, xfail, mypy, custom_build | Special handling |
### Example
```
python
...
...
tests/fault_tolerance/cancellation/test_trtllm.py
View file @
7e499b5c
...
...
@@ -21,6 +21,13 @@ from tests.utils.payloads import check_health_generate, check_models_api
logger
=
logging
.
getLogger
(
__name__
)
pytestmark
=
[
pytest
.
mark
.
trtllm
,
pytest
.
mark
.
gpu_1
,
pytest
.
mark
.
e2e
,
pytest
.
mark
.
model
(
FAULT_TOLERANCE_MODEL_NAME
),
]
class
DynamoWorkerProcess
(
ManagedProcess
):
"""Process manager for Dynamo worker with TensorRT-LLM backend"""
...
...
@@ -127,10 +134,6 @@ class DynamoWorkerProcess(ManagedProcess):
return
False
@
pytest
.
mark
.
trtllm
@
pytest
.
mark
.
gpu_1
@
pytest
.
mark
.
e2e
@
pytest
.
mark
.
model
(
FAULT_TOLERANCE_MODEL_NAME
)
@
pytest
.
mark
.
nightly
def
test_request_cancellation_trtllm_aggregated
(
request
,
runtime_services
,
predownload_models
...
...
@@ -205,10 +208,6 @@ def test_request_cancellation_trtllm_aggregated(
logger
.
info
(
f
"
{
description
}
detected successfully"
)
@
pytest
.
mark
.
trtllm
@
pytest
.
mark
.
gpu_1
@
pytest
.
mark
.
e2e
@
pytest
.
mark
.
model
(
FAULT_TOLERANCE_MODEL_NAME
)
@
pytest
.
mark
.
nightly
def
test_request_cancellation_trtllm_decode_cancel
(
request
,
runtime_services
,
predownload_models
...
...
@@ -282,11 +281,7 @@ def test_request_cancellation_trtllm_decode_cancel(
)
@
pytest
.
mark
.
trtllm
@
pytest
.
mark
.
gpu_1
@
pytest
.
mark
.
e2e
@
pytest
.
mark
.
nightly
@
pytest
.
mark
.
model
(
FAULT_TOLERANCE_MODEL_NAME
)
def
test_request_cancellation_trtllm_prefill_cancel
(
request
,
runtime_services
,
predownload_models
):
...
...
@@ -369,10 +364,6 @@ def test_request_cancellation_trtllm_prefill_cancel(
)
@
pytest
.
mark
.
trtllm
@
pytest
.
mark
.
gpu_1
@
pytest
.
mark
.
e2e
@
pytest
.
mark
.
model
(
FAULT_TOLERANCE_MODEL_NAME
)
@
pytest
.
mark
.
xfail
(
reason
=
"May fail due to unknown reason with TRT-LLM or backend implementation"
,
strict
=
False
,
...
...
tests/fault_tolerance/migration/test_vllm.py
View file @
7e499b5c
...
...
@@ -23,6 +23,14 @@ from .utils import (
logger
=
logging
.
getLogger
(
__name__
)
pytestmark
=
[
pytest
.
mark
.
vllm
,
pytest
.
mark
.
gpu_1
,
pytest
.
mark
.
e2e
,
pytest
.
mark
.
model
(
FAULT_TOLERANCE_MODEL_NAME
),
pytest
.
mark
.
nightly
,
]
class
DynamoWorkerProcess
(
ManagedProcess
):
"""Process manager for Dynamo worker with vLLM backend"""
...
...
@@ -100,11 +108,6 @@ class DynamoWorkerProcess(ManagedProcess):
return
False
@
pytest
.
mark
.
vllm
@
pytest
.
mark
.
gpu_1
@
pytest
.
mark
.
e2e
@
pytest
.
mark
.
model
(
FAULT_TOLERANCE_MODEL_NAME
)
@
pytest
.
mark
.
nightly
def
test_request_migration_vllm_worker_failure
(
request
,
runtime_services
,
predownload_models
,
set_ucx_tls_no_mm
):
...
...
@@ -151,11 +154,6 @@ def test_request_migration_vllm_worker_failure(
verify_migration_occurred
(
frontend
)
@
pytest
.
mark
.
vllm
@
pytest
.
mark
.
gpu_1
@
pytest
.
mark
.
e2e
@
pytest
.
mark
.
model
(
FAULT_TOLERANCE_MODEL_NAME
)
@
pytest
.
mark
.
nightly
def
test_request_migration_vllm_graceful_shutdown
(
request
,
runtime_services
,
predownload_models
,
set_ucx_tls_no_mm
):
...
...
@@ -203,11 +201,6 @@ def test_request_migration_vllm_graceful_shutdown(
verify_migration_occurred
(
frontend
)
@
pytest
.
mark
.
vllm
@
pytest
.
mark
.
gpu_1
@
pytest
.
mark
.
e2e
@
pytest
.
mark
.
model
(
FAULT_TOLERANCE_MODEL_NAME
)
@
pytest
.
mark
.
nightly
def
test_no_request_migration_vllm_worker_failure
(
request
,
runtime_services
,
predownload_models
,
set_ucx_tls_no_mm
):
...
...
@@ -268,11 +261,6 @@ def test_no_request_migration_vllm_worker_failure(
),
f
"Unexpected migration message:
{
e
}
"
@
pytest
.
mark
.
vllm
@
pytest
.
mark
.
gpu_1
@
pytest
.
mark
.
e2e
@
pytest
.
mark
.
model
(
FAULT_TOLERANCE_MODEL_NAME
)
@
pytest
.
mark
.
nightly
def
test_no_request_migration_vllm_graceful_shutdown
(
request
,
runtime_services
,
predownload_models
,
set_ucx_tls_no_mm
):
...
...
tests/frontend/test_completion_mocker_engine.py
View file @
7e499b5c
...
...
@@ -22,6 +22,13 @@ logger = logging.getLogger(__name__)
TEST_MODEL
=
QWEN
pytestmark
=
[
pytest
.
mark
.
e2e
,
pytest
.
mark
.
gpu_1
,
pytest
.
mark
.
post_merge
,
pytest
.
mark
.
model
(
TEST_MODEL
),
]
class
DynamoFrontendProcess
(
ManagedProcess
):
"""Process manager for Dynamo frontend"""
...
...
@@ -145,10 +152,6 @@ def start_services(request, runtime_services):
@
pytest
.
mark
.
usefixtures
(
"start_services"
)
@
pytest
.
mark
.
e2e
@
pytest
.
mark
.
gpu_1
@
pytest
.
mark
.
post_merge
@
pytest
.
mark
.
model
(
TEST_MODEL
)
def
test_completion_string_prompt
()
->
None
:
payload
:
Dict
[
str
,
Any
]
=
{
"model"
:
TEST_MODEL
,
...
...
@@ -165,10 +168,6 @@ def test_completion_string_prompt() -> None:
@
pytest
.
mark
.
usefixtures
(
"start_services"
)
@
pytest
.
mark
.
e2e
@
pytest
.
mark
.
gpu_1
@
pytest
.
mark
.
post_merge
@
pytest
.
mark
.
model
(
TEST_MODEL
)
def
test_completion_empty_array_prompt
()
->
None
:
payload
:
Dict
[
str
,
Any
]
=
{
"model"
:
TEST_MODEL
,
...
...
@@ -185,10 +184,6 @@ def test_completion_empty_array_prompt() -> None:
@
pytest
.
mark
.
usefixtures
(
"start_services"
)
@
pytest
.
mark
.
e2e
@
pytest
.
mark
.
gpu_1
@
pytest
.
mark
.
post_merge
@
pytest
.
mark
.
model
(
TEST_MODEL
)
def
test_completion_single_element_array_prompt
()
->
None
:
payload
:
Dict
[
str
,
Any
]
=
{
"model"
:
TEST_MODEL
,
...
...
@@ -205,10 +200,6 @@ def test_completion_single_element_array_prompt() -> None:
@
pytest
.
mark
.
usefixtures
(
"start_services"
)
@
pytest
.
mark
.
e2e
@
pytest
.
mark
.
gpu_1
@
pytest
.
mark
.
post_merge
@
pytest
.
mark
.
model
(
TEST_MODEL
)
def
test_completion_multi_element_array_prompt
()
->
None
:
payload
:
Dict
[
str
,
Any
]
=
{
"model"
:
TEST_MODEL
,
...
...
tests/frontend/test_vllm.py
View file @
7e499b5c
...
...
@@ -22,6 +22,13 @@ logger = logging.getLogger(__name__)
TEST_MODEL
=
GPT_OSS
pytestmark
=
[
pytest
.
mark
.
vllm
,
pytest
.
mark
.
gpu_1
,
pytest
.
mark
.
e2e
,
pytest
.
mark
.
model
(
TEST_MODEL
),
]
WEATHER_TOOL
=
{
"type"
:
"function"
,
"function"
:
{
...
...
@@ -211,11 +218,7 @@ def _validate_chat_response(response: requests.Response) -> Dict[str, Any]:
@
pytest
.
mark
.
usefixtures
(
"start_services"
)
@
pytest
.
mark
.
vllm
@
pytest
.
mark
.
gpu_1
@
pytest
.
mark
.
e2e
@
pytest
.
mark
.
post_merge
@
pytest
.
mark
.
model
(
TEST_MODEL
)
def
test_reasoning_effort
(
request
,
runtime_services
,
predownload_models
)
->
None
:
"""High reasoning effort should yield more detailed reasoning than low effort."""
...
...
@@ -278,11 +281,7 @@ def test_reasoning_effort(request, runtime_services, predownload_models) -> None
@
pytest
.
mark
.
usefixtures
(
"start_services"
)
@
pytest
.
mark
.
vllm
@
pytest
.
mark
.
gpu_1
@
pytest
.
mark
.
e2e
@
pytest
.
mark
.
post_merge
@
pytest
.
mark
.
model
(
TEST_MODEL
)
def
test_tool_calling
(
request
,
runtime_services
,
predownload_models
)
->
None
:
"""Test tool calling functionality with weather and system health tools."""
...
...
@@ -321,11 +320,7 @@ def test_tool_calling(request, runtime_services, predownload_models) -> None:
@
pytest
.
mark
.
usefixtures
(
"start_services"
)
@
pytest
.
mark
.
vllm
@
pytest
.
mark
.
gpu_1
@
pytest
.
mark
.
e2e
@
pytest
.
mark
.
nightly
@
pytest
.
mark
.
model
(
TEST_MODEL
)
def
test_tool_calling_second_round
(
request
,
runtime_services
,
predownload_models
)
->
None
:
...
...
@@ -388,11 +383,7 @@ def test_tool_calling_second_round(
@
pytest
.
mark
.
usefixtures
(
"start_services"
)
@
pytest
.
mark
.
vllm
@
pytest
.
mark
.
gpu_1
@
pytest
.
mark
.
e2e
@
pytest
.
mark
.
nightly
@
pytest
.
mark
.
model
(
TEST_MODEL
)
def
test_reasoning
(
request
,
runtime_services
,
predownload_models
)
->
None
:
"""Test reasoning functionality with a mathematical problem."""
...
...
tests/router/test_router_e2e_with_mockers.py
View file @
7e499b5c
...
...
@@ -22,17 +22,17 @@ from tests.router.common import ( # utilities
from
tests.utils.constants
import
ROUTER_MODEL_NAME
from
tests.utils.managed_process
import
ManagedProcess
logger
=
logging
.
getLogger
(
__name__
)
MODEL_NAME
=
ROUTER_MODEL_NAME
pytestmark
=
[
pytest
.
mark
.
pre_merge
,
pytest
.
mark
.
gpu_0
,
pytest
.
mark
.
integration
,
pytest
.
mark
.
parallel
,
pytest
.
mark
.
model
(
MODEL_NAME
),
]
logger
=
logging
.
getLogger
(
__name__
)
MODEL_NAME
=
ROUTER_MODEL_NAME
NUM_MOCKERS
=
2
SPEEDUP_RATIO
=
10.0
BASE_PORT
=
9100
# Base port for all tests (high port to avoid conflicts)
...
...
@@ -287,11 +287,6 @@ class DisaggMockerProcess:
self
.
_process
.
__exit__
(
exc_type
,
exc_val
,
exc_tb
)
@
pytest
.
mark
.
pre_merge
@
pytest
.
mark
.
gpu_0
@
pytest
.
mark
.
integration
@
pytest
.
mark
.
parallel
@
pytest
.
mark
.
model
(
MODEL_NAME
)
def
test_mocker_kv_router
(
request
,
runtime_services_session
,
predownload_tokenizers
):
"""
Test KV router with multiple mocker engine instances.
...
...
@@ -331,11 +326,6 @@ def test_mocker_kv_router(request, runtime_services_session, predownload_tokeniz
mockers
.
__exit__
(
None
,
None
,
None
)
@
pytest
.
mark
.
pre_merge
@
pytest
.
mark
.
gpu_0
@
pytest
.
mark
.
integration
@
pytest
.
mark
.
parallel
@
pytest
.
mark
.
model
(
MODEL_NAME
)
@
pytest
.
mark
.
parametrize
(
"store_backend"
,
[
"etcd"
,
"file"
])
def
test_mocker_two_kv_router
(
request
,
...
...
@@ -391,11 +381,6 @@ def test_mocker_two_kv_router(
mockers
.
__exit__
(
None
,
None
,
None
)
@
pytest
.
mark
.
pre_merge
@
pytest
.
mark
.
gpu_0
@
pytest
.
mark
.
integration
@
pytest
.
mark
.
parallel
@
pytest
.
mark
.
model
(
MODEL_NAME
)
@
pytest
.
mark
.
skip
(
reason
=
"Flaky, temporarily disabled"
)
def
test_mocker_kv_router_overload_503
(
request
,
runtime_services_session
,
predownload_tokenizers
...
...
@@ -434,11 +419,6 @@ def test_mocker_kv_router_overload_503(
mockers
.
__exit__
(
None
,
None
,
None
)
@
pytest
.
mark
.
pre_merge
@
pytest
.
mark
.
gpu_0
@
pytest
.
mark
.
integration
@
pytest
.
mark
.
parallel
@
pytest
.
mark
.
model
(
MODEL_NAME
)
def
test_kv_push_router_bindings
(
request
,
runtime_services_session
,
predownload_tokenizers
):
...
...
@@ -475,11 +455,6 @@ def test_kv_push_router_bindings(
mockers
.
__exit__
(
None
,
None
,
None
)
@
pytest
.
mark
.
pre_merge
@
pytest
.
mark
.
gpu_0
@
pytest
.
mark
.
integration
@
pytest
.
mark
.
parallel
@
pytest
.
mark
.
model
(
MODEL_NAME
)
@
pytest
.
mark
.
parametrize
(
"store_backend"
,
[
"etcd"
,
"file"
])
def
test_indexers_sync
(
request
,
...
...
@@ -529,11 +504,6 @@ def test_indexers_sync(
mockers
.
__exit__
(
None
,
None
,
None
)
@
pytest
.
mark
.
pre_merge
@
pytest
.
mark
.
gpu_0
@
pytest
.
mark
.
integration
@
pytest
.
mark
.
parallel
@
pytest
.
mark
.
model
(
MODEL_NAME
)
def
test_query_instance_id_returns_worker_and_tokens
(
request
,
runtime_services_session
,
predownload_tokenizers
):
...
...
@@ -568,11 +538,6 @@ def test_query_instance_id_returns_worker_and_tokens(
mockers
.
__exit__
(
None
,
None
,
None
)
@
pytest
.
mark
.
pre_merge
@
pytest
.
mark
.
gpu_0
@
pytest
.
mark
.
integration
@
pytest
.
mark
.
parallel
@
pytest
.
mark
.
model
(
MODEL_NAME
)
def
test_router_decisions
(
request
,
runtime_services_session
,
predownload_tokenizers
):
"""Validate KV cache prefix reuse and dp_rank routing by sending progressive requests with overlapping prefixes."""
...
...
@@ -612,9 +577,6 @@ def test_router_decisions(request, runtime_services_session, predownload_tokeniz
mockers
.
__exit__
(
None
,
None
,
None
)
@
pytest
.
mark
.
pre_merge
@
pytest
.
mark
.
parallel
@
pytest
.
mark
.
model
(
MODEL_NAME
)
def
test_router_disagg_decisions
(
request
,
runtime_services_session
,
predownload_tokenizers
):
...
...
@@ -680,11 +642,6 @@ def test_router_disagg_decisions(
prefill_workers
.
__exit__
(
None
,
None
,
None
)
@
pytest
.
mark
.
pre_merge
@
pytest
.
mark
.
gpu_0
@
pytest
.
mark
.
integration
@
pytest
.
mark
.
parallel
@
pytest
.
mark
.
model
(
MODEL_NAME
)
def
test_busy_threshold_endpoint
(
request
,
runtime_services_session
,
predownload_tokenizers
):
...
...
tests/router/test_router_e2e_with_vllm.py
View file @
7e499b5c
...
...
@@ -18,6 +18,13 @@ from tests.utils.managed_process import ManagedProcess
logger
=
logging
.
getLogger
(
__name__
)
MODEL_NAME
=
"TinyLlama/TinyLlama-1.1B-Chat-v1.0"
pytestmark
=
[
pytest
.
mark
.
pre_merge
,
pytest
.
mark
.
e2e
,
pytest
.
mark
.
vllm
,
pytest
.
mark
.
model
(
MODEL_NAME
),
]
SPEEDUP_RATIO
=
10.0
PORTS
=
[
8011
,
...
...
@@ -269,11 +276,8 @@ class VLLMProcess:
time
.
sleep
(
2
)
@
pytest
.
mark
.
e2e
@
pytest
.
mark
.
gpu_1
@
pytest
.
mark
.
vllm
@
pytest
.
mark
.
skip
(
reason
=
"All vLLM tests disabled for now"
)
@
pytest
.
mark
.
model
(
MODEL_NAME
)
def
test_vllm_kv_router_basic
(
request
,
runtime_services
,
predownload_tokenizers
):
"""
Quick e2e sanity test for KV router with vLLM engine instances.
...
...
@@ -319,11 +323,8 @@ def test_vllm_kv_router_basic(request, runtime_services, predownload_tokenizers)
vllm_workers
.
__exit__
(
None
,
None
,
None
)
@
pytest
.
mark
.
e2e
@
pytest
.
mark
.
vllm
@
pytest
.
mark
.
gpu_1
@
pytest
.
mark
.
skip
(
reason
=
"All vLLM tests disabled for now"
)
@
pytest
.
mark
.
model
(
MODEL_NAME
)
def
test_router_decisions_vllm_multiple_workers
(
request
,
runtime_services
,
predownload_tokenizers
):
...
...
@@ -371,11 +372,8 @@ def test_router_decisions_vllm_multiple_workers(
vllm_workers
.
__exit__
(
None
,
None
,
None
)
@
pytest
.
mark
.
e2e
@
pytest
.
mark
.
vllm
@
pytest
.
mark
.
gpu_2
@
pytest
.
mark
.
skip
(
reason
=
"All vLLM tests disabled for now"
)
@
pytest
.
mark
.
model
(
MODEL_NAME
)
def
test_router_decisions_vllm_dp
(
request
,
runtime_services
,
predownload_tokenizers
):
"""Validate KV cache prefix reuse with vLLM by sending progressive requests with overlapping prefixes.
Same flow as test_router_decisions_vllm_multiple_workers; force first request to (worker_id, dp_rank=1).
...
...
tests/serve/test_sglang.py
View file @
7e499b5c
...
...
@@ -44,7 +44,7 @@ sglang_configs = {
name
=
"aggregated"
,
directory
=
sglang_dir
,
script_name
=
"agg.sh"
,
marks
=
[
pytest
.
mark
.
gpu_1
],
marks
=
[
pytest
.
mark
.
gpu_1
,
pytest
.
mark
.
pre_merge
],
model
=
"Qwen/Qwen3-0.6B"
,
env
=
{},
models_port
=
8000
,
...
...
@@ -73,7 +73,11 @@ sglang_configs = {
name
=
"disaggregated_same_gpu"
,
directory
=
sglang_dir
,
script_name
=
"disagg_same_gpu.sh"
,
marks
=
[
pytest
.
mark
.
gpu_1
,
pytest
.
mark
.
skip
(
reason
=
"unstable"
)],
marks
=
[
pytest
.
mark
.
gpu_1
,
pytest
.
mark
.
pre_merge
,
pytest
.
mark
.
skip
(
reason
=
"unstable"
),
],
model
=
"Qwen/Qwen3-0.6B"
,
env
=
{},
models_port
=
8000
,
...
...
@@ -116,7 +120,7 @@ sglang_configs = {
name
=
"template_verification"
,
directory
=
SERVE_TEST_DIR
,
# special directory for test-specific scripts
script_name
=
"template_verifier.sh"
,
marks
=
[
pytest
.
mark
.
gpu_1
,
pytest
.
mark
.
nightly
],
marks
=
[
pytest
.
mark
.
gpu_1
,
pytest
.
mark
.
pre_merge
,
pytest
.
mark
.
nightly
],
model
=
"Qwen/Qwen3-0.6B"
,
env
=
{},
models_port
=
8000
,
...
...
@@ -159,7 +163,7 @@ sglang_configs = {
name
=
"embedding_agg"
,
directory
=
sglang_dir
,
script_name
=
"agg_embed.sh"
,
marks
=
[
pytest
.
mark
.
gpu_1
,
pytest
.
mark
.
nightly
],
marks
=
[
pytest
.
mark
.
gpu_1
,
pytest
.
mark
.
pre_merge
,
pytest
.
mark
.
nightly
],
model
=
"Qwen/Qwen3-Embedding-4B"
,
delayed_start
=
0
,
timeout
=
180
,
...
...
tests/serve/test_trtllm.py
View file @
7e499b5c
...
...
@@ -40,7 +40,7 @@ trtllm_configs = {
name
=
"aggregated"
,
directory
=
trtllm_dir
,
script_name
=
"agg_metrics.sh"
,
marks
=
[
pytest
.
mark
.
gpu_1
,
pytest
.
mark
.
trtllm
],
marks
=
[
pytest
.
mark
.
gpu_1
,
pytest
.
mark
.
pre_merge
,
pytest
.
mark
.
trtllm
],
model
=
"Qwen/Qwen3-0.6B"
,
models_port
=
8000
,
request_payloads
=
[
...
...
@@ -65,7 +65,7 @@ trtllm_configs = {
name
=
"disaggregated_same_gpu"
,
directory
=
trtllm_dir
,
script_name
=
"disagg_same_gpu.sh"
,
marks
=
[
pytest
.
mark
.
gpu_1
,
pytest
.
mark
.
trtllm
],
marks
=
[
pytest
.
mark
.
gpu_1
,
pytest
.
mark
.
pre_merge
,
pytest
.
mark
.
trtllm
],
model
=
"Qwen/Qwen3-0.6B"
,
models_port
=
8000
,
request_payloads
=
[
...
...
@@ -79,7 +79,7 @@ trtllm_configs = {
name
=
"aggregated_router"
,
directory
=
trtllm_dir
,
script_name
=
"agg_router.sh"
,
marks
=
[
pytest
.
mark
.
gpu_1
,
pytest
.
mark
.
trtllm
,
pytest
.
mark
.
post_merge
],
marks
=
[
pytest
.
mark
.
gpu_1
,
pytest
.
mark
.
pre_merge
,
pytest
.
mark
.
trtllm
],
model
=
"Qwen/Qwen3-0.6B"
,
models_port
=
8000
,
request_payloads
=
[
...
...
tests/serve/test_vllm.py
View file @
7e499b5c
...
...
@@ -43,7 +43,7 @@ vllm_configs = {
name
=
"aggregated"
,
directory
=
vllm_dir
,
script_name
=
"agg.sh"
,
marks
=
[
pytest
.
mark
.
gpu_1
],
marks
=
[
pytest
.
mark
.
gpu_1
,
pytest
.
mark
.
pre_merge
],
model
=
"Qwen/Qwen3-0.6B"
,
request_payloads
=
[
chat_payload_default
(),
...
...
@@ -55,7 +55,7 @@ vllm_configs = {
name
=
"aggregated_lmcache"
,
directory
=
vllm_dir
,
script_name
=
"agg_lmcache.sh"
,
marks
=
[
pytest
.
mark
.
gpu_1
],
marks
=
[
pytest
.
mark
.
gpu_1
,
pytest
.
mark
.
pre_merge
],
model
=
"Qwen/Qwen3-0.6B"
,
request_payloads
=
[
chat_payload_default
(),
...
...
@@ -68,7 +68,7 @@ vllm_configs = {
name
=
"agg-request-plane-tcp"
,
directory
=
vllm_dir
,
script_name
=
"agg_request_planes.sh"
,
marks
=
[
pytest
.
mark
.
gpu_1
],
marks
=
[
pytest
.
mark
.
gpu_1
,
pytest
.
mark
.
pre_merge
],
model
=
"Qwen/Qwen3-0.6B"
,
script_args
=
[
"--tcp"
],
request_payloads
=
[
...
...
@@ -80,7 +80,7 @@ vllm_configs = {
name
=
"agg-request-plane-http"
,
directory
=
vllm_dir
,
script_name
=
"agg_request_planes.sh"
,
marks
=
[
pytest
.
mark
.
gpu_1
],
marks
=
[
pytest
.
mark
.
gpu_1
,
pytest
.
mark
.
pre_merge
],
model
=
"Qwen/Qwen3-0.6B"
,
script_args
=
[
"--http"
],
request_payloads
=
[
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment