Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
4bd6299b
Unverified
Commit
4bd6299b
authored
Mar 11, 2026
by
Graham King
Committed by
GitHub
Mar 11, 2026
Browse files
test: pytests able to run locally now (#7219)
Signed-off-by:
Graham King
<
grahamk@nvidia.com
>
parent
5d5fd243
Changes
12
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
70 additions
and
57 deletions
+70
-57
container/deps/requirements.test.txt
container/deps/requirements.test.txt
+1
-0
pyproject.toml
pyproject.toml
+6
-0
tests/basic/test_wheel_contents.py
tests/basic/test_wheel_contents.py
+1
-0
tests/conftest.py
tests/conftest.py
+31
-0
tests/dependencies/test_kvbm_imports.py
tests/dependencies/test_kvbm_imports.py
+6
-6
tests/dependencies/test_vllm_imports.py
tests/dependencies/test_vllm_imports.py
+0
-34
tests/fault_tolerance/deploy/scenarios.py
tests/fault_tolerance/deploy/scenarios.py
+2
-2
tests/fault_tolerance/deploy/test_deployment.py
tests/fault_tolerance/deploy/test_deployment.py
+13
-7
tests/frontend/grpc/echo_tensor_worker.py
tests/frontend/grpc/echo_tensor_worker.py
+2
-1
tests/frontend/grpc/triton_echo_client.py
tests/frontend/grpc/triton_echo_client.py
+1
-1
tests/serve/test_vllm.py
tests/serve/test_vllm.py
+6
-5
tests/utils/payload_builder.py
tests/utils/payload_builder.py
+1
-1
No files found.
container/deps/requirements.test.txt
View file @
4bd6299b
...
...
@@ -38,4 +38,5 @@ tabulate==0.9.0
types-aiofiles>=24.1.0
types-PyYAML==6.0.12.20250915
types-requests==2.32.4.20250913
types-tabulate>=0.9.0
websocket-client==1.9.0
pyproject.toml
View file @
4bd6299b
...
...
@@ -318,6 +318,12 @@ module = ["vllm.*"]
follow_imports
=
"skip"
ignore_missing_imports
=
true
[[tool.mypy.overrides]]
# WAR (workaround) for a mypy 1.18.x crash with numpy 1.26.x stubs:
# "Should never get here in normal mode, got TypeAlias:numpy.float64 instead of TypeInfo"
module
=
[
"numpy"
,
"numpy.*"
]
follow_imports
=
"skip"
[tool.sphinx]
# extra-content-head
...
...
tests/basic/test_wheel_contents.py
View file @
4bd6299b
...
...
@@ -26,6 +26,7 @@ def test_no_bundled_shared_libraries():
except
PackageNotFoundError
:
pytest
.
fail
(
"ai-dynamo-runtime is not installed"
)
assert
installed_files
is
not
None
,
"ai-dynamo-runtime has no recorded files"
bundled_libs
=
[
str
(
f
)
for
f
in
installed_files
if
".libs/"
in
str
(
f
)
and
".so"
in
str
(
f
)
]
...
...
tests/conftest.py
View file @
4bd6299b
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
import
importlib.util
import
logging
import
os
import
shutil
...
...
@@ -50,6 +51,7 @@ def pytest_configure(config):
"vllm: marks tests as requiring vllm"
,
"trtllm: marks tests as requiring trtllm"
,
"sglang: marks tests as requiring sglang"
,
"lmcache: mark tests as requiring lmcache"
,
"multimodal: marks tests as multimodal (image/video) tests"
,
"slow: marks tests as known to be slow"
,
"h100: marks tests to run on H100"
,
...
...
@@ -282,11 +284,40 @@ def logger(request):
logger
.
removeHandler
(
handler
)
def _item_has_marker(item, marker_name):
    """Report whether ``marker_name`` applies to a collected test item.

    The item's own marker lookup (``get_closest_marker``) is consulted first.
    If that finds nothing, the ``pytestmark`` attribute of the item's module
    (when the item exposes a ``module``) is scanned for a mark whose ``name``
    equals ``marker_name``.

    Returns:
        True if a matching marker is found by either route, otherwise False.
    """
    if item.get_closest_marker(marker_name):
        return True

    owner = getattr(item, "module", None)
    if owner is None:
        return False

    declared = getattr(owner, "pytestmark", [])
    # ``pytestmark`` may hold a single mark instead of a list; normalise it.
    candidates = declared if isinstance(declared, list) else [declared]
    for mark in candidates:
        if getattr(mark, "name", "") == marker_name:
            return True
    return False
@
pytest
.
hookimpl
(
trylast
=
True
)
def
pytest_collection_modifyitems
(
config
,
items
):
"""
This function is called to modify the list of tests to run.
"""
# Auto-skip tests marked with a framework marker when the framework is not installed
framework_markers
=
{
"trtllm"
:
"tensorrt_llm"
,
"vllm"
:
"vllm"
,
"sglang"
:
"sglang"
,
"kvbm"
:
"kvbm"
,
"lmcache"
:
"lmcache"
,
}
for
marker_name
,
module_name
in
framework_markers
.
items
():
if
importlib
.
util
.
find_spec
(
module_name
)
is
None
:
skip
=
pytest
.
mark
.
skip
(
reason
=
f
"
{
module_name
}
is not installed"
)
for
item
in
items
:
if
_item_has_marker
(
item
,
marker_name
):
item
.
add_marker
(
skip
)
# Collect models via explicit pytest mark from final filtered items only
models_to_download
=
set
()
for
item
in
items
:
...
...
tests/dependencies/test_kvbm_imports.py
View file @
4bd6299b
...
...
@@ -54,7 +54,7 @@ def _check_kvbm_imports():
# Base tests (no framework markers) - run in main job with --framework none --enable-kvbm
@
pytest
.
mark
.
p
re
_merge
@
pytest
.
mark
.
p
ost
_merge
@
pytest
.
mark
.
gpu_0
@
pytest
.
mark
.
unit
def
test_kvbm_wheel_exists
():
...
...
@@ -62,7 +62,7 @@ def test_kvbm_wheel_exists():
_check_kvbm_wheel_exists
()
@
pytest
.
mark
.
p
re
_merge
@
pytest
.
mark
.
p
ost
_merge
@
pytest
.
mark
.
gpu_0
@
pytest
.
mark
.
unit
def
test_kvbm_imports
():
...
...
@@ -71,7 +71,7 @@ def test_kvbm_imports():
# vLLM-specific tests - run in vLLM job (vLLM auto-enables KVBM)
@
pytest
.
mark
.
p
re
_merge
@
pytest
.
mark
.
p
ost
_merge
@
pytest
.
mark
.
vllm
@
pytest
.
mark
.
unit
@
pytest
.
mark
.
gpu_0
...
...
@@ -80,7 +80,7 @@ def test_kvbm_wheel_exists_vllm():
_check_kvbm_wheel_exists
()
@
pytest
.
mark
.
p
re
_merge
@
pytest
.
mark
.
p
ost
_merge
@
pytest
.
mark
.
vllm
@
pytest
.
mark
.
unit
@
pytest
.
mark
.
gpu_0
...
...
@@ -90,7 +90,7 @@ def test_kvbm_imports_vllm():
# TRT-LLM-specific tests - run in TRT-LLM job (TRT-LLM auto-enables KVBM)
@
pytest
.
mark
.
p
re
_merge
@
pytest
.
mark
.
p
ost
_merge
@
pytest
.
mark
.
trtllm
@
pytest
.
mark
.
unit
@
pytest
.
mark
.
gpu_0
...
...
@@ -99,7 +99,7 @@ def test_kvbm_wheel_exists_trtllm():
_check_kvbm_wheel_exists
()
@
pytest
.
mark
.
p
re
_merge
@
pytest
.
mark
.
p
ost
_merge
@
pytest
.
mark
.
trtllm
@
pytest
.
mark
.
unit
@
pytest
.
mark
.
gpu_0
...
...
tests/dependencies/test_vllm_imports.py
deleted
100644 → 0
View file @
5d5fd243
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""Unit tests to sanity check that required dependencies can be imported."""
import
pytest
@pytest.mark.vllm
@pytest.mark.unit
@pytest.mark.gpu_1
@pytest.mark.pre_merge
def test_import_deep_ep():
    """Sanity check: the ``deep_ep`` module must be importable.

    An ``ImportError`` is converted into an explicit pytest failure that
    carries the original error text.
    """
    try:
        import deep_ep
    except ImportError as exc:
        pytest.fail(f"Failed to import deep_ep: {exc}")
    else:
        # Only reached when the import succeeded.
        assert deep_ep is not None
@pytest.mark.vllm
@pytest.mark.unit
@pytest.mark.gpu_1
@pytest.mark.pre_merge
def test_import_pplx_kernels():
    """Sanity check: the ``pplx_kernels`` module must be importable.

    An ``ImportError`` is converted into an explicit pytest failure that
    carries the original error text.
    """
    try:
        import pplx_kernels
    except ImportError as exc:
        pytest.fail(f"Failed to import pplx_kernels: {exc}")
    else:
        # Only reached when the import succeeded.
        assert pplx_kernels is not None
tests/fault_tolerance/deploy/scenarios.py
View file @
4bd6299b
...
...
@@ -20,7 +20,7 @@ import time
from
abc
import
ABC
,
abstractmethod
from
dataclasses
import
dataclass
,
field
from
enum
import
Enum
,
auto
from
typing
import
TYPE_CHECKING
,
Dict
,
List
,
Optional
,
Pattern
from
typing
import
TYPE_CHECKING
,
Any
,
Dict
,
List
,
Optional
,
Pattern
from
typing_extensions
import
Required
,
TypedDict
...
...
@@ -568,7 +568,7 @@ class TerminateProcessFailure(Failure):
f
"Checking Frontend service health (after
{
service_name
}
pod restart)..."
)
pod_ports
=
{}
# Temporary dict for port forward tracking
pod_ports
:
dict
[
str
,
Any
]
=
{}
# Temporary dict for port forward tracking
try
:
logger
.
info
(
"Getting frontend pod and setting up port forward..."
)
frontend_pod_name
,
local_port
,
frontend_pod
=
get_frontend_port
(
...
...
tests/fault_tolerance/deploy/test_deployment.py
View file @
4bd6299b
...
...
@@ -9,7 +9,7 @@ import re
import
signal
from
contextlib
import
contextmanager
from
multiprocessing.context
import
SpawnProcess
from
typing
import
Any
from
typing
import
Any
,
Optional
import
pytest
...
...
@@ -31,8 +31,8 @@ from tests.utils.test_output import resolve_test_output_path
def
get_model_from_deployment
(
deployment_spec
:
DeploymentSpec
,
scenario
:
Scenario
=
None
,
service_name
:
str
=
None
,
scenario
:
Optional
[
Scenario
]
=
None
,
service_name
:
Optional
[
str
]
=
None
,
)
->
str
:
"""Get model name from deployment spec.
...
...
@@ -60,19 +60,22 @@ def get_model_from_deployment(
# Get model from backend-specific worker (if scenario provided)
if
scenario
:
try
:
model
:
Optional
[
str
]
=
None
if
scenario
.
backend
==
"vllm"
:
return
deployment_spec
[
"VllmDecodeWorker"
].
model
model
=
deployment_spec
[
"VllmDecodeWorker"
].
model
elif
scenario
.
backend
==
"sglang"
:
return
deployment_spec
[
"decode"
].
model
model
=
deployment_spec
[
"decode"
].
model
elif
scenario
.
backend
==
"trtllm"
:
# Determine deployment type from scenario deployment name
if
(
"agg"
in
deployment_spec
.
name
and
"disagg"
not
in
deployment_spec
.
name
):
return
deployment_spec
[
"TRTLLMWorker"
].
model
model
=
deployment_spec
[
"TRTLLMWorker"
].
model
else
:
return
deployment_spec
[
"TRTLLMDecodeWorker"
].
model
model
=
deployment_spec
[
"TRTLLMDecodeWorker"
].
model
if
model
:
return
model
except
(
KeyError
,
AttributeError
)
as
e
:
logging
.
warning
(
f
"Could not get model from backend-specific worker "
...
...
@@ -290,6 +293,8 @@ async def _inject_failures(
return
affected_pods
# TODO: These globals might not work in parallel testing. FIXME
global_result_list
=
[]
# Global storage for test results (used by validation fixture)
test_results_cache
=
{}
...
...
@@ -489,6 +494,7 @@ def results_summary():
@
pytest
.
mark
.
post_merge
@
pytest
.
mark
.
e2e
@
pytest
.
mark
.
slow
@
pytest
.
mark
.
gpu_0
@
pytest
.
mark
.
filterwarnings
(
"ignore::DeprecationWarning"
)
async
def
test_fault_scenario
(
scenario
:
Scenario
,
# noqa: F811
...
...
tests/frontend/grpc/echo_tensor_worker.py
View file @
4bd6299b
...
...
@@ -46,6 +46,7 @@ async def echo_tensor_worker(runtime: DistributedRuntime):
# Internally the bytes string will be converted to List of int
retrieved_model_config
=
runtime_config
.
get_tensor_model_config
()
assert
retrieved_model_config
is
not
None
retrieved_model_config
[
"triton_model_config"
]
=
bytes
(
retrieved_model_config
[
"triton_model_config"
]
)
...
...
@@ -63,7 +64,7 @@ async def echo_tensor_worker(runtime: DistributedRuntime):
await
endpoint
.
serve_endpoint
(
generate
)
async
def
generate
(
request
,
context
):
async
def
generate
(
request
):
"""Echo tensors and parameters back to the client."""
# [NOTE] gluo: currently there is no frontend side
# validation between model config and actual request,
...
...
tests/frontend/grpc/triton_echo_client.py
View file @
4bd6299b
...
...
@@ -86,7 +86,7 @@ class TritonEchoClient:
class
UserData
:
def
__init__
(
self
):
self
.
_completed_requests
=
queue
.
Queue
()
self
.
_completed_requests
:
queue
.
Queue
=
queue
.
Queue
()
# Define the callback function. Note the last two parameters should be
# result and error. InferenceServerClient would provide the results of an
...
...
tests/serve/test_vllm.py
View file @
4bd6299b
...
...
@@ -114,13 +114,13 @@ vllm_configs = {
directory
=
vllm_dir
,
script_name
=
"agg_lmcache.sh"
,
marks
=
[
pytest
.
mark
.
lmcache
,
pytest
.
mark
.
gpu_1
,
pytest
.
mark
.
pre_merge
,
pytest
.
mark
.
timeout
(
360
),
# 3x estimated time (70s) + download time (150s)
pytest
.
mark
.
skipif
(
_is_cuda13
(),
reason
=
"lmcache does not support CUDA 13 as of v0.3.11"
,
strict
=
False
,
),
],
model
=
"Qwen/Qwen3-0.6B"
,
...
...
@@ -136,13 +136,13 @@ vllm_configs = {
directory
=
vllm_dir
,
script_name
=
"agg_lmcache_multiproc.sh"
,
marks
=
[
pytest
.
mark
.
lmcache
,
pytest
.
mark
.
gpu_1
,
pytest
.
mark
.
pre_merge
,
pytest
.
mark
.
timeout
(
360
),
# 3x estimated time (70s) + download time (150s)
pytest
.
mark
.
skipif
(
_is_cuda13
(),
reason
=
"lmcache does not support CUDA 13 as of v0.3.11"
,
strict
=
False
,
),
],
model
=
"Qwen/Qwen3-0.6B"
,
...
...
@@ -317,7 +317,8 @@ vllm_configs = {
name
=
"multimodal_agg_frontend_decoding"
,
directory
=
vllm_dir
,
script_name
=
"agg_multimodal.sh"
,
marks
=
[
pytest
.
mark
.
gpu_1
,
pytest
.
mark
.
pre_merge
],
# post_merge because needs real NIXL not stub
marks
=
[
pytest
.
mark
.
gpu_1
,
pytest
.
mark
.
post_merge
],
model
=
"Qwen/Qwen2-VL-2B-Instruct"
,
# Pass --frontend-decoding to enable Rust frontend image decoding + NIXL RDMA transfer
script_args
=
[
...
...
@@ -351,7 +352,7 @@ vllm_configs = {
script_name
=
"disagg_multimodal_epd.sh"
,
marks
=
[
pytest
.
mark
.
gpu_1
,
pytest
.
mark
.
p
re
_merge
,
pytest
.
mark
.
p
ost
_merge
,
pytest
.
mark
.
skip
(
reason
=
"DYN-2265"
),
],
model
=
"Qwen/Qwen3-VL-2B-Instruct"
,
...
...
@@ -388,7 +389,7 @@ vllm_configs = {
name
=
"multimodal_agg_qwen"
,
directory
=
vllm_dir
,
script_name
=
"agg_multimodal.sh"
,
marks
=
[
pytest
.
mark
.
gpu_1
,
pytest
.
mark
.
p
re
_merge
],
marks
=
[
pytest
.
mark
.
gpu_1
,
pytest
.
mark
.
p
ost
_merge
],
model
=
"Qwen/Qwen2.5-VL-7B-Instruct"
,
script_args
=
[
"--model"
,
"Qwen/Qwen2.5-VL-7B-Instruct"
],
delayed_start
=
0
,
...
...
tests/utils/payload_builder.py
View file @
4bd6299b
...
...
@@ -198,7 +198,7 @@ def metric_payload_default(
Returns:
Backend-specific MetricsPayload subclass based on backend parameter
"""
common_args
=
{
common_args
:
dict
[
str
,
Any
]
=
{
"body"
:
{},
"repeat_count"
:
repeat_count
,
"expected_log"
:
expected_log
or
[],
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment