"lib/vscode:/vscode.git/clone" did not exist on "b823575e606f31b98b0f51fcb33e757cd60947c4"
Unverified Commit 4bd6299b authored by Graham King's avatar Graham King Committed by GitHub
Browse files

test: pytests able to run locally now (#7219)


Signed-off-by: default avatarGraham King <grahamk@nvidia.com>
parent 5d5fd243
...@@ -38,4 +38,5 @@ tabulate==0.9.0 ...@@ -38,4 +38,5 @@ tabulate==0.9.0
types-aiofiles>=24.1.0 types-aiofiles>=24.1.0
types-PyYAML==6.0.12.20250915 types-PyYAML==6.0.12.20250915
types-requests==2.32.4.20250913 types-requests==2.32.4.20250913
types-tabulate>=0.9.0
websocket-client==1.9.0 websocket-client==1.9.0
...@@ -318,6 +318,12 @@ module = ["vllm.*"] ...@@ -318,6 +318,12 @@ module = ["vllm.*"]
follow_imports = "skip" follow_imports = "skip"
ignore_missing_imports = true ignore_missing_imports = true
[[tool.mypy.overrides]]
# Workaround (WAR) for a mypy 1.18.x crash with numpy 1.26.x stubs:
# "Should never get here in normal mode, got TypeAlias:numpy.float64 instead of TypeInfo"
module = ["numpy", "numpy.*"]
follow_imports = "skip"
[tool.sphinx] [tool.sphinx]
# extra-content-head # extra-content-head
......
...@@ -26,6 +26,7 @@ def test_no_bundled_shared_libraries(): ...@@ -26,6 +26,7 @@ def test_no_bundled_shared_libraries():
except PackageNotFoundError: except PackageNotFoundError:
pytest.fail("ai-dynamo-runtime is not installed") pytest.fail("ai-dynamo-runtime is not installed")
assert installed_files is not None, "ai-dynamo-runtime has no recorded files"
bundled_libs = [ bundled_libs = [
str(f) for f in installed_files if ".libs/" in str(f) and ".so" in str(f) str(f) for f in installed_files if ".libs/" in str(f) and ".so" in str(f)
] ]
......
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
import importlib.util
import logging import logging
import os import os
import shutil import shutil
...@@ -50,6 +51,7 @@ def pytest_configure(config): ...@@ -50,6 +51,7 @@ def pytest_configure(config):
"vllm: marks tests as requiring vllm", "vllm: marks tests as requiring vllm",
"trtllm: marks tests as requiring trtllm", "trtllm: marks tests as requiring trtllm",
"sglang: marks tests as requiring sglang", "sglang: marks tests as requiring sglang",
"lmcache: mark tests as requiring lmcache",
"multimodal: marks tests as multimodal (image/video) tests", "multimodal: marks tests as multimodal (image/video) tests",
"slow: marks tests as known to be slow", "slow: marks tests as known to be slow",
"h100: marks tests to run on H100", "h100: marks tests to run on H100",
...@@ -282,11 +284,40 @@ def logger(request): ...@@ -282,11 +284,40 @@ def logger(request):
logger.removeHandler(handler) logger.removeHandler(handler)
def _item_has_marker(item, marker_name):
    """Return True if ``item`` carries the marker named ``marker_name``.

    The item's own closest-marker lookup is tried first. If that finds
    nothing, the ``pytestmark`` attribute of the item's module (when the item
    exposes a ``module``) is inspected as a fallback.

    Args:
        item: Collected test item. Must provide ``get_closest_marker`` and
            may provide a ``module`` attribute.
        marker_name: Name of the marker to look for.

    Returns:
        True when the marker is found by either lookup, otherwise False.
    """
    if item.get_closest_marker(marker_name):
        return True

    module = getattr(item, "module", None)
    if module is None:
        return False

    marks = getattr(module, "pytestmark", [])
    # ``pytestmark`` may hold a single mark instead of a sequence of marks;
    # wrap a lone value so the scan below can treat both shapes uniformly.
    if not isinstance(marks, (list, tuple)):
        marks = [marks]
    # Objects without a ``name`` attribute simply never match.
    return any(getattr(mark, "name", "") == marker_name for mark in marks)
@pytest.hookimpl(trylast=True) @pytest.hookimpl(trylast=True)
def pytest_collection_modifyitems(config, items): def pytest_collection_modifyitems(config, items):
""" """
This function is called to modify the list of tests to run. This function is called to modify the list of tests to run.
""" """
# Auto-skip tests marked with a framework marker when the framework is not installed
framework_markers = {
"trtllm": "tensorrt_llm",
"vllm": "vllm",
"sglang": "sglang",
"kvbm": "kvbm",
"lmcache": "lmcache",
}
for marker_name, module_name in framework_markers.items():
if importlib.util.find_spec(module_name) is None:
skip = pytest.mark.skip(reason=f"{module_name} is not installed")
for item in items:
if _item_has_marker(item, marker_name):
item.add_marker(skip)
# Collect models via explicit pytest mark from final filtered items only # Collect models via explicit pytest mark from final filtered items only
models_to_download = set() models_to_download = set()
for item in items: for item in items:
......
...@@ -54,7 +54,7 @@ def _check_kvbm_imports(): ...@@ -54,7 +54,7 @@ def _check_kvbm_imports():
# Base tests (no framework markers) - run in main job with --framework none --enable-kvbm # Base tests (no framework markers) - run in main job with --framework none --enable-kvbm
@pytest.mark.pre_merge @pytest.mark.post_merge
@pytest.mark.gpu_0 @pytest.mark.gpu_0
@pytest.mark.unit @pytest.mark.unit
def test_kvbm_wheel_exists(): def test_kvbm_wheel_exists():
...@@ -62,7 +62,7 @@ def test_kvbm_wheel_exists(): ...@@ -62,7 +62,7 @@ def test_kvbm_wheel_exists():
_check_kvbm_wheel_exists() _check_kvbm_wheel_exists()
@pytest.mark.pre_merge @pytest.mark.post_merge
@pytest.mark.gpu_0 @pytest.mark.gpu_0
@pytest.mark.unit @pytest.mark.unit
def test_kvbm_imports(): def test_kvbm_imports():
...@@ -71,7 +71,7 @@ def test_kvbm_imports(): ...@@ -71,7 +71,7 @@ def test_kvbm_imports():
# vLLM-specific tests - run in vLLM job (vLLM auto-enables KVBM) # vLLM-specific tests - run in vLLM job (vLLM auto-enables KVBM)
@pytest.mark.pre_merge @pytest.mark.post_merge
@pytest.mark.vllm @pytest.mark.vllm
@pytest.mark.unit @pytest.mark.unit
@pytest.mark.gpu_0 @pytest.mark.gpu_0
...@@ -80,7 +80,7 @@ def test_kvbm_wheel_exists_vllm(): ...@@ -80,7 +80,7 @@ def test_kvbm_wheel_exists_vllm():
_check_kvbm_wheel_exists() _check_kvbm_wheel_exists()
@pytest.mark.pre_merge @pytest.mark.post_merge
@pytest.mark.vllm @pytest.mark.vllm
@pytest.mark.unit @pytest.mark.unit
@pytest.mark.gpu_0 @pytest.mark.gpu_0
...@@ -90,7 +90,7 @@ def test_kvbm_imports_vllm(): ...@@ -90,7 +90,7 @@ def test_kvbm_imports_vllm():
# TRT-LLM-specific tests - run in TRT-LLM job (TRT-LLM auto-enables KVBM) # TRT-LLM-specific tests - run in TRT-LLM job (TRT-LLM auto-enables KVBM)
@pytest.mark.pre_merge @pytest.mark.post_merge
@pytest.mark.trtllm @pytest.mark.trtllm
@pytest.mark.unit @pytest.mark.unit
@pytest.mark.gpu_0 @pytest.mark.gpu_0
...@@ -99,7 +99,7 @@ def test_kvbm_wheel_exists_trtllm(): ...@@ -99,7 +99,7 @@ def test_kvbm_wheel_exists_trtllm():
_check_kvbm_wheel_exists() _check_kvbm_wheel_exists()
@pytest.mark.pre_merge @pytest.mark.post_merge
@pytest.mark.trtllm @pytest.mark.trtllm
@pytest.mark.unit @pytest.mark.unit
@pytest.mark.gpu_0 @pytest.mark.gpu_0
......
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""Unit tests to sanity check that required dependencies can be imported."""
import pytest
@pytest.mark.vllm
@pytest.mark.unit
@pytest.mark.gpu_1
@pytest.mark.pre_merge
def test_import_deep_ep():
    """Sanity check: fail the test if ``deep_ep`` cannot be imported."""
    # Import lazily inside the test so a missing package fails only this
    # test rather than breaking collection of the whole module.
    try:
        import deep_ep
    except ImportError as e:
        pytest.fail(f"Failed to import deep_ep: {e}")
    else:
        assert deep_ep is not None
@pytest.mark.vllm
@pytest.mark.unit
@pytest.mark.gpu_1
@pytest.mark.pre_merge
def test_import_pplx_kernels():
    """Sanity check: fail the test if ``pplx_kernels`` cannot be imported."""
    # Import lazily inside the test so a missing package fails only this
    # test rather than breaking collection of the whole module.
    try:
        import pplx_kernels
    except ImportError as e:
        pytest.fail(f"Failed to import pplx_kernels: {e}")
    else:
        assert pplx_kernels is not None
...@@ -20,7 +20,7 @@ import time ...@@ -20,7 +20,7 @@ import time
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from dataclasses import dataclass, field from dataclasses import dataclass, field
from enum import Enum, auto from enum import Enum, auto
from typing import TYPE_CHECKING, Dict, List, Optional, Pattern from typing import TYPE_CHECKING, Any, Dict, List, Optional, Pattern
from typing_extensions import Required, TypedDict from typing_extensions import Required, TypedDict
...@@ -568,7 +568,7 @@ class TerminateProcessFailure(Failure): ...@@ -568,7 +568,7 @@ class TerminateProcessFailure(Failure):
f"Checking Frontend service health (after {service_name} pod restart)..." f"Checking Frontend service health (after {service_name} pod restart)..."
) )
pod_ports = {} # Temporary dict for port forward tracking pod_ports: dict[str, Any] = {} # Temporary dict for port forward tracking
try: try:
logger.info("Getting frontend pod and setting up port forward...") logger.info("Getting frontend pod and setting up port forward...")
frontend_pod_name, local_port, frontend_pod = get_frontend_port( frontend_pod_name, local_port, frontend_pod = get_frontend_port(
......
...@@ -9,7 +9,7 @@ import re ...@@ -9,7 +9,7 @@ import re
import signal import signal
from contextlib import contextmanager from contextlib import contextmanager
from multiprocessing.context import SpawnProcess from multiprocessing.context import SpawnProcess
from typing import Any from typing import Any, Optional
import pytest import pytest
...@@ -31,8 +31,8 @@ from tests.utils.test_output import resolve_test_output_path ...@@ -31,8 +31,8 @@ from tests.utils.test_output import resolve_test_output_path
def get_model_from_deployment( def get_model_from_deployment(
deployment_spec: DeploymentSpec, deployment_spec: DeploymentSpec,
scenario: Scenario = None, scenario: Optional[Scenario] = None,
service_name: str = None, service_name: Optional[str] = None,
) -> str: ) -> str:
"""Get model name from deployment spec. """Get model name from deployment spec.
...@@ -60,19 +60,22 @@ def get_model_from_deployment( ...@@ -60,19 +60,22 @@ def get_model_from_deployment(
# Get model from backend-specific worker (if scenario provided) # Get model from backend-specific worker (if scenario provided)
if scenario: if scenario:
try: try:
model: Optional[str] = None
if scenario.backend == "vllm": if scenario.backend == "vllm":
return deployment_spec["VllmDecodeWorker"].model model = deployment_spec["VllmDecodeWorker"].model
elif scenario.backend == "sglang": elif scenario.backend == "sglang":
return deployment_spec["decode"].model model = deployment_spec["decode"].model
elif scenario.backend == "trtllm": elif scenario.backend == "trtllm":
# Determine deployment type from scenario deployment name # Determine deployment type from scenario deployment name
if ( if (
"agg" in deployment_spec.name "agg" in deployment_spec.name
and "disagg" not in deployment_spec.name and "disagg" not in deployment_spec.name
): ):
return deployment_spec["TRTLLMWorker"].model model = deployment_spec["TRTLLMWorker"].model
else: else:
return deployment_spec["TRTLLMDecodeWorker"].model model = deployment_spec["TRTLLMDecodeWorker"].model
if model:
return model
except (KeyError, AttributeError) as e: except (KeyError, AttributeError) as e:
logging.warning( logging.warning(
f"Could not get model from backend-specific worker " f"Could not get model from backend-specific worker "
...@@ -290,6 +293,8 @@ async def _inject_failures( ...@@ -290,6 +293,8 @@ async def _inject_failures(
return affected_pods return affected_pods
# TODO: These globals might not work in parallel testing. FIXME
global_result_list = [] global_result_list = []
# Global storage for test results (used by validation fixture) # Global storage for test results (used by validation fixture)
test_results_cache = {} test_results_cache = {}
...@@ -489,6 +494,7 @@ def results_summary(): ...@@ -489,6 +494,7 @@ def results_summary():
@pytest.mark.post_merge @pytest.mark.post_merge
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.slow @pytest.mark.slow
@pytest.mark.gpu_0
@pytest.mark.filterwarnings("ignore::DeprecationWarning") @pytest.mark.filterwarnings("ignore::DeprecationWarning")
async def test_fault_scenario( async def test_fault_scenario(
scenario: Scenario, # noqa: F811 scenario: Scenario, # noqa: F811
......
...@@ -46,6 +46,7 @@ async def echo_tensor_worker(runtime: DistributedRuntime): ...@@ -46,6 +46,7 @@ async def echo_tensor_worker(runtime: DistributedRuntime):
# Internally the bytes string will be converted to List of int # Internally the bytes string will be converted to List of int
retrieved_model_config = runtime_config.get_tensor_model_config() retrieved_model_config = runtime_config.get_tensor_model_config()
assert retrieved_model_config is not None
retrieved_model_config["triton_model_config"] = bytes( retrieved_model_config["triton_model_config"] = bytes(
retrieved_model_config["triton_model_config"] retrieved_model_config["triton_model_config"]
) )
...@@ -63,7 +64,7 @@ async def echo_tensor_worker(runtime: DistributedRuntime): ...@@ -63,7 +64,7 @@ async def echo_tensor_worker(runtime: DistributedRuntime):
await endpoint.serve_endpoint(generate) await endpoint.serve_endpoint(generate)
async def generate(request, context): async def generate(request):
"""Echo tensors and parameters back to the client.""" """Echo tensors and parameters back to the client."""
# [NOTE] gluo: currently there is no frontend side # [NOTE] gluo: currently there is no frontend side
# validation between model config and actual request, # validation between model config and actual request,
......
...@@ -86,7 +86,7 @@ class TritonEchoClient: ...@@ -86,7 +86,7 @@ class TritonEchoClient:
class UserData: class UserData:
def __init__(self): def __init__(self):
self._completed_requests = queue.Queue() self._completed_requests: queue.Queue = queue.Queue()
# Define the callback function. Note the last two parameters should be # Define the callback function. Note the last two parameters should be
# result and error. InferenceServerClient would provide the results of an # result and error. InferenceServerClient would provide the results of an
......
...@@ -114,13 +114,13 @@ vllm_configs = { ...@@ -114,13 +114,13 @@ vllm_configs = {
directory=vllm_dir, directory=vllm_dir,
script_name="agg_lmcache.sh", script_name="agg_lmcache.sh",
marks=[ marks=[
pytest.mark.lmcache,
pytest.mark.gpu_1, pytest.mark.gpu_1,
pytest.mark.pre_merge, pytest.mark.pre_merge,
pytest.mark.timeout(360), # 3x estimated time (70s) + download time (150s) pytest.mark.timeout(360), # 3x estimated time (70s) + download time (150s)
pytest.mark.skipif( pytest.mark.skipif(
_is_cuda13(), _is_cuda13(),
reason="lmcache does not support CUDA 13 as of v0.3.11", reason="lmcache does not support CUDA 13 as of v0.3.11",
strict=False,
), ),
], ],
model="Qwen/Qwen3-0.6B", model="Qwen/Qwen3-0.6B",
...@@ -136,13 +136,13 @@ vllm_configs = { ...@@ -136,13 +136,13 @@ vllm_configs = {
directory=vllm_dir, directory=vllm_dir,
script_name="agg_lmcache_multiproc.sh", script_name="agg_lmcache_multiproc.sh",
marks=[ marks=[
pytest.mark.lmcache,
pytest.mark.gpu_1, pytest.mark.gpu_1,
pytest.mark.pre_merge, pytest.mark.pre_merge,
pytest.mark.timeout(360), # 3x estimated time (70s) + download time (150s) pytest.mark.timeout(360), # 3x estimated time (70s) + download time (150s)
pytest.mark.skipif( pytest.mark.skipif(
_is_cuda13(), _is_cuda13(),
reason="lmcache does not support CUDA 13 as of v0.3.11", reason="lmcache does not support CUDA 13 as of v0.3.11",
strict=False,
), ),
], ],
model="Qwen/Qwen3-0.6B", model="Qwen/Qwen3-0.6B",
...@@ -317,7 +317,8 @@ vllm_configs = { ...@@ -317,7 +317,8 @@ vllm_configs = {
name="multimodal_agg_frontend_decoding", name="multimodal_agg_frontend_decoding",
directory=vllm_dir, directory=vllm_dir,
script_name="agg_multimodal.sh", script_name="agg_multimodal.sh",
marks=[pytest.mark.gpu_1, pytest.mark.pre_merge], # post_merge because needs real NIXL not stub
marks=[pytest.mark.gpu_1, pytest.mark.post_merge],
model="Qwen/Qwen2-VL-2B-Instruct", model="Qwen/Qwen2-VL-2B-Instruct",
# Pass --frontend-decoding to enable Rust frontend image decoding + NIXL RDMA transfer # Pass --frontend-decoding to enable Rust frontend image decoding + NIXL RDMA transfer
script_args=[ script_args=[
...@@ -351,7 +352,7 @@ vllm_configs = { ...@@ -351,7 +352,7 @@ vllm_configs = {
script_name="disagg_multimodal_epd.sh", script_name="disagg_multimodal_epd.sh",
marks=[ marks=[
pytest.mark.gpu_1, pytest.mark.gpu_1,
pytest.mark.pre_merge, pytest.mark.post_merge,
pytest.mark.skip(reason="DYN-2265"), pytest.mark.skip(reason="DYN-2265"),
], ],
model="Qwen/Qwen3-VL-2B-Instruct", model="Qwen/Qwen3-VL-2B-Instruct",
...@@ -388,7 +389,7 @@ vllm_configs = { ...@@ -388,7 +389,7 @@ vllm_configs = {
name="multimodal_agg_qwen", name="multimodal_agg_qwen",
directory=vllm_dir, directory=vllm_dir,
script_name="agg_multimodal.sh", script_name="agg_multimodal.sh",
marks=[pytest.mark.gpu_1, pytest.mark.pre_merge], marks=[pytest.mark.gpu_1, pytest.mark.post_merge],
model="Qwen/Qwen2.5-VL-7B-Instruct", model="Qwen/Qwen2.5-VL-7B-Instruct",
script_args=["--model", "Qwen/Qwen2.5-VL-7B-Instruct"], script_args=["--model", "Qwen/Qwen2.5-VL-7B-Instruct"],
delayed_start=0, delayed_start=0,
......
...@@ -198,7 +198,7 @@ def metric_payload_default( ...@@ -198,7 +198,7 @@ def metric_payload_default(
Returns: Returns:
Backend-specific MetricsPayload subclass based on backend parameter Backend-specific MetricsPayload subclass based on backend parameter
""" """
common_args = { common_args: dict[str, Any] = {
"body": {}, "body": {},
"repeat_count": repeat_count, "repeat_count": repeat_count,
"expected_log": expected_log or [], "expected_log": expected_log or [],
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment