"vscode:/vscode.git/clone" did not exist on "c2f0baa4bcad146433650fea435b44d006c85849"
Unverified Commit 4bd6299b authored by Graham King's avatar Graham King Committed by GitHub
Browse files

test: pytests able to run locally now (#7219)


Signed-off-by: Graham King <grahamk@nvidia.com>
parent 5d5fd243
......@@ -38,4 +38,5 @@ tabulate==0.9.0
types-aiofiles>=24.1.0
types-PyYAML==6.0.12.20250915
types-requests==2.32.4.20250913
types-tabulate>=0.9.0
websocket-client==1.9.0
......@@ -318,6 +318,12 @@ module = ["vllm.*"]
follow_imports = "skip"
ignore_missing_imports = true
[[tool.mypy.overrides]]
# Workaround (WAR) for a mypy 1.18.x crash with numpy 1.26.x stubs:
# "Should never get here in normal mode, got TypeAlias:numpy.float64 instead of TypeInfo"
module = ["numpy", "numpy.*"]
follow_imports = "skip"
[tool.sphinx]
# extra-content-head
......
......@@ -26,6 +26,7 @@ def test_no_bundled_shared_libraries():
except PackageNotFoundError:
pytest.fail("ai-dynamo-runtime is not installed")
assert installed_files is not None, "ai-dynamo-runtime has no recorded files"
bundled_libs = [
str(f) for f in installed_files if ".libs/" in str(f) and ".so" in str(f)
]
......
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
import importlib.util
import logging
import os
import shutil
......@@ -50,6 +51,7 @@ def pytest_configure(config):
"vllm: marks tests as requiring vllm",
"trtllm: marks tests as requiring trtllm",
"sglang: marks tests as requiring sglang",
"lmcache: mark tests as requiring lmcache",
"multimodal: marks tests as multimodal (image/video) tests",
"slow: marks tests as known to be slow",
"h100: marks tests to run on H100",
......@@ -282,11 +284,40 @@ def logger(request):
logger.removeHandler(handler)
def _item_has_marker(item, marker_name):
    """Return True when ``item`` carries the marker named ``marker_name``.

    The item's own closest marker is consulted first. If that lookup finds
    nothing, the ``pytestmark`` attribute of the item's module (when the item
    exposes a ``module`` attribute) is inspected as a fallback.

    Args:
        item: A collected test item exposing ``get_closest_marker``.
        marker_name: Name of the marker to look for.

    Returns:
        True if the marker is found by either lookup, otherwise False.
    """
    # Direct lookup on the item itself.
    if item.get_closest_marker(marker_name):
        return True

    # Items without a module attribute have nothing further to inspect.
    owner = getattr(item, "module", None)
    if owner is None:
        return False

    declared = getattr(owner, "pytestmark", [])
    # ``pytestmark`` may hold one mark object instead of a list of marks.
    candidates = declared if isinstance(declared, list) else [declared]
    for mark in candidates:
        if getattr(mark, "name", "") == marker_name:
            return True
    return False
@pytest.hookimpl(trylast=True)
def pytest_collection_modifyitems(config, items):
"""
This function is called to modify the list of tests to run.
"""
# Auto-skip tests marked with a framework marker when the framework is not installed
framework_markers = {
"trtllm": "tensorrt_llm",
"vllm": "vllm",
"sglang": "sglang",
"kvbm": "kvbm",
"lmcache": "lmcache",
}
for marker_name, module_name in framework_markers.items():
if importlib.util.find_spec(module_name) is None:
skip = pytest.mark.skip(reason=f"{module_name} is not installed")
for item in items:
if _item_has_marker(item, marker_name):
item.add_marker(skip)
# Collect models via explicit pytest mark from final filtered items only
models_to_download = set()
for item in items:
......
......@@ -54,7 +54,7 @@ def _check_kvbm_imports():
# Base tests (no framework markers) - run in main job with --framework none --enable-kvbm
@pytest.mark.pre_merge
@pytest.mark.post_merge
@pytest.mark.gpu_0
@pytest.mark.unit
def test_kvbm_wheel_exists():
......@@ -62,7 +62,7 @@ def test_kvbm_wheel_exists():
_check_kvbm_wheel_exists()
@pytest.mark.pre_merge
@pytest.mark.post_merge
@pytest.mark.gpu_0
@pytest.mark.unit
def test_kvbm_imports():
......@@ -71,7 +71,7 @@ def test_kvbm_imports():
# vLLM-specific tests - run in vLLM job (vLLM auto-enables KVBM)
@pytest.mark.pre_merge
@pytest.mark.post_merge
@pytest.mark.vllm
@pytest.mark.unit
@pytest.mark.gpu_0
......@@ -80,7 +80,7 @@ def test_kvbm_wheel_exists_vllm():
_check_kvbm_wheel_exists()
@pytest.mark.pre_merge
@pytest.mark.post_merge
@pytest.mark.vllm
@pytest.mark.unit
@pytest.mark.gpu_0
......@@ -90,7 +90,7 @@ def test_kvbm_imports_vllm():
# TRT-LLM-specific tests - run in TRT-LLM job (TRT-LLM auto-enables KVBM)
@pytest.mark.pre_merge
@pytest.mark.post_merge
@pytest.mark.trtllm
@pytest.mark.unit
@pytest.mark.gpu_0
......@@ -99,7 +99,7 @@ def test_kvbm_wheel_exists_trtllm():
_check_kvbm_wheel_exists()
@pytest.mark.pre_merge
@pytest.mark.post_merge
@pytest.mark.trtllm
@pytest.mark.unit
@pytest.mark.gpu_0
......
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""Unit tests to sanity check that required dependencies can be imported."""
import pytest
@pytest.mark.vllm
@pytest.mark.unit
@pytest.mark.gpu_1
@pytest.mark.pre_merge
def test_import_deep_ep():
    """Fail the test if the ``deep_ep`` module cannot be imported."""
    try:
        import deep_ep
    except ImportError as exc:
        # Report an explicit test failure carrying the import error text.
        pytest.fail(f"Failed to import deep_ep: {exc}")
    else:
        assert deep_ep is not None
@pytest.mark.vllm
@pytest.mark.unit
@pytest.mark.gpu_1
@pytest.mark.pre_merge
def test_import_pplx_kernels():
    """Fail the test if the ``pplx_kernels`` module cannot be imported."""
    try:
        import pplx_kernels
    except ImportError as exc:
        # Report an explicit test failure carrying the import error text.
        pytest.fail(f"Failed to import pplx_kernels: {exc}")
    else:
        assert pplx_kernels is not None
......@@ -20,7 +20,7 @@ import time
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from enum import Enum, auto
from typing import TYPE_CHECKING, Dict, List, Optional, Pattern
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Pattern
from typing_extensions import Required, TypedDict
......@@ -568,7 +568,7 @@ class TerminateProcessFailure(Failure):
f"Checking Frontend service health (after {service_name} pod restart)..."
)
pod_ports = {} # Temporary dict for port forward tracking
pod_ports: dict[str, Any] = {} # Temporary dict for port forward tracking
try:
logger.info("Getting frontend pod and setting up port forward...")
frontend_pod_name, local_port, frontend_pod = get_frontend_port(
......
......@@ -9,7 +9,7 @@ import re
import signal
from contextlib import contextmanager
from multiprocessing.context import SpawnProcess
from typing import Any
from typing import Any, Optional
import pytest
......@@ -31,8 +31,8 @@ from tests.utils.test_output import resolve_test_output_path
def get_model_from_deployment(
deployment_spec: DeploymentSpec,
scenario: Scenario = None,
service_name: str = None,
scenario: Optional[Scenario] = None,
service_name: Optional[str] = None,
) -> str:
"""Get model name from deployment spec.
......@@ -60,19 +60,22 @@ def get_model_from_deployment(
# Get model from backend-specific worker (if scenario provided)
if scenario:
try:
model: Optional[str] = None
if scenario.backend == "vllm":
return deployment_spec["VllmDecodeWorker"].model
model = deployment_spec["VllmDecodeWorker"].model
elif scenario.backend == "sglang":
return deployment_spec["decode"].model
model = deployment_spec["decode"].model
elif scenario.backend == "trtllm":
# Determine deployment type from scenario deployment name
if (
"agg" in deployment_spec.name
and "disagg" not in deployment_spec.name
):
return deployment_spec["TRTLLMWorker"].model
model = deployment_spec["TRTLLMWorker"].model
else:
return deployment_spec["TRTLLMDecodeWorker"].model
model = deployment_spec["TRTLLMDecodeWorker"].model
if model:
return model
except (KeyError, AttributeError) as e:
logging.warning(
f"Could not get model from backend-specific worker "
......@@ -290,6 +293,8 @@ async def _inject_failures(
return affected_pods
# TODO: These globals might not work in parallel testing. FIXME
global_result_list = []
# Global storage for test results (used by validation fixture)
test_results_cache = {}
......@@ -489,6 +494,7 @@ def results_summary():
@pytest.mark.post_merge
@pytest.mark.e2e
@pytest.mark.slow
@pytest.mark.gpu_0
@pytest.mark.filterwarnings("ignore::DeprecationWarning")
async def test_fault_scenario(
scenario: Scenario, # noqa: F811
......
......@@ -46,6 +46,7 @@ async def echo_tensor_worker(runtime: DistributedRuntime):
# Internally the bytes string will be converted to List of int
retrieved_model_config = runtime_config.get_tensor_model_config()
assert retrieved_model_config is not None
retrieved_model_config["triton_model_config"] = bytes(
retrieved_model_config["triton_model_config"]
)
......@@ -63,7 +64,7 @@ async def echo_tensor_worker(runtime: DistributedRuntime):
await endpoint.serve_endpoint(generate)
async def generate(request, context):
async def generate(request):
"""Echo tensors and parameters back to the client."""
# [NOTE] gluo: currently there is no frontend side
# validation between model config and actual request,
......
......@@ -86,7 +86,7 @@ class TritonEchoClient:
class UserData:
def __init__(self):
self._completed_requests = queue.Queue()
self._completed_requests: queue.Queue = queue.Queue()
# Define the callback function. Note the last two parameters should be
# result and error. InferenceServerClient would provide the results of an
......
......@@ -114,13 +114,13 @@ vllm_configs = {
directory=vllm_dir,
script_name="agg_lmcache.sh",
marks=[
pytest.mark.lmcache,
pytest.mark.gpu_1,
pytest.mark.pre_merge,
pytest.mark.timeout(360), # 3x estimated time (70s) + download time (150s)
pytest.mark.skipif(
_is_cuda13(),
reason="lmcache does not support CUDA 13 as of v0.3.11",
strict=False,
),
],
model="Qwen/Qwen3-0.6B",
......@@ -136,13 +136,13 @@ vllm_configs = {
directory=vllm_dir,
script_name="agg_lmcache_multiproc.sh",
marks=[
pytest.mark.lmcache,
pytest.mark.gpu_1,
pytest.mark.pre_merge,
pytest.mark.timeout(360), # 3x estimated time (70s) + download time (150s)
pytest.mark.skipif(
_is_cuda13(),
reason="lmcache does not support CUDA 13 as of v0.3.11",
strict=False,
),
],
model="Qwen/Qwen3-0.6B",
......@@ -317,7 +317,8 @@ vllm_configs = {
name="multimodal_agg_frontend_decoding",
directory=vllm_dir,
script_name="agg_multimodal.sh",
marks=[pytest.mark.gpu_1, pytest.mark.pre_merge],
# post_merge because this test needs the real NIXL, not the stub
marks=[pytest.mark.gpu_1, pytest.mark.post_merge],
model="Qwen/Qwen2-VL-2B-Instruct",
# Pass --frontend-decoding to enable Rust frontend image decoding + NIXL RDMA transfer
script_args=[
......@@ -351,7 +352,7 @@ vllm_configs = {
script_name="disagg_multimodal_epd.sh",
marks=[
pytest.mark.gpu_1,
pytest.mark.pre_merge,
pytest.mark.post_merge,
pytest.mark.skip(reason="DYN-2265"),
],
model="Qwen/Qwen3-VL-2B-Instruct",
......@@ -388,7 +389,7 @@ vllm_configs = {
name="multimodal_agg_qwen",
directory=vllm_dir,
script_name="agg_multimodal.sh",
marks=[pytest.mark.gpu_1, pytest.mark.pre_merge],
marks=[pytest.mark.gpu_1, pytest.mark.post_merge],
model="Qwen/Qwen2.5-VL-7B-Instruct",
script_args=["--model", "Qwen/Qwen2.5-VL-7B-Instruct"],
delayed_start=0,
......
......@@ -198,7 +198,7 @@ def metric_payload_default(
Returns:
Backend-specific MetricsPayload subclass based on backend parameter
"""
common_args = {
common_args: dict[str, Any] = {
"body": {},
"repeat_count": repeat_count,
"expected_log": expected_log or [],
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment