Unverified commit d0e95c39, authored by Yan Ru Pei and committed by GitHub
Browse files

test: remove pytest_runtestloop (#4886)


Signed-off-by: PeaBrane <yanrpei@gmail.com>
parent 53cec4ac
......@@ -140,6 +140,7 @@ known_first_party = ["dynamo"]
[tool.pytest.ini_options]
minversion = "8.0"
tmp_path_retention_policy = "failed"
timeout_func_only = true
# NOTE
# We ignore model.py explicitly here to avoid mypy errors with duplicate modules
......
......@@ -5,7 +5,6 @@ import logging
import os
import shutil
import tempfile
import time
from pathlib import Path
from typing import Optional
......@@ -223,26 +222,6 @@ def pytest_collection_modifyitems(config, items):
config.models_to_download = models_to_download
def pytest_runtestloop(session):
    """Download models after collection but before any tests run.

    This hook runs after pytest_collection_modifyitems (so the models have
    been collected onto ``config.models_to_download``) but before any test
    execution, ensuring model downloads don't count against test timeouts.

    Args:
        session: The pytest session. Only ``session.config`` is read; a
            missing or empty ``models_to_download`` attribute means there is
            nothing to download.

    Returns:
        None in every case; the hook never produces a result of its own.
    """
    models = getattr(session.config, "models_to_download", None)
    if not models:
        # Nothing was collected for download; skip straight to the test loop.
        return

    # Lazy %-style arguments render the same text as the previous f-strings
    # but are only formatted when the record is actually emitted.
    logging.info(
        "Downloading %d models before test execution\nModels: %s",
        len(models),
        models,
    )

    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # by wall-clock adjustments the way a time.time() difference can.
    started_at = time.perf_counter()
    download_models(model_list=list(models))
    download_duration = time.perf_counter() - started_at
    logging.info("Model download completed in %.1fs", download_duration)
class EtcdServer(ManagedProcess):
def __init__(self, request, port=2379, timeout=300):
# Allocate free ports if port is 0
......
......@@ -187,7 +187,7 @@ class DynamoWorkerProcess(ManagedProcess):
@pytest.mark.xfail(strict=False)
@pytest.mark.parametrize("request_plane", ["nats", "tcp"], indirect=True)
def test_request_cancellation_sglang_aggregated(
request, runtime_services_dynamic_ports
request, runtime_services_dynamic_ports, predownload_models
):
"""
End-to-end test for request cancellation functionality in aggregated mode.
......@@ -305,7 +305,7 @@ def test_request_cancellation_sglang_aggregated(
indirect=True,
)
def test_request_cancellation_sglang_decode_cancel(
request, runtime_services_dynamic_ports
request, runtime_services_dynamic_ports, predownload_models
):
"""
End-to-end test for request cancellation during decode phase.
......
......@@ -166,7 +166,7 @@ class DynamoWorkerProcess(ManagedProcess):
@pytest.mark.timeout(140) # 3x average
@pytest.mark.parametrize("request_plane", ["nats", "tcp"], indirect=True)
def test_request_cancellation_trtllm_aggregated(
request, runtime_services_dynamic_ports
request, runtime_services_dynamic_ports, predownload_models
):
"""
End-to-end test for request cancellation functionality in aggregated mode.
......@@ -264,7 +264,7 @@ def test_request_cancellation_trtllm_aggregated(
indirect=True,
)
def test_request_cancellation_trtllm_decode_cancel(
request, runtime_services_dynamic_ports
request, runtime_services_dynamic_ports, predownload_models
):
"""
End-to-end test for request cancellation during decode phase with unified frontend.
......@@ -359,7 +359,7 @@ def test_request_cancellation_trtllm_decode_cancel(
indirect=True,
)
def test_request_cancellation_trtllm_prefill_cancel(
request, runtime_services_dynamic_ports
request, runtime_services_dynamic_ports, predownload_models
):
"""
End-to-end test for request cancellation during prefill phase with unified frontend.
......@@ -456,7 +456,7 @@ def test_request_cancellation_trtllm_prefill_cancel(
strict=False,
)
def test_request_cancellation_trtllm_kv_transfer_cancel(
request, runtime_services_dynamic_ports
request, runtime_services_dynamic_ports, predownload_models
):
"""
End-to-end test for request cancellation during prefill to decode KV transfer phase.
......
......@@ -167,7 +167,9 @@ class DynamoWorkerProcess(ManagedProcess):
@pytest.mark.timeout(110) # 3x average
@pytest.mark.parametrize("request_plane", ["nats", "tcp"], indirect=True)
def test_request_cancellation_vllm_aggregated(request, runtime_services_dynamic_ports):
def test_request_cancellation_vllm_aggregated(
request, runtime_services_dynamic_ports, predownload_models
):
"""
End-to-end test for request cancellation functionality in aggregated mode.
......@@ -258,7 +260,7 @@ def test_request_cancellation_vllm_aggregated(request, runtime_services_dynamic_
indirect=True,
)
def test_request_cancellation_vllm_decode_cancel(
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm, predownload_models
):
"""
End-to-end test for request cancellation during decode phase.
......@@ -350,7 +352,7 @@ def test_request_cancellation_vllm_decode_cancel(
indirect=True,
)
def test_request_cancellation_vllm_prefill_cancel(
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm, predownload_models
):
"""
End-to-end test for request cancellation during prefill phase.
......
......@@ -151,7 +151,7 @@ class DynamoWorkerProcess(ManagedProcess):
indirect=True,
)
def test_request_migration_sglang_worker_failure(
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm, predownload_models
):
"""
End-to-end test for worker fault tolerance with migration support using SGLang.
......@@ -228,7 +228,7 @@ def test_request_migration_sglang_worker_failure(
indirect=True,
)
def test_request_migration_sglang_graceful_shutdown(
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm, predownload_models
):
"""
End-to-end test for worker fault tolerance with graceful shutdown and migration support using SGLang.
......@@ -308,7 +308,7 @@ def test_request_migration_sglang_graceful_shutdown(
indirect=True,
)
def test_no_request_migration_sglang_worker_failure(
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm, predownload_models
):
"""
End-to-end test for worker fault tolerance with migration disabled using SGLang.
......@@ -403,7 +403,7 @@ def test_no_request_migration_sglang_worker_failure(
indirect=True,
)
def test_no_request_migration_sglang_graceful_shutdown(
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm, predownload_models
):
"""
End-to-end test for worker fault tolerance with graceful shutdown and migration disabled using SGLang.
......
......@@ -149,7 +149,7 @@ class DynamoWorkerProcess(ManagedProcess):
indirect=True,
)
def test_request_migration_trtllm_worker_failure(
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm, predownload_models
):
"""
End-to-end test for worker fault tolerance with migration support using TRT-LLM.
......@@ -213,7 +213,7 @@ def test_request_migration_trtllm_worker_failure(
indirect=True,
)
def test_request_migration_trtllm_graceful_shutdown(
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm, predownload_models
):
"""
End-to-end test for worker fault tolerance with graceful shutdown and migration support using TRT-LLM.
......@@ -281,7 +281,7 @@ def test_request_migration_trtllm_graceful_shutdown(
indirect=True,
)
def test_no_request_migration_trtllm_worker_failure(
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm, predownload_models
):
"""
End-to-end test for worker fault tolerance with migration disabled using TRT-LLM.
......@@ -369,7 +369,7 @@ def test_no_request_migration_trtllm_worker_failure(
indirect=True,
)
def test_no_request_migration_trtllm_graceful_shutdown(
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm, predownload_models
):
"""
End-to-end test for worker fault tolerance with graceful shutdown and migration disabled using TRT-LLM.
......
......@@ -159,7 +159,7 @@ class DynamoWorkerProcess(ManagedProcess):
indirect=True,
)
def test_request_migration_vllm_worker_failure(
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm, predownload_models
):
"""
End-to-end test for worker fault tolerance with migration support.
......@@ -223,7 +223,7 @@ def test_request_migration_vllm_worker_failure(
indirect=True,
)
def test_request_migration_vllm_graceful_shutdown(
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm, predownload_models
):
"""
End-to-end test for worker fault tolerance with graceful shutdown and migration support.
......@@ -291,7 +291,7 @@ def test_request_migration_vllm_graceful_shutdown(
indirect=True,
)
def test_no_request_migration_vllm_worker_failure(
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm, predownload_models
):
"""
End-to-end test for worker fault tolerance with migration disabled.
......@@ -373,7 +373,7 @@ def test_no_request_migration_vllm_worker_failure(
indirect=True,
)
def test_no_request_migration_vllm_graceful_shutdown(
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm, predownload_models
):
"""
End-to-end test for worker fault tolerance with graceful shutdown and migration disabled.
......
......@@ -142,7 +142,7 @@ def runtime_services(request):
@pytest.fixture(scope="module")
def start_services(request, runtime_services):
def start_services(request, runtime_services, predownload_tokenizers):
"""Start frontend and worker processes once for this module's tests."""
with DynamoFrontendProcess(request):
logger.info("Frontend started for tests")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment