Unverified commit d0e95c39, authored by Yan Ru Pei, committed by GitHub
Browse files

test: remove pytest_runtestloop (#4886)


Signed-off-by: PeaBrane <yanrpei@gmail.com>
parent 53cec4ac
...@@ -140,6 +140,7 @@ known_first_party = ["dynamo"] ...@@ -140,6 +140,7 @@ known_first_party = ["dynamo"]
[tool.pytest.ini_options] [tool.pytest.ini_options]
minversion = "8.0" minversion = "8.0"
tmp_path_retention_policy = "failed" tmp_path_retention_policy = "failed"
timeout_func_only = true
# NOTE # NOTE
# We ignore model.py explicitly here to avoid mypy errors with duplicate modules # We ignore model.py explicitly here to avoid mypy errors with duplicate modules
......
...@@ -5,7 +5,6 @@ import logging ...@@ -5,7 +5,6 @@ import logging
import os import os
import shutil import shutil
import tempfile import tempfile
import time
from pathlib import Path from pathlib import Path
from typing import Optional from typing import Optional
...@@ -223,26 +222,6 @@ def pytest_collection_modifyitems(config, items): ...@@ -223,26 +222,6 @@ def pytest_collection_modifyitems(config, items):
config.models_to_download = models_to_download config.models_to_download = models_to_download
def pytest_runtestloop(session):
    """Download the collected models before any test executes.

    Reads ``session.config.models_to_download`` (populated during
    ``pytest_collection_modifyitems``) and, when it is non-empty, hands the
    models to ``download_models`` and logs how long the download took.
    Doing the download in this hook is intended to keep download time from
    counting against per-test timeouts.

    Args:
        session: The pytest session; only ``session.config`` is read.

    Returns:
        None. NOTE(review): pytest documents this hook as stopping at the
        first non-None result, so returning None should let the default
        test loop run afterwards -- confirm against the pytest version in use.
    """
    models = getattr(session.config, "models_to_download", None)
    if not models:
        # Nothing was collected for download (attribute missing or empty).
        return

    # Lazy %-style arguments: the message is only formatted if the record
    # is actually emitted; the rendered text is unchanged.
    logging.info(
        "Downloading %d models before test execution\nModels: %s",
        len(models),
        models,
    )

    # A monotonic clock cannot jump or run backwards when the system clock is
    # adjusted, so the reported duration is never negative or skewed.
    start_time = time.monotonic()
    download_models(model_list=list(models))
    download_duration = time.monotonic() - start_time
    logging.info("Model download completed in %.1fs", download_duration)
class EtcdServer(ManagedProcess): class EtcdServer(ManagedProcess):
def __init__(self, request, port=2379, timeout=300): def __init__(self, request, port=2379, timeout=300):
# Allocate free ports if port is 0 # Allocate free ports if port is 0
......
...@@ -187,7 +187,7 @@ class DynamoWorkerProcess(ManagedProcess): ...@@ -187,7 +187,7 @@ class DynamoWorkerProcess(ManagedProcess):
@pytest.mark.xfail(strict=False) @pytest.mark.xfail(strict=False)
@pytest.mark.parametrize("request_plane", ["nats", "tcp"], indirect=True) @pytest.mark.parametrize("request_plane", ["nats", "tcp"], indirect=True)
def test_request_cancellation_sglang_aggregated( def test_request_cancellation_sglang_aggregated(
request, runtime_services_dynamic_ports request, runtime_services_dynamic_ports, predownload_models
): ):
""" """
End-to-end test for request cancellation functionality in aggregated mode. End-to-end test for request cancellation functionality in aggregated mode.
...@@ -305,7 +305,7 @@ def test_request_cancellation_sglang_aggregated( ...@@ -305,7 +305,7 @@ def test_request_cancellation_sglang_aggregated(
indirect=True, indirect=True,
) )
def test_request_cancellation_sglang_decode_cancel( def test_request_cancellation_sglang_decode_cancel(
request, runtime_services_dynamic_ports request, runtime_services_dynamic_ports, predownload_models
): ):
""" """
End-to-end test for request cancellation during decode phase. End-to-end test for request cancellation during decode phase.
......
...@@ -166,7 +166,7 @@ class DynamoWorkerProcess(ManagedProcess): ...@@ -166,7 +166,7 @@ class DynamoWorkerProcess(ManagedProcess):
@pytest.mark.timeout(140) # 3x average @pytest.mark.timeout(140) # 3x average
@pytest.mark.parametrize("request_plane", ["nats", "tcp"], indirect=True) @pytest.mark.parametrize("request_plane", ["nats", "tcp"], indirect=True)
def test_request_cancellation_trtllm_aggregated( def test_request_cancellation_trtllm_aggregated(
request, runtime_services_dynamic_ports request, runtime_services_dynamic_ports, predownload_models
): ):
""" """
End-to-end test for request cancellation functionality in aggregated mode. End-to-end test for request cancellation functionality in aggregated mode.
...@@ -264,7 +264,7 @@ def test_request_cancellation_trtllm_aggregated( ...@@ -264,7 +264,7 @@ def test_request_cancellation_trtllm_aggregated(
indirect=True, indirect=True,
) )
def test_request_cancellation_trtllm_decode_cancel( def test_request_cancellation_trtllm_decode_cancel(
request, runtime_services_dynamic_ports request, runtime_services_dynamic_ports, predownload_models
): ):
""" """
End-to-end test for request cancellation during decode phase with unified frontend. End-to-end test for request cancellation during decode phase with unified frontend.
...@@ -359,7 +359,7 @@ def test_request_cancellation_trtllm_decode_cancel( ...@@ -359,7 +359,7 @@ def test_request_cancellation_trtllm_decode_cancel(
indirect=True, indirect=True,
) )
def test_request_cancellation_trtllm_prefill_cancel( def test_request_cancellation_trtllm_prefill_cancel(
request, runtime_services_dynamic_ports request, runtime_services_dynamic_ports, predownload_models
): ):
""" """
End-to-end test for request cancellation during prefill phase with unified frontend. End-to-end test for request cancellation during prefill phase with unified frontend.
...@@ -456,7 +456,7 @@ def test_request_cancellation_trtllm_prefill_cancel( ...@@ -456,7 +456,7 @@ def test_request_cancellation_trtllm_prefill_cancel(
strict=False, strict=False,
) )
def test_request_cancellation_trtllm_kv_transfer_cancel( def test_request_cancellation_trtllm_kv_transfer_cancel(
request, runtime_services_dynamic_ports request, runtime_services_dynamic_ports, predownload_models
): ):
""" """
End-to-end test for request cancellation during prefill to decode KV transfer phase. End-to-end test for request cancellation during prefill to decode KV transfer phase.
......
...@@ -167,7 +167,9 @@ class DynamoWorkerProcess(ManagedProcess): ...@@ -167,7 +167,9 @@ class DynamoWorkerProcess(ManagedProcess):
@pytest.mark.timeout(110) # 3x average @pytest.mark.timeout(110) # 3x average
@pytest.mark.parametrize("request_plane", ["nats", "tcp"], indirect=True) @pytest.mark.parametrize("request_plane", ["nats", "tcp"], indirect=True)
def test_request_cancellation_vllm_aggregated(request, runtime_services_dynamic_ports): def test_request_cancellation_vllm_aggregated(
request, runtime_services_dynamic_ports, predownload_models
):
""" """
End-to-end test for request cancellation functionality in aggregated mode. End-to-end test for request cancellation functionality in aggregated mode.
...@@ -258,7 +260,7 @@ def test_request_cancellation_vllm_aggregated(request, runtime_services_dynamic_ ...@@ -258,7 +260,7 @@ def test_request_cancellation_vllm_aggregated(request, runtime_services_dynamic_
indirect=True, indirect=True,
) )
def test_request_cancellation_vllm_decode_cancel( def test_request_cancellation_vllm_decode_cancel(
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm request, runtime_services_dynamic_ports, set_ucx_tls_no_mm, predownload_models
): ):
""" """
End-to-end test for request cancellation during decode phase. End-to-end test for request cancellation during decode phase.
...@@ -350,7 +352,7 @@ def test_request_cancellation_vllm_decode_cancel( ...@@ -350,7 +352,7 @@ def test_request_cancellation_vllm_decode_cancel(
indirect=True, indirect=True,
) )
def test_request_cancellation_vllm_prefill_cancel( def test_request_cancellation_vllm_prefill_cancel(
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm request, runtime_services_dynamic_ports, set_ucx_tls_no_mm, predownload_models
): ):
""" """
End-to-end test for request cancellation during prefill phase. End-to-end test for request cancellation during prefill phase.
......
...@@ -151,7 +151,7 @@ class DynamoWorkerProcess(ManagedProcess): ...@@ -151,7 +151,7 @@ class DynamoWorkerProcess(ManagedProcess):
indirect=True, indirect=True,
) )
def test_request_migration_sglang_worker_failure( def test_request_migration_sglang_worker_failure(
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm request, runtime_services_dynamic_ports, set_ucx_tls_no_mm, predownload_models
): ):
""" """
End-to-end test for worker fault tolerance with migration support using SGLang. End-to-end test for worker fault tolerance with migration support using SGLang.
...@@ -228,7 +228,7 @@ def test_request_migration_sglang_worker_failure( ...@@ -228,7 +228,7 @@ def test_request_migration_sglang_worker_failure(
indirect=True, indirect=True,
) )
def test_request_migration_sglang_graceful_shutdown( def test_request_migration_sglang_graceful_shutdown(
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm request, runtime_services_dynamic_ports, set_ucx_tls_no_mm, predownload_models
): ):
""" """
End-to-end test for worker fault tolerance with graceful shutdown and migration support using SGLang. End-to-end test for worker fault tolerance with graceful shutdown and migration support using SGLang.
...@@ -308,7 +308,7 @@ def test_request_migration_sglang_graceful_shutdown( ...@@ -308,7 +308,7 @@ def test_request_migration_sglang_graceful_shutdown(
indirect=True, indirect=True,
) )
def test_no_request_migration_sglang_worker_failure( def test_no_request_migration_sglang_worker_failure(
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm request, runtime_services_dynamic_ports, set_ucx_tls_no_mm, predownload_models
): ):
""" """
End-to-end test for worker fault tolerance with migration disabled using SGLang. End-to-end test for worker fault tolerance with migration disabled using SGLang.
...@@ -403,7 +403,7 @@ def test_no_request_migration_sglang_worker_failure( ...@@ -403,7 +403,7 @@ def test_no_request_migration_sglang_worker_failure(
indirect=True, indirect=True,
) )
def test_no_request_migration_sglang_graceful_shutdown( def test_no_request_migration_sglang_graceful_shutdown(
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm request, runtime_services_dynamic_ports, set_ucx_tls_no_mm, predownload_models
): ):
""" """
End-to-end test for worker fault tolerance with graceful shutdown and migration disabled using SGLang. End-to-end test for worker fault tolerance with graceful shutdown and migration disabled using SGLang.
......
...@@ -149,7 +149,7 @@ class DynamoWorkerProcess(ManagedProcess): ...@@ -149,7 +149,7 @@ class DynamoWorkerProcess(ManagedProcess):
indirect=True, indirect=True,
) )
def test_request_migration_trtllm_worker_failure( def test_request_migration_trtllm_worker_failure(
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm request, runtime_services_dynamic_ports, set_ucx_tls_no_mm, predownload_models
): ):
""" """
End-to-end test for worker fault tolerance with migration support using TRT-LLM. End-to-end test for worker fault tolerance with migration support using TRT-LLM.
...@@ -213,7 +213,7 @@ def test_request_migration_trtllm_worker_failure( ...@@ -213,7 +213,7 @@ def test_request_migration_trtllm_worker_failure(
indirect=True, indirect=True,
) )
def test_request_migration_trtllm_graceful_shutdown( def test_request_migration_trtllm_graceful_shutdown(
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm request, runtime_services_dynamic_ports, set_ucx_tls_no_mm, predownload_models
): ):
""" """
End-to-end test for worker fault tolerance with graceful shutdown and migration support using TRT-LLM. End-to-end test for worker fault tolerance with graceful shutdown and migration support using TRT-LLM.
...@@ -281,7 +281,7 @@ def test_request_migration_trtllm_graceful_shutdown( ...@@ -281,7 +281,7 @@ def test_request_migration_trtllm_graceful_shutdown(
indirect=True, indirect=True,
) )
def test_no_request_migration_trtllm_worker_failure( def test_no_request_migration_trtllm_worker_failure(
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm request, runtime_services_dynamic_ports, set_ucx_tls_no_mm, predownload_models
): ):
""" """
End-to-end test for worker fault tolerance with migration disabled using TRT-LLM. End-to-end test for worker fault tolerance with migration disabled using TRT-LLM.
...@@ -369,7 +369,7 @@ def test_no_request_migration_trtllm_worker_failure( ...@@ -369,7 +369,7 @@ def test_no_request_migration_trtllm_worker_failure(
indirect=True, indirect=True,
) )
def test_no_request_migration_trtllm_graceful_shutdown( def test_no_request_migration_trtllm_graceful_shutdown(
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm request, runtime_services_dynamic_ports, set_ucx_tls_no_mm, predownload_models
): ):
""" """
End-to-end test for worker fault tolerance with graceful shutdown and migration disabled using TRT-LLM. End-to-end test for worker fault tolerance with graceful shutdown and migration disabled using TRT-LLM.
......
...@@ -159,7 +159,7 @@ class DynamoWorkerProcess(ManagedProcess): ...@@ -159,7 +159,7 @@ class DynamoWorkerProcess(ManagedProcess):
indirect=True, indirect=True,
) )
def test_request_migration_vllm_worker_failure( def test_request_migration_vllm_worker_failure(
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm request, runtime_services_dynamic_ports, set_ucx_tls_no_mm, predownload_models
): ):
""" """
End-to-end test for worker fault tolerance with migration support. End-to-end test for worker fault tolerance with migration support.
...@@ -223,7 +223,7 @@ def test_request_migration_vllm_worker_failure( ...@@ -223,7 +223,7 @@ def test_request_migration_vllm_worker_failure(
indirect=True, indirect=True,
) )
def test_request_migration_vllm_graceful_shutdown( def test_request_migration_vllm_graceful_shutdown(
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm request, runtime_services_dynamic_ports, set_ucx_tls_no_mm, predownload_models
): ):
""" """
End-to-end test for worker fault tolerance with graceful shutdown and migration support. End-to-end test for worker fault tolerance with graceful shutdown and migration support.
...@@ -291,7 +291,7 @@ def test_request_migration_vllm_graceful_shutdown( ...@@ -291,7 +291,7 @@ def test_request_migration_vllm_graceful_shutdown(
indirect=True, indirect=True,
) )
def test_no_request_migration_vllm_worker_failure( def test_no_request_migration_vllm_worker_failure(
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm request, runtime_services_dynamic_ports, set_ucx_tls_no_mm, predownload_models
): ):
""" """
End-to-end test for worker fault tolerance with migration disabled. End-to-end test for worker fault tolerance with migration disabled.
...@@ -373,7 +373,7 @@ def test_no_request_migration_vllm_worker_failure( ...@@ -373,7 +373,7 @@ def test_no_request_migration_vllm_worker_failure(
indirect=True, indirect=True,
) )
def test_no_request_migration_vllm_graceful_shutdown( def test_no_request_migration_vllm_graceful_shutdown(
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm request, runtime_services_dynamic_ports, set_ucx_tls_no_mm, predownload_models
): ):
""" """
End-to-end test for worker fault tolerance with graceful shutdown and migration disabled. End-to-end test for worker fault tolerance with graceful shutdown and migration disabled.
......
...@@ -142,7 +142,7 @@ def runtime_services(request): ...@@ -142,7 +142,7 @@ def runtime_services(request):
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
def start_services(request, runtime_services): def start_services(request, runtime_services, predownload_tokenizers):
"""Start frontend and worker processes once for this module's tests.""" """Start frontend and worker processes once for this module's tests."""
with DynamoFrontendProcess(request): with DynamoFrontendProcess(request):
logger.info("Frontend started for tests") logger.info("Frontend started for tests")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment