test: remove pytest_runtestloop (#4886)

Signed-off-by: PeaBrane <yanrpei@gmail.com>

test: remove pytest_runtestloop (#4886)
Signed-off-by: PeaBrane <yanrpei@gmail.com>
Commit d0e95c39 · author: Yan Ru Pei · committer: GitHub · related commit: 53cec4ac
Unverified commit d0e95c39, authored Dec 11, 2025 by Yan Ru Pei; committed by GitHub Dec 11, 2025.
9 changed files
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -140,6 +140,7 @@ known_first_party = ["dynamo"]
 [tool.pytest.ini_options]
 minversion = "8.0"
 tmp_path_retention_policy = "failed"
+timeout_func_only = true
 # NOTE
 # We ignore model.py explicitly here to avoid mypy errors with duplicate modules

--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -5,7 +5,6 @@ import logging
 import os
 import shutil
 import tempfile
-import time
 from pathlib import Path
 from typing import Optional
@@ -223,26 +222,6 @@ def pytest_collection_modifyitems(config, items):
        config.models_to_download = models_to_download
-def pytest_runtestloop(session):
-    """Download models after collection but before any tests run.
-    This hook runs after pytest_collection_modifyitems (so models are collected)
-    but before any test execution, ensuring model downloads don't count against test timeouts.
-    """
-    models = getattr(session.config, "models_to_download", None)
-    if models:
-        logging.info(
-            f"Downloading {len(models)} models before test execution\nModels: {models}"
-        )
-        start_time = time.time()
-        download_models(model_list=list(models))
-        download_duration = time.time() - start_time
-        logging.info(f"Model download completed in {download_duration:.1f}s")
 class EtcdServer(ManagedProcess):
    def __init__(self, request, port=2379, timeout=300):
        # Allocate free ports if port is 0

--- a/tests/fault_tolerance/cancellation/test_sglang.py
+++ b/tests/fault_tolerance/cancellation/test_sglang.py
@@ -187,7 +187,7 @@ class DynamoWorkerProcess(ManagedProcess):
 @pytest.mark.xfail(strict=False)
 @pytest.mark.parametrize("request_plane", ["nats", "tcp"], indirect=True)
 def test_request_cancellation_sglang_aggregated(
-    request, runtime_services_dynamic_ports
+    request, runtime_services_dynamic_ports, predownload_models
 ):
    """
    End-to-end test for request cancellation functionality in aggregated mode.
@@ -305,7 +305,7 @@ def test_request_cancellation_sglang_aggregated(
    indirect=True,
 )
 def test_request_cancellation_sglang_decode_cancel(
-    request, runtime_services_dynamic_ports
+    request, runtime_services_dynamic_ports, predownload_models
 ):
    """
    End-to-end test for request cancellation during decode phase.

--- a/tests/fault_tolerance/cancellation/test_trtllm.py
+++ b/tests/fault_tolerance/cancellation/test_trtllm.py
@@ -166,7 +166,7 @@ class DynamoWorkerProcess(ManagedProcess):
 @pytest.mark.timeout(140)  # 3x average
 @pytest.mark.parametrize("request_plane", ["nats", "tcp"], indirect=True)
 def test_request_cancellation_trtllm_aggregated(
-    request, runtime_services_dynamic_ports
+    request, runtime_services_dynamic_ports, predownload_models
 ):
    """
    End-to-end test for request cancellation functionality in aggregated mode.
@@ -264,7 +264,7 @@ def test_request_cancellation_trtllm_aggregated(
    indirect=True,
 )
 def test_request_cancellation_trtllm_decode_cancel(
-    request, runtime_services_dynamic_ports
+    request, runtime_services_dynamic_ports, predownload_models
 ):
    """
    End-to-end test for request cancellation during decode phase with unified frontend.
@@ -359,7 +359,7 @@ def test_request_cancellation_trtllm_decode_cancel(
    indirect=True,
 )
 def test_request_cancellation_trtllm_prefill_cancel(
-    request, runtime_services_dynamic_ports
+    request, runtime_services_dynamic_ports, predownload_models
 ):
    """
    End-to-end test for request cancellation during prefill phase with unified frontend.
@@ -456,7 +456,7 @@ def test_request_cancellation_trtllm_prefill_cancel(
    strict=False,
 )
 def test_request_cancellation_trtllm_kv_transfer_cancel(
-    request, runtime_services_dynamic_ports
+    request, runtime_services_dynamic_ports, predownload_models
 ):
    """
    End-to-end test for request cancellation during prefill to decode KV transfer phase.

--- a/tests/fault_tolerance/cancellation/test_vllm.py
+++ b/tests/fault_tolerance/cancellation/test_vllm.py
@@ -167,7 +167,9 @@ class DynamoWorkerProcess(ManagedProcess):
 @pytest.mark.timeout(110)  # 3x average
 @pytest.mark.parametrize("request_plane", ["nats", "tcp"], indirect=True)
-def test_request_cancellation_vllm_aggregated(request, runtime_services_dynamic_ports):
+def test_request_cancellation_vllm_aggregated(
+    request, runtime_services_dynamic_ports, predownload_models
+):
    """
    End-to-end test for request cancellation functionality in aggregated mode.
@@ -258,7 +260,7 @@ def test_request_cancellation_vllm_aggregated(request, runtime_services_dynamic_
    indirect=True,
 )
 def test_request_cancellation_vllm_decode_cancel(
-    request, runtime_services_dynamic_ports, set_ucx_tls_no_mm
+    request, runtime_services_dynamic_ports, set_ucx_tls_no_mm, predownload_models
 ):
    """
    End-to-end test for request cancellation during decode phase.
@@ -350,7 +352,7 @@ def test_request_cancellation_vllm_decode_cancel(
    indirect=True,
 )
 def test_request_cancellation_vllm_prefill_cancel(
-    request, runtime_services_dynamic_ports, set_ucx_tls_no_mm
+    request, runtime_services_dynamic_ports, set_ucx_tls_no_mm, predownload_models
 ):
    """
    End-to-end test for request cancellation during prefill phase.

--- a/tests/fault_tolerance/migration/test_sglang.py
+++ b/tests/fault_tolerance/migration/test_sglang.py
@@ -151,7 +151,7 @@ class DynamoWorkerProcess(ManagedProcess):
    indirect=True,
 )
 def test_request_migration_sglang_worker_failure(
-    request, runtime_services_dynamic_ports, set_ucx_tls_no_mm
+    request, runtime_services_dynamic_ports, set_ucx_tls_no_mm, predownload_models
 ):
    """
    End-to-end test for worker fault tolerance with migration support using SGLang.
@@ -228,7 +228,7 @@ def test_request_migration_sglang_worker_failure(
    indirect=True,
 )
 def test_request_migration_sglang_graceful_shutdown(
-    request, runtime_services_dynamic_ports, set_ucx_tls_no_mm
+    request, runtime_services_dynamic_ports, set_ucx_tls_no_mm, predownload_models
 ):
    """
    End-to-end test for worker fault tolerance with graceful shutdown and migration support using SGLang.
@@ -308,7 +308,7 @@ def test_request_migration_sglang_graceful_shutdown(
    indirect=True,
 )
 def test_no_request_migration_sglang_worker_failure(
-    request, runtime_services_dynamic_ports, set_ucx_tls_no_mm
+    request, runtime_services_dynamic_ports, set_ucx_tls_no_mm, predownload_models
 ):
    """
    End-to-end test for worker fault tolerance with migration disabled using SGLang.
@@ -403,7 +403,7 @@ def test_no_request_migration_sglang_worker_failure(
    indirect=True,
 )
 def test_no_request_migration_sglang_graceful_shutdown(
-    request, runtime_services_dynamic_ports, set_ucx_tls_no_mm
+    request, runtime_services_dynamic_ports, set_ucx_tls_no_mm, predownload_models
 ):
    """
    End-to-end test for worker fault tolerance with graceful shutdown and migration disabled using SGLang.

--- a/tests/fault_tolerance/migration/test_trtllm.py
+++ b/tests/fault_tolerance/migration/test_trtllm.py
@@ -149,7 +149,7 @@ class DynamoWorkerProcess(ManagedProcess):
    indirect=True,
 )
 def test_request_migration_trtllm_worker_failure(
-    request, runtime_services_dynamic_ports, set_ucx_tls_no_mm
+    request, runtime_services_dynamic_ports, set_ucx_tls_no_mm, predownload_models
 ):
    """
    End-to-end test for worker fault tolerance with migration support using TRT-LLM.
@@ -213,7 +213,7 @@ def test_request_migration_trtllm_worker_failure(
    indirect=True,
 )
 def test_request_migration_trtllm_graceful_shutdown(
-    request, runtime_services_dynamic_ports, set_ucx_tls_no_mm
+    request, runtime_services_dynamic_ports, set_ucx_tls_no_mm, predownload_models
 ):
    """
    End-to-end test for worker fault tolerance with graceful shutdown and migration support using TRT-LLM.
@@ -281,7 +281,7 @@ def test_request_migration_trtllm_graceful_shutdown(
    indirect=True,
 )
 def test_no_request_migration_trtllm_worker_failure(
-    request, runtime_services_dynamic_ports, set_ucx_tls_no_mm
+    request, runtime_services_dynamic_ports, set_ucx_tls_no_mm, predownload_models
 ):
    """
    End-to-end test for worker fault tolerance with migration disabled using TRT-LLM.
@@ -369,7 +369,7 @@ def test_no_request_migration_trtllm_worker_failure(
    indirect=True,
 )
 def test_no_request_migration_trtllm_graceful_shutdown(
-    request, runtime_services_dynamic_ports, set_ucx_tls_no_mm
+    request, runtime_services_dynamic_ports, set_ucx_tls_no_mm, predownload_models
 ):
    """
    End-to-end test for worker fault tolerance with graceful shutdown and migration disabled using TRT-LLM.

--- a/tests/fault_tolerance/migration/test_vllm.py
+++ b/tests/fault_tolerance/migration/test_vllm.py
@@ -159,7 +159,7 @@ class DynamoWorkerProcess(ManagedProcess):
    indirect=True,
 )
 def test_request_migration_vllm_worker_failure(
-    request, runtime_services_dynamic_ports, set_ucx_tls_no_mm
+    request, runtime_services_dynamic_ports, set_ucx_tls_no_mm, predownload_models
 ):
    """
    End-to-end test for worker fault tolerance with migration support.
@@ -223,7 +223,7 @@ def test_request_migration_vllm_worker_failure(
    indirect=True,
 )
 def test_request_migration_vllm_graceful_shutdown(
-    request, runtime_services_dynamic_ports, set_ucx_tls_no_mm
+    request, runtime_services_dynamic_ports, set_ucx_tls_no_mm, predownload_models
 ):
    """
    End-to-end test for worker fault tolerance with graceful shutdown and migration support.
@@ -291,7 +291,7 @@ def test_request_migration_vllm_graceful_shutdown(
    indirect=True,
 )
 def test_no_request_migration_vllm_worker_failure(
-    request, runtime_services_dynamic_ports, set_ucx_tls_no_mm
+    request, runtime_services_dynamic_ports, set_ucx_tls_no_mm, predownload_models
 ):
    """
    End-to-end test for worker fault tolerance with migration disabled.
@@ -373,7 +373,7 @@ def test_no_request_migration_vllm_worker_failure(
    indirect=True,
 )
 def test_no_request_migration_vllm_graceful_shutdown(
-    request, runtime_services_dynamic_ports, set_ucx_tls_no_mm
+    request, runtime_services_dynamic_ports, set_ucx_tls_no_mm, predownload_models
 ):
    """
    End-to-end test for worker fault tolerance with graceful shutdown and migration disabled.

--- a/tests/frontend/test_completion_mocker_engine.py
+++ b/tests/frontend/test_completion_mocker_engine.py
@@ -142,7 +142,7 @@ def runtime_services(request):
 @pytest.fixture(scope="module")
-def start_services(request, runtime_services):
+def start_services(request, runtime_services, predownload_tokenizers):
    """Start frontend and worker processes once for this module's tests."""
    with DynamoFrontendProcess(request):
        logger.info("Frontend started for tests")