Unverified Commit 4ca1679c authored by Jacky's avatar Jacky Committed by GitHub
Browse files

test: Pre-download models before tests are run (#4811)


Signed-off-by: default avatarJacky <18255193+kthui@users.noreply.github.com>
parent e6de33f8
......@@ -17,6 +17,7 @@ import logging
import os
import shutil
import tempfile
import time
from pathlib import Path
from typing import Optional
......@@ -226,6 +227,26 @@ def pytest_collection_modifyitems(config, items):
config.models_to_download = models_to_download
def pytest_runtestloop(session):
    """Download models after collection but before any tests run.

    This hook runs after pytest_collection_modifyitems (so models are collected)
    but before any test execution, ensuring model downloads don't count against
    test timeouts.

    Nothing is downloaded when no models were recorded on the config, or when
    pytest was invoked with ``--collect-only``: pytest still calls this hook in
    that mode, but no test is going to run, so a download would be wasted work.

    Args:
        session: The pytest session. ``session.config.models_to_download`` is
            read if it exists.

    Returns:
        None in every case, so pytest continues with the remaining
        ``pytest_runtestloop`` implementations.
    """
    models = getattr(session.config, "models_to_download", None)
    if not models:
        return

    # The built-in run loop is what honours --collect-only; this hook is
    # invoked regardless, so it has to check the option itself.
    if session.config.getoption("collectonly", default=False):
        logging.info("Skipping model download because of --collect-only")
        return

    logging.info(
        "Downloading %d models before test execution\nModels: %s",
        len(models),
        models,
    )
    # monotonic() is immune to wall-clock adjustments while measuring a duration.
    start_time = time.monotonic()
    download_models(model_list=list(models))
    download_duration = time.monotonic() - start_time
    logging.info("Model download completed in %.1fs", download_duration)
class EtcdServer(ManagedProcess):
def __init__(self, request, port=2379, timeout=300):
port_string = str(port)
......
......@@ -161,9 +161,7 @@ class DynamoWorkerProcess(ManagedProcess):
@pytest.mark.timeout(160) # 3x average
@pytest.mark.gpu_1
@pytest.mark.xfail(strict=False)
def test_request_cancellation_sglang_aggregated(
request, runtime_services, predownload_models
):
def test_request_cancellation_sglang_aggregated(request, runtime_services):
"""
End-to-end test for request cancellation functionality in aggregated mode.
......@@ -247,9 +245,7 @@ def test_request_cancellation_sglang_aggregated(
@pytest.mark.timeout(185) # 3x average
@pytest.mark.gpu_2
def test_request_cancellation_sglang_decode_cancel(
request, runtime_services, predownload_models
):
def test_request_cancellation_sglang_decode_cancel(request, runtime_services):
"""
End-to-end test for request cancellation during decode phase.
......
......@@ -141,9 +141,7 @@ class DynamoWorkerProcess(ManagedProcess):
@pytest.mark.timeout(140) # 3x average
def test_request_cancellation_trtllm_aggregated(
request, runtime_services, predownload_models
):
def test_request_cancellation_trtllm_aggregated(request, runtime_services):
"""
End-to-end test for request cancellation functionality in aggregated mode.
......@@ -215,9 +213,7 @@ def test_request_cancellation_trtllm_aggregated(
@pytest.mark.timeout(350) # 3x average
def test_request_cancellation_trtllm_decode_cancel(
request, runtime_services, predownload_models
):
def test_request_cancellation_trtllm_decode_cancel(request, runtime_services):
"""
End-to-end test for request cancellation during decode phase with unified frontend.
......@@ -288,9 +284,7 @@ def test_request_cancellation_trtllm_decode_cancel(
@pytest.mark.timeout(350) # 3x average
def test_request_cancellation_trtllm_prefill_cancel(
request, runtime_services, predownload_models
):
def test_request_cancellation_trtllm_prefill_cancel(request, runtime_services):
"""
End-to-end test for request cancellation during prefill phase with unified frontend.
......@@ -375,9 +369,7 @@ def test_request_cancellation_trtllm_prefill_cancel(
reason="May fail due to unknown reason with TRT-LLM or backend implementation",
strict=False,
)
def test_request_cancellation_trtllm_kv_transfer_cancel(
request, runtime_services, predownload_models
):
def test_request_cancellation_trtllm_kv_transfer_cancel(request, runtime_services):
"""
End-to-end test for request cancellation during prefill to decode KV transfer phase.
......
......@@ -134,9 +134,7 @@ class DynamoWorkerProcess(ManagedProcess):
@pytest.mark.timeout(110) # 3x average
def test_request_cancellation_vllm_aggregated(
request, runtime_services, predownload_models
):
def test_request_cancellation_vllm_aggregated(request, runtime_services):
"""
End-to-end test for request cancellation functionality in aggregated mode.
......@@ -209,7 +207,7 @@ def test_request_cancellation_vllm_aggregated(
@pytest.mark.timeout(150) # 3x average
def test_request_cancellation_vllm_decode_cancel(
request, runtime_services, predownload_models, set_ucx_tls_no_mm
request, runtime_services, set_ucx_tls_no_mm
):
"""
End-to-end test for request cancellation during decode phase.
......@@ -279,7 +277,7 @@ def test_request_cancellation_vllm_decode_cancel(
@pytest.mark.timeout(150) # 3x average
def test_request_cancellation_vllm_prefill_cancel(
request, runtime_services, predownload_models, set_ucx_tls_no_mm
request, runtime_services, set_ucx_tls_no_mm
):
"""
End-to-end test for request cancellation during prefill phase.
......
......@@ -115,7 +115,7 @@ class DynamoWorkerProcess(ManagedProcess):
@pytest.mark.timeout(235) # 3x average
def test_request_migration_sglang_worker_failure(
request, runtime_services, predownload_models, set_ucx_tls_no_mm
request, runtime_services, set_ucx_tls_no_mm
):
"""
End-to-end test for worker fault tolerance with migration support using SGLang.
......@@ -159,7 +159,7 @@ def test_request_migration_sglang_worker_failure(
@pytest.mark.skip(reason="SGLang graceful shutdown not yet implemented")
def test_request_migration_sglang_graceful_shutdown(
request, runtime_services, predownload_models, set_ucx_tls_no_mm
request, runtime_services, set_ucx_tls_no_mm
):
"""
End-to-end test for worker fault tolerance with graceful shutdown and migration support using SGLang.
......@@ -207,7 +207,7 @@ def test_request_migration_sglang_graceful_shutdown(
@pytest.mark.timeout(135) # 3x average
def test_no_request_migration_sglang_worker_failure(
request, runtime_services, predownload_models, set_ucx_tls_no_mm
request, runtime_services, set_ucx_tls_no_mm
):
"""
End-to-end test for worker fault tolerance with migration disabled using SGLang.
......@@ -267,7 +267,7 @@ def test_no_request_migration_sglang_worker_failure(
@pytest.mark.skip(reason="SGLang graceful shutdown not yet implemented")
def test_no_request_migration_sglang_graceful_shutdown(
request, runtime_services, predownload_models, set_ucx_tls_no_mm
request, runtime_services, set_ucx_tls_no_mm
):
"""
End-to-end test for worker fault tolerance with graceful shutdown and migration disabled using SGLang.
......
......@@ -111,7 +111,7 @@ class DynamoWorkerProcess(ManagedProcess):
@pytest.mark.timeout(290) # 3x average
def test_request_migration_trtllm_worker_failure(
request, runtime_services, predownload_models, set_ucx_tls_no_mm
request, runtime_services, set_ucx_tls_no_mm
):
"""
End-to-end test for worker fault tolerance with migration support using TRT-LLM.
......@@ -155,7 +155,7 @@ def test_request_migration_trtllm_worker_failure(
@pytest.mark.skip(reason="TRT-LLM graceful shutdown not yet implemented")
def test_request_migration_trtllm_graceful_shutdown(
request, runtime_services, predownload_models, set_ucx_tls_no_mm
request, runtime_services, set_ucx_tls_no_mm
):
"""
End-to-end test for worker fault tolerance with graceful shutdown and migration support using TRT-LLM.
......@@ -203,7 +203,7 @@ def test_request_migration_trtllm_graceful_shutdown(
@pytest.mark.timeout(185) # 3x average
def test_no_request_migration_trtllm_worker_failure(
request, runtime_services, predownload_models, set_ucx_tls_no_mm
request, runtime_services, set_ucx_tls_no_mm
):
"""
End-to-end test for worker fault tolerance with migration disabled using TRT-LLM.
......@@ -263,7 +263,7 @@ def test_no_request_migration_trtllm_worker_failure(
@pytest.mark.skip(reason="TRT-LLM graceful shutdown not yet implemented")
def test_no_request_migration_trtllm_graceful_shutdown(
request, runtime_services, predownload_models, set_ucx_tls_no_mm
request, runtime_services, set_ucx_tls_no_mm
):
"""
End-to-end test for worker fault tolerance with graceful shutdown and migration disabled using TRT-LLM.
......
......@@ -115,7 +115,7 @@ class DynamoWorkerProcess(ManagedProcess):
@pytest.mark.timeout(290) # 3x average
def test_request_migration_vllm_worker_failure(
request, runtime_services, predownload_models, set_ucx_tls_no_mm
request, runtime_services, set_ucx_tls_no_mm
):
"""
End-to-end test for worker fault tolerance with migration support.
......@@ -159,7 +159,7 @@ def test_request_migration_vllm_worker_failure(
@pytest.mark.timeout(280) # 3x average
def test_request_migration_vllm_graceful_shutdown(
request, runtime_services, predownload_models, set_ucx_tls_no_mm
request, runtime_services, set_ucx_tls_no_mm
):
"""
End-to-end test for worker fault tolerance with graceful shutdown and migration support.
......@@ -207,7 +207,7 @@ def test_request_migration_vllm_graceful_shutdown(
@pytest.mark.timeout(150) # 3x average
def test_no_request_migration_vllm_worker_failure(
request, runtime_services, predownload_models, set_ucx_tls_no_mm
request, runtime_services, set_ucx_tls_no_mm
):
"""
End-to-end test for worker fault tolerance with migration disabled.
......@@ -267,7 +267,7 @@ def test_no_request_migration_vllm_worker_failure(
@pytest.mark.timeout(140) # 3x average
def test_no_request_migration_vllm_graceful_shutdown(
request, runtime_services, predownload_models, set_ucx_tls_no_mm
request, runtime_services, set_ucx_tls_no_mm
):
"""
End-to-end test for worker fault tolerance with graceful shutdown and migration disabled.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment