"...ssh:/git@developer.sourcefind.cn:2222/OpenDAS/dynamo.git" did not exist on "158435cd9c052661d98d507c4272411c4d0f2c2a"
Unverified commit 4ca1679c, authored by Jacky and committed by GitHub
Browse files

test: Pre-download models before tests are run (#4811)


Signed-off-by: Jacky <18255193+kthui@users.noreply.github.com>
parent e6de33f8
...@@ -17,6 +17,7 @@ import logging ...@@ -17,6 +17,7 @@ import logging
import os import os
import shutil import shutil
import tempfile import tempfile
import time
from pathlib import Path from pathlib import Path
from typing import Optional from typing import Optional
...@@ -226,6 +227,26 @@ def pytest_collection_modifyitems(config, items): ...@@ -226,6 +227,26 @@ def pytest_collection_modifyitems(config, items):
config.models_to_download = models_to_download config.models_to_download = models_to_download
def pytest_runtestloop(session):
    """Download required models after collection but before any test runs.

    pytest invokes this hook once collection has finished, so the models
    gathered during ``pytest_collection_modifyitems`` are already stored on
    ``session.config.models_to_download``. Fetching them here keeps the
    download time from counting against individual test timeouts.

    Nothing is downloaded when that attribute is missing or empty, or when
    pytest was started with ``--collect-only``: pytest still calls this hook
    in that mode, but no test will execute, so a download would be wasted.

    Args:
        session: The pytest session; its ``config`` may carry
            ``models_to_download``.

    Returns:
        Always ``None``. ``pytest_runtestloop`` is a first-result hook, so
        returning ``None`` lets pytest continue to its default run loop.
    """
    config = session.config

    # Collection-only runs never execute a test, so skip the download.
    if getattr(getattr(config, "option", None), "collectonly", False):
        return None

    models = getattr(config, "models_to_download", None)
    if not models:
        return None

    logging.info(
        f"Downloading {len(models)} models before test execution\nModels: {models}"
    )
    # perf_counter is monotonic, so wall-clock adjustments cannot skew the
    # reported duration.
    started = time.perf_counter()
    download_models(model_list=list(models))
    elapsed = time.perf_counter() - started
    logging.info(f"Model download completed in {elapsed:.1f}s")
    return None
class EtcdServer(ManagedProcess): class EtcdServer(ManagedProcess):
def __init__(self, request, port=2379, timeout=300): def __init__(self, request, port=2379, timeout=300):
port_string = str(port) port_string = str(port)
......
...@@ -161,9 +161,7 @@ class DynamoWorkerProcess(ManagedProcess): ...@@ -161,9 +161,7 @@ class DynamoWorkerProcess(ManagedProcess):
@pytest.mark.timeout(160) # 3x average @pytest.mark.timeout(160) # 3x average
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.xfail(strict=False) @pytest.mark.xfail(strict=False)
def test_request_cancellation_sglang_aggregated( def test_request_cancellation_sglang_aggregated(request, runtime_services):
request, runtime_services, predownload_models
):
""" """
End-to-end test for request cancellation functionality in aggregated mode. End-to-end test for request cancellation functionality in aggregated mode.
...@@ -247,9 +245,7 @@ def test_request_cancellation_sglang_aggregated( ...@@ -247,9 +245,7 @@ def test_request_cancellation_sglang_aggregated(
@pytest.mark.timeout(185) # 3x average @pytest.mark.timeout(185) # 3x average
@pytest.mark.gpu_2 @pytest.mark.gpu_2
def test_request_cancellation_sglang_decode_cancel( def test_request_cancellation_sglang_decode_cancel(request, runtime_services):
request, runtime_services, predownload_models
):
""" """
End-to-end test for request cancellation during decode phase. End-to-end test for request cancellation during decode phase.
......
...@@ -141,9 +141,7 @@ class DynamoWorkerProcess(ManagedProcess): ...@@ -141,9 +141,7 @@ class DynamoWorkerProcess(ManagedProcess):
@pytest.mark.timeout(140) # 3x average @pytest.mark.timeout(140) # 3x average
def test_request_cancellation_trtllm_aggregated( def test_request_cancellation_trtllm_aggregated(request, runtime_services):
request, runtime_services, predownload_models
):
""" """
End-to-end test for request cancellation functionality in aggregated mode. End-to-end test for request cancellation functionality in aggregated mode.
...@@ -215,9 +213,7 @@ def test_request_cancellation_trtllm_aggregated( ...@@ -215,9 +213,7 @@ def test_request_cancellation_trtllm_aggregated(
@pytest.mark.timeout(350) # 3x average @pytest.mark.timeout(350) # 3x average
def test_request_cancellation_trtllm_decode_cancel( def test_request_cancellation_trtllm_decode_cancel(request, runtime_services):
request, runtime_services, predownload_models
):
""" """
End-to-end test for request cancellation during decode phase with unified frontend. End-to-end test for request cancellation during decode phase with unified frontend.
...@@ -288,9 +284,7 @@ def test_request_cancellation_trtllm_decode_cancel( ...@@ -288,9 +284,7 @@ def test_request_cancellation_trtllm_decode_cancel(
@pytest.mark.timeout(350) # 3x average @pytest.mark.timeout(350) # 3x average
def test_request_cancellation_trtllm_prefill_cancel( def test_request_cancellation_trtllm_prefill_cancel(request, runtime_services):
request, runtime_services, predownload_models
):
""" """
End-to-end test for request cancellation during prefill phase with unified frontend. End-to-end test for request cancellation during prefill phase with unified frontend.
...@@ -375,9 +369,7 @@ def test_request_cancellation_trtllm_prefill_cancel( ...@@ -375,9 +369,7 @@ def test_request_cancellation_trtllm_prefill_cancel(
reason="May fail due to unknown reason with TRT-LLM or backend implementation", reason="May fail due to unknown reason with TRT-LLM or backend implementation",
strict=False, strict=False,
) )
def test_request_cancellation_trtllm_kv_transfer_cancel( def test_request_cancellation_trtllm_kv_transfer_cancel(request, runtime_services):
request, runtime_services, predownload_models
):
""" """
End-to-end test for request cancellation during prefill to decode KV transfer phase. End-to-end test for request cancellation during prefill to decode KV transfer phase.
......
...@@ -134,9 +134,7 @@ class DynamoWorkerProcess(ManagedProcess): ...@@ -134,9 +134,7 @@ class DynamoWorkerProcess(ManagedProcess):
@pytest.mark.timeout(110) # 3x average @pytest.mark.timeout(110) # 3x average
def test_request_cancellation_vllm_aggregated( def test_request_cancellation_vllm_aggregated(request, runtime_services):
request, runtime_services, predownload_models
):
""" """
End-to-end test for request cancellation functionality in aggregated mode. End-to-end test for request cancellation functionality in aggregated mode.
...@@ -209,7 +207,7 @@ def test_request_cancellation_vllm_aggregated( ...@@ -209,7 +207,7 @@ def test_request_cancellation_vllm_aggregated(
@pytest.mark.timeout(150) # 3x average @pytest.mark.timeout(150) # 3x average
def test_request_cancellation_vllm_decode_cancel( def test_request_cancellation_vllm_decode_cancel(
request, runtime_services, predownload_models, set_ucx_tls_no_mm request, runtime_services, set_ucx_tls_no_mm
): ):
""" """
End-to-end test for request cancellation during decode phase. End-to-end test for request cancellation during decode phase.
...@@ -279,7 +277,7 @@ def test_request_cancellation_vllm_decode_cancel( ...@@ -279,7 +277,7 @@ def test_request_cancellation_vllm_decode_cancel(
@pytest.mark.timeout(150) # 3x average @pytest.mark.timeout(150) # 3x average
def test_request_cancellation_vllm_prefill_cancel( def test_request_cancellation_vllm_prefill_cancel(
request, runtime_services, predownload_models, set_ucx_tls_no_mm request, runtime_services, set_ucx_tls_no_mm
): ):
""" """
End-to-end test for request cancellation during prefill phase. End-to-end test for request cancellation during prefill phase.
......
...@@ -115,7 +115,7 @@ class DynamoWorkerProcess(ManagedProcess): ...@@ -115,7 +115,7 @@ class DynamoWorkerProcess(ManagedProcess):
@pytest.mark.timeout(235) # 3x average @pytest.mark.timeout(235) # 3x average
def test_request_migration_sglang_worker_failure( def test_request_migration_sglang_worker_failure(
request, runtime_services, predownload_models, set_ucx_tls_no_mm request, runtime_services, set_ucx_tls_no_mm
): ):
""" """
End-to-end test for worker fault tolerance with migration support using SGLang. End-to-end test for worker fault tolerance with migration support using SGLang.
...@@ -159,7 +159,7 @@ def test_request_migration_sglang_worker_failure( ...@@ -159,7 +159,7 @@ def test_request_migration_sglang_worker_failure(
@pytest.mark.skip(reason="SGLang graceful shutdown not yet implemented") @pytest.mark.skip(reason="SGLang graceful shutdown not yet implemented")
def test_request_migration_sglang_graceful_shutdown( def test_request_migration_sglang_graceful_shutdown(
request, runtime_services, predownload_models, set_ucx_tls_no_mm request, runtime_services, set_ucx_tls_no_mm
): ):
""" """
End-to-end test for worker fault tolerance with graceful shutdown and migration support using SGLang. End-to-end test for worker fault tolerance with graceful shutdown and migration support using SGLang.
...@@ -207,7 +207,7 @@ def test_request_migration_sglang_graceful_shutdown( ...@@ -207,7 +207,7 @@ def test_request_migration_sglang_graceful_shutdown(
@pytest.mark.timeout(135) # 3x average @pytest.mark.timeout(135) # 3x average
def test_no_request_migration_sglang_worker_failure( def test_no_request_migration_sglang_worker_failure(
request, runtime_services, predownload_models, set_ucx_tls_no_mm request, runtime_services, set_ucx_tls_no_mm
): ):
""" """
End-to-end test for worker fault tolerance with migration disabled using SGLang. End-to-end test for worker fault tolerance with migration disabled using SGLang.
...@@ -267,7 +267,7 @@ def test_no_request_migration_sglang_worker_failure( ...@@ -267,7 +267,7 @@ def test_no_request_migration_sglang_worker_failure(
@pytest.mark.skip(reason="SGLang graceful shutdown not yet implemented") @pytest.mark.skip(reason="SGLang graceful shutdown not yet implemented")
def test_no_request_migration_sglang_graceful_shutdown( def test_no_request_migration_sglang_graceful_shutdown(
request, runtime_services, predownload_models, set_ucx_tls_no_mm request, runtime_services, set_ucx_tls_no_mm
): ):
""" """
End-to-end test for worker fault tolerance with graceful shutdown and migration disabled using SGLang. End-to-end test for worker fault tolerance with graceful shutdown and migration disabled using SGLang.
......
...@@ -111,7 +111,7 @@ class DynamoWorkerProcess(ManagedProcess): ...@@ -111,7 +111,7 @@ class DynamoWorkerProcess(ManagedProcess):
@pytest.mark.timeout(290) # 3x average @pytest.mark.timeout(290) # 3x average
def test_request_migration_trtllm_worker_failure( def test_request_migration_trtllm_worker_failure(
request, runtime_services, predownload_models, set_ucx_tls_no_mm request, runtime_services, set_ucx_tls_no_mm
): ):
""" """
End-to-end test for worker fault tolerance with migration support using TRT-LLM. End-to-end test for worker fault tolerance with migration support using TRT-LLM.
...@@ -155,7 +155,7 @@ def test_request_migration_trtllm_worker_failure( ...@@ -155,7 +155,7 @@ def test_request_migration_trtllm_worker_failure(
@pytest.mark.skip(reason="TRT-LLM graceful shutdown not yet implemented") @pytest.mark.skip(reason="TRT-LLM graceful shutdown not yet implemented")
def test_request_migration_trtllm_graceful_shutdown( def test_request_migration_trtllm_graceful_shutdown(
request, runtime_services, predownload_models, set_ucx_tls_no_mm request, runtime_services, set_ucx_tls_no_mm
): ):
""" """
End-to-end test for worker fault tolerance with graceful shutdown and migration support using TRT-LLM. End-to-end test for worker fault tolerance with graceful shutdown and migration support using TRT-LLM.
...@@ -203,7 +203,7 @@ def test_request_migration_trtllm_graceful_shutdown( ...@@ -203,7 +203,7 @@ def test_request_migration_trtllm_graceful_shutdown(
@pytest.mark.timeout(185) # 3x average @pytest.mark.timeout(185) # 3x average
def test_no_request_migration_trtllm_worker_failure( def test_no_request_migration_trtllm_worker_failure(
request, runtime_services, predownload_models, set_ucx_tls_no_mm request, runtime_services, set_ucx_tls_no_mm
): ):
""" """
End-to-end test for worker fault tolerance with migration disabled using TRT-LLM. End-to-end test for worker fault tolerance with migration disabled using TRT-LLM.
...@@ -263,7 +263,7 @@ def test_no_request_migration_trtllm_worker_failure( ...@@ -263,7 +263,7 @@ def test_no_request_migration_trtllm_worker_failure(
@pytest.mark.skip(reason="TRT-LLM graceful shutdown not yet implemented") @pytest.mark.skip(reason="TRT-LLM graceful shutdown not yet implemented")
def test_no_request_migration_trtllm_graceful_shutdown( def test_no_request_migration_trtllm_graceful_shutdown(
request, runtime_services, predownload_models, set_ucx_tls_no_mm request, runtime_services, set_ucx_tls_no_mm
): ):
""" """
End-to-end test for worker fault tolerance with graceful shutdown and migration disabled using TRT-LLM. End-to-end test for worker fault tolerance with graceful shutdown and migration disabled using TRT-LLM.
......
...@@ -115,7 +115,7 @@ class DynamoWorkerProcess(ManagedProcess): ...@@ -115,7 +115,7 @@ class DynamoWorkerProcess(ManagedProcess):
@pytest.mark.timeout(290) # 3x average @pytest.mark.timeout(290) # 3x average
def test_request_migration_vllm_worker_failure( def test_request_migration_vllm_worker_failure(
request, runtime_services, predownload_models, set_ucx_tls_no_mm request, runtime_services, set_ucx_tls_no_mm
): ):
""" """
End-to-end test for worker fault tolerance with migration support. End-to-end test for worker fault tolerance with migration support.
...@@ -159,7 +159,7 @@ def test_request_migration_vllm_worker_failure( ...@@ -159,7 +159,7 @@ def test_request_migration_vllm_worker_failure(
@pytest.mark.timeout(280) # 3x average @pytest.mark.timeout(280) # 3x average
def test_request_migration_vllm_graceful_shutdown( def test_request_migration_vllm_graceful_shutdown(
request, runtime_services, predownload_models, set_ucx_tls_no_mm request, runtime_services, set_ucx_tls_no_mm
): ):
""" """
End-to-end test for worker fault tolerance with graceful shutdown and migration support. End-to-end test for worker fault tolerance with graceful shutdown and migration support.
...@@ -207,7 +207,7 @@ def test_request_migration_vllm_graceful_shutdown( ...@@ -207,7 +207,7 @@ def test_request_migration_vllm_graceful_shutdown(
@pytest.mark.timeout(150) # 3x average @pytest.mark.timeout(150) # 3x average
def test_no_request_migration_vllm_worker_failure( def test_no_request_migration_vllm_worker_failure(
request, runtime_services, predownload_models, set_ucx_tls_no_mm request, runtime_services, set_ucx_tls_no_mm
): ):
""" """
End-to-end test for worker fault tolerance with migration disabled. End-to-end test for worker fault tolerance with migration disabled.
...@@ -267,7 +267,7 @@ def test_no_request_migration_vllm_worker_failure( ...@@ -267,7 +267,7 @@ def test_no_request_migration_vllm_worker_failure(
@pytest.mark.timeout(140) # 3x average @pytest.mark.timeout(140) # 3x average
def test_no_request_migration_vllm_graceful_shutdown( def test_no_request_migration_vllm_graceful_shutdown(
request, runtime_services, predownload_models, set_ucx_tls_no_mm request, runtime_services, set_ucx_tls_no_mm
): ):
""" """
End-to-end test for worker fault tolerance with graceful shutdown and migration disabled. End-to-end test for worker fault tolerance with graceful shutdown and migration disabled.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment