"...ssh:/git@developer.sourcefind.cn:2222/OpenDAS/dynamo.git" did not exist on "5ff88b330bcae937c1320cae8ae774e711c1eff2"
Unverified commit 0fa9b998, authored by Jacky, committed by GitHub
Browse files

test: Include a timeout marker to all Cancellation / Migration E2E tests (#4764)


Signed-off-by: Jacky <18255193+kthui@users.noreply.github.com>
parent 501ef021
...@@ -21,6 +21,13 @@ from tests.utils.payloads import check_health_generate, check_models_api ...@@ -21,6 +21,13 @@ from tests.utils.payloads import check_health_generate, check_models_api
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
pytestmark = [
pytest.mark.sglang,
pytest.mark.e2e,
pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME),
pytest.mark.nightly,
]
class DynamoWorkerProcess(ManagedProcess): class DynamoWorkerProcess(ManagedProcess):
"""Process manager for Dynamo worker with SGLang backend""" """Process manager for Dynamo worker with SGLang backend"""
...@@ -146,11 +153,8 @@ class DynamoWorkerProcess(ManagedProcess): ...@@ -146,11 +153,8 @@ class DynamoWorkerProcess(ManagedProcess):
return False return False
@pytest.mark.e2e @pytest.mark.timeout(160) # 3x average
@pytest.mark.sglang
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.nightly
@pytest.mark.xfail(strict=False) @pytest.mark.xfail(strict=False)
def test_request_cancellation_sglang_aggregated( def test_request_cancellation_sglang_aggregated(
request, runtime_services, predownload_models request, runtime_services, predownload_models
...@@ -236,11 +240,8 @@ def test_request_cancellation_sglang_aggregated( ...@@ -236,11 +240,8 @@ def test_request_cancellation_sglang_aggregated(
logger.info(f"{description} detected successfully") logger.info(f"{description} detected successfully")
@pytest.mark.e2e @pytest.mark.timeout(185) # 3x average
@pytest.mark.sglang
@pytest.mark.gpu_2 @pytest.mark.gpu_2
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.nightly
def test_request_cancellation_sglang_decode_cancel( def test_request_cancellation_sglang_decode_cancel(
request, runtime_services, predownload_models request, runtime_services, predownload_models
): ):
......
...@@ -26,6 +26,7 @@ pytestmark = [ ...@@ -26,6 +26,7 @@ pytestmark = [
pytest.mark.gpu_1, pytest.mark.gpu_1,
pytest.mark.e2e, pytest.mark.e2e,
pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME), pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME),
pytest.mark.nightly,
] ]
...@@ -134,7 +135,7 @@ class DynamoWorkerProcess(ManagedProcess): ...@@ -134,7 +135,7 @@ class DynamoWorkerProcess(ManagedProcess):
return False return False
@pytest.mark.nightly @pytest.mark.timeout(140) # 3x average
def test_request_cancellation_trtllm_aggregated( def test_request_cancellation_trtllm_aggregated(
request, runtime_services, predownload_models request, runtime_services, predownload_models
): ):
...@@ -208,7 +209,7 @@ def test_request_cancellation_trtllm_aggregated( ...@@ -208,7 +209,7 @@ def test_request_cancellation_trtllm_aggregated(
logger.info(f"{description} detected successfully") logger.info(f"{description} detected successfully")
@pytest.mark.nightly @pytest.mark.timeout(350) # 3x average
def test_request_cancellation_trtllm_decode_cancel( def test_request_cancellation_trtllm_decode_cancel(
request, runtime_services, predownload_models request, runtime_services, predownload_models
): ):
...@@ -281,7 +282,7 @@ def test_request_cancellation_trtllm_decode_cancel( ...@@ -281,7 +282,7 @@ def test_request_cancellation_trtllm_decode_cancel(
) )
@pytest.mark.nightly @pytest.mark.timeout(350) # 3x average
def test_request_cancellation_trtllm_prefill_cancel( def test_request_cancellation_trtllm_prefill_cancel(
request, runtime_services, predownload_models request, runtime_services, predownload_models
): ):
...@@ -364,6 +365,7 @@ def test_request_cancellation_trtllm_prefill_cancel( ...@@ -364,6 +365,7 @@ def test_request_cancellation_trtllm_prefill_cancel(
) )
@pytest.mark.timeout(350) # 3x average
@pytest.mark.xfail( @pytest.mark.xfail(
reason="May fail due to unknown reason with TRT-LLM or backend implementation", reason="May fail due to unknown reason with TRT-LLM or backend implementation",
strict=False, strict=False,
......
...@@ -20,6 +20,14 @@ from tests.utils.payloads import check_health_generate, check_models_api ...@@ -20,6 +20,14 @@ from tests.utils.payloads import check_health_generate, check_models_api
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
pytestmark = [
pytest.mark.vllm,
pytest.mark.gpu_1,
pytest.mark.e2e,
pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME),
pytest.mark.nightly,
]
class DynamoWorkerProcess(ManagedProcess): class DynamoWorkerProcess(ManagedProcess):
"""Process manager for Dynamo worker with vLLM backend""" """Process manager for Dynamo worker with vLLM backend"""
...@@ -120,11 +128,7 @@ class DynamoWorkerProcess(ManagedProcess): ...@@ -120,11 +128,7 @@ class DynamoWorkerProcess(ManagedProcess):
return False return False
@pytest.mark.vllm @pytest.mark.timeout(110) # 3x average
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.nightly
def test_request_cancellation_vllm_aggregated( def test_request_cancellation_vllm_aggregated(
request, runtime_services, predownload_models request, runtime_services, predownload_models
): ):
...@@ -198,11 +202,7 @@ def test_request_cancellation_vllm_aggregated( ...@@ -198,11 +202,7 @@ def test_request_cancellation_vllm_aggregated(
logger.info(f"{description} detected successfully") logger.info(f"{description} detected successfully")
@pytest.mark.vllm @pytest.mark.timeout(150) # 3x average
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.nightly
def test_request_cancellation_vllm_decode_cancel( def test_request_cancellation_vllm_decode_cancel(
request, runtime_services, predownload_models, set_ucx_tls_no_mm request, runtime_services, predownload_models, set_ucx_tls_no_mm
): ):
...@@ -272,11 +272,7 @@ def test_request_cancellation_vllm_decode_cancel( ...@@ -272,11 +272,7 @@ def test_request_cancellation_vllm_decode_cancel(
) )
@pytest.mark.vllm @pytest.mark.timeout(150) # 3x average
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.nightly
def test_request_cancellation_vllm_prefill_cancel( def test_request_cancellation_vllm_prefill_cancel(
request, runtime_services, predownload_models, set_ucx_tls_no_mm request, runtime_services, predownload_models, set_ucx_tls_no_mm
): ):
......
...@@ -108,6 +108,11 @@ class DynamoWorkerProcess(ManagedProcess): ...@@ -108,6 +108,11 @@ class DynamoWorkerProcess(ManagedProcess):
return False return False
@pytest.mark.timeout(235) # 3x average
@pytest.mark.xfail(
reason="For some reason both replicas received the request where only one should",
strict=False,
)
def test_request_migration_sglang_worker_failure( def test_request_migration_sglang_worker_failure(
request, runtime_services, predownload_models, set_ucx_tls_no_mm request, runtime_services, predownload_models, set_ucx_tls_no_mm
): ):
...@@ -199,6 +204,11 @@ def test_request_migration_sglang_graceful_shutdown( ...@@ -199,6 +204,11 @@ def test_request_migration_sglang_graceful_shutdown(
verify_migration_occurred(frontend) verify_migration_occurred(frontend)
@pytest.mark.timeout(135) # 3x average
@pytest.mark.xfail(
reason="For some reason both replicas received the request where only one should",
strict=False,
)
def test_no_request_migration_sglang_worker_failure( def test_no_request_migration_sglang_worker_failure(
request, runtime_services, predownload_models, set_ucx_tls_no_mm request, runtime_services, predownload_models, set_ucx_tls_no_mm
): ):
......
...@@ -104,6 +104,11 @@ class DynamoWorkerProcess(ManagedProcess): ...@@ -104,6 +104,11 @@ class DynamoWorkerProcess(ManagedProcess):
return False return False
@pytest.mark.timeout(290) # 3x average
@pytest.mark.xfail(
reason="For some reason both replicas received the request where only one should",
strict=False,
)
def test_request_migration_trtllm_worker_failure( def test_request_migration_trtllm_worker_failure(
request, runtime_services, predownload_models, set_ucx_tls_no_mm request, runtime_services, predownload_models, set_ucx_tls_no_mm
): ):
...@@ -195,6 +200,11 @@ def test_request_migration_trtllm_graceful_shutdown( ...@@ -195,6 +200,11 @@ def test_request_migration_trtllm_graceful_shutdown(
verify_migration_occurred(frontend) verify_migration_occurred(frontend)
@pytest.mark.timeout(185) # 3x average
@pytest.mark.xfail(
reason="For some reason both replicas received the request where only one should",
strict=False,
)
def test_no_request_migration_trtllm_worker_failure( def test_no_request_migration_trtllm_worker_failure(
request, runtime_services, predownload_models, set_ucx_tls_no_mm request, runtime_services, predownload_models, set_ucx_tls_no_mm
): ):
......
...@@ -108,6 +108,7 @@ class DynamoWorkerProcess(ManagedProcess): ...@@ -108,6 +108,7 @@ class DynamoWorkerProcess(ManagedProcess):
return False return False
@pytest.mark.timeout(290) # 3x average
def test_request_migration_vllm_worker_failure( def test_request_migration_vllm_worker_failure(
request, runtime_services, predownload_models, set_ucx_tls_no_mm request, runtime_services, predownload_models, set_ucx_tls_no_mm
): ):
...@@ -151,6 +152,7 @@ def test_request_migration_vllm_worker_failure( ...@@ -151,6 +152,7 @@ def test_request_migration_vllm_worker_failure(
verify_migration_occurred(frontend) verify_migration_occurred(frontend)
@pytest.mark.timeout(280) # 3x average
def test_request_migration_vllm_graceful_shutdown( def test_request_migration_vllm_graceful_shutdown(
request, runtime_services, predownload_models, set_ucx_tls_no_mm request, runtime_services, predownload_models, set_ucx_tls_no_mm
): ):
...@@ -198,6 +200,7 @@ def test_request_migration_vllm_graceful_shutdown( ...@@ -198,6 +200,7 @@ def test_request_migration_vllm_graceful_shutdown(
verify_migration_occurred(frontend) verify_migration_occurred(frontend)
@pytest.mark.timeout(150) # 3x average
def test_no_request_migration_vllm_worker_failure( def test_no_request_migration_vllm_worker_failure(
request, runtime_services, predownload_models, set_ucx_tls_no_mm request, runtime_services, predownload_models, set_ucx_tls_no_mm
): ):
...@@ -257,6 +260,7 @@ def test_no_request_migration_vllm_worker_failure( ...@@ -257,6 +260,7 @@ def test_no_request_migration_vllm_worker_failure(
), f"Unexpected migration message: {e}" ), f"Unexpected migration message: {e}"
@pytest.mark.timeout(140) # 3x average
def test_no_request_migration_vllm_graceful_shutdown( def test_no_request_migration_vllm_graceful_shutdown(
request, runtime_services, predownload_models, set_ucx_tls_no_mm request, runtime_services, predownload_models, set_ucx_tls_no_mm
): ):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment