Unverified commit 560bb2fc, authored by Ryan McCormick, committed by GitHub
Browse files

ci: Skip broken etcd_ha tests until fixed to unblock unrelated PRs (#4198)

parent 93ada899
...@@ -149,6 +149,7 @@ class DynamoWorkerProcess(ManagedProcess): ...@@ -149,6 +149,7 @@ class DynamoWorkerProcess(ManagedProcess):
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.skip(reason="Broken, temporarily disabled")
def test_etcd_ha_failover_sglang_aggregated(request, predownload_models): def test_etcd_ha_failover_sglang_aggregated(request, predownload_models):
""" """
Test ETCD High Availability with leader failover using SGLang. Test ETCD High Availability with leader failover using SGLang.
...@@ -209,6 +210,7 @@ def test_etcd_ha_failover_sglang_aggregated(request, predownload_models): ...@@ -209,6 +210,7 @@ def test_etcd_ha_failover_sglang_aggregated(request, predownload_models):
@pytest.mark.gpu_2 @pytest.mark.gpu_2
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.skip(reason="Broken, temporarily disabled")
def test_etcd_ha_failover_sglang_disaggregated( def test_etcd_ha_failover_sglang_disaggregated(
request, predownload_models, set_ucx_tls_no_mm request, predownload_models, set_ucx_tls_no_mm
): ):
...@@ -277,6 +279,7 @@ def test_etcd_ha_failover_sglang_disaggregated( ...@@ -277,6 +279,7 @@ def test_etcd_ha_failover_sglang_disaggregated(
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.skip(reason="Broken, temporarily disabled")
def test_etcd_non_ha_shutdown_sglang_aggregated(request, predownload_models): def test_etcd_non_ha_shutdown_sglang_aggregated(request, predownload_models):
""" """
Test that frontend and worker shut down when single ETCD node is terminated using SGLang. Test that frontend and worker shut down when single ETCD node is terminated using SGLang.
...@@ -333,6 +336,7 @@ def test_etcd_non_ha_shutdown_sglang_aggregated(request, predownload_models): ...@@ -333,6 +336,7 @@ def test_etcd_non_ha_shutdown_sglang_aggregated(request, predownload_models):
@pytest.mark.gpu_2 @pytest.mark.gpu_2
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.skip(reason="Broken, temporarily disabled")
def test_etcd_non_ha_shutdown_sglang_disaggregated( def test_etcd_non_ha_shutdown_sglang_disaggregated(
request, predownload_models, set_ucx_tls_no_mm request, predownload_models, set_ucx_tls_no_mm
): ):
......
...@@ -135,6 +135,7 @@ class DynamoWorkerProcess(ManagedProcess): ...@@ -135,6 +135,7 @@ class DynamoWorkerProcess(ManagedProcess):
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.skip(reason="Broken, temporarily disabled")
def test_etcd_ha_failover_trtllm_aggregated(request, predownload_models): def test_etcd_ha_failover_trtllm_aggregated(request, predownload_models):
""" """
Test ETCD High Availability with leader failover for TRT-LLM in aggregated mode. Test ETCD High Availability with leader failover for TRT-LLM in aggregated mode.
...@@ -195,6 +196,7 @@ def test_etcd_ha_failover_trtllm_aggregated(request, predownload_models): ...@@ -195,6 +196,7 @@ def test_etcd_ha_failover_trtllm_aggregated(request, predownload_models):
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.skip(reason="Broken, temporarily disabled")
def test_etcd_ha_failover_trtllm_disaggregated( def test_etcd_ha_failover_trtllm_disaggregated(
request, predownload_models, set_ucx_tls_no_mm request, predownload_models, set_ucx_tls_no_mm
): ):
...@@ -262,6 +264,7 @@ def test_etcd_ha_failover_trtllm_disaggregated( ...@@ -262,6 +264,7 @@ def test_etcd_ha_failover_trtllm_disaggregated(
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.skip(reason="Broken, temporarily disabled")
def test_etcd_non_ha_shutdown_trtllm_aggregated(request, predownload_models): def test_etcd_non_ha_shutdown_trtllm_aggregated(request, predownload_models):
""" """
Test that frontend and worker shut down when single ETCD node is terminated for TRT-LLM in aggregated mode. Test that frontend and worker shut down when single ETCD node is terminated for TRT-LLM in aggregated mode.
...@@ -321,6 +324,7 @@ def test_etcd_non_ha_shutdown_trtllm_aggregated(request, predownload_models): ...@@ -321,6 +324,7 @@ def test_etcd_non_ha_shutdown_trtllm_aggregated(request, predownload_models):
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.skip(reason="Broken, temporarily disabled")
def test_etcd_non_ha_shutdown_trtllm_disaggregated( def test_etcd_non_ha_shutdown_trtllm_disaggregated(
request, predownload_models, set_ucx_tls_no_mm request, predownload_models, set_ucx_tls_no_mm
): ):
......
...@@ -117,6 +117,7 @@ class DynamoWorkerProcess(ManagedProcess): ...@@ -117,6 +117,7 @@ class DynamoWorkerProcess(ManagedProcess):
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.skip(reason="Broken, temporarily disabled")
def test_etcd_ha_failover_vllm_aggregated(request, predownload_models): def test_etcd_ha_failover_vllm_aggregated(request, predownload_models):
""" """
Test ETCD High Availability with leader failover. Test ETCD High Availability with leader failover.
...@@ -175,6 +176,7 @@ def test_etcd_ha_failover_vllm_aggregated(request, predownload_models): ...@@ -175,6 +176,7 @@ def test_etcd_ha_failover_vllm_aggregated(request, predownload_models):
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.skip(reason="Broken, temporarily disabled")
def test_etcd_ha_failover_vllm_disaggregated( def test_etcd_ha_failover_vllm_disaggregated(
request, predownload_models, set_ucx_tls_no_mm request, predownload_models, set_ucx_tls_no_mm
): ):
...@@ -239,6 +241,7 @@ def test_etcd_ha_failover_vllm_disaggregated( ...@@ -239,6 +241,7 @@ def test_etcd_ha_failover_vllm_disaggregated(
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.skip(reason="Broken, temporarily disabled")
def test_etcd_non_ha_shutdown_vllm_aggregated(request, predownload_models): def test_etcd_non_ha_shutdown_vllm_aggregated(request, predownload_models):
""" """
Test that frontend and worker shut down when single ETCD node is terminated. Test that frontend and worker shut down when single ETCD node is terminated.
...@@ -293,6 +296,7 @@ def test_etcd_non_ha_shutdown_vllm_aggregated(request, predownload_models): ...@@ -293,6 +296,7 @@ def test_etcd_non_ha_shutdown_vllm_aggregated(request, predownload_models):
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.skip(reason="Broken, temporarily disabled")
def test_etcd_non_ha_shutdown_vllm_disaggregated( def test_etcd_non_ha_shutdown_vllm_disaggregated(
request, predownload_models, set_ucx_tls_no_mm request, predownload_models, set_ucx_tls_no_mm
): ):
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment