Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
560bb2fc
Unverified
Commit
560bb2fc
authored
Nov 07, 2025
by
Ryan McCormick
Committed by
GitHub
Nov 08, 2025
Browse files
ci: Skip broken etcd_ha tests until fixed to unblock unrelated PRs (#4198)
parent
93ada899
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
12 additions
and
0 deletions
+12
-0
tests/fault_tolerance/etcd_ha/test_sglang.py
tests/fault_tolerance/etcd_ha/test_sglang.py
+4
-0
tests/fault_tolerance/etcd_ha/test_trtllm.py
tests/fault_tolerance/etcd_ha/test_trtllm.py
+4
-0
tests/fault_tolerance/etcd_ha/test_vllm.py
tests/fault_tolerance/etcd_ha/test_vllm.py
+4
-0
No files found.
tests/fault_tolerance/etcd_ha/test_sglang.py
View file @
560bb2fc
...
@@ -149,6 +149,7 @@ class DynamoWorkerProcess(ManagedProcess):
...
@@ -149,6 +149,7 @@ class DynamoWorkerProcess(ManagedProcess):
@
pytest
.
mark
.
gpu_1
@
pytest
.
mark
.
gpu_1
@
pytest
.
mark
.
e2e
@
pytest
.
mark
.
e2e
@
pytest
.
mark
.
model
(
FAULT_TOLERANCE_MODEL_NAME
)
@
pytest
.
mark
.
model
(
FAULT_TOLERANCE_MODEL_NAME
)
@
pytest
.
mark
.
skip
(
reason
=
"Broken, temporarily disabled"
)
def
test_etcd_ha_failover_sglang_aggregated
(
request
,
predownload_models
):
def
test_etcd_ha_failover_sglang_aggregated
(
request
,
predownload_models
):
"""
"""
Test ETCD High Availability with leader failover using SGLang.
Test ETCD High Availability with leader failover using SGLang.
...
@@ -209,6 +210,7 @@ def test_etcd_ha_failover_sglang_aggregated(request, predownload_models):
...
@@ -209,6 +210,7 @@ def test_etcd_ha_failover_sglang_aggregated(request, predownload_models):
@
pytest
.
mark
.
gpu_2
@
pytest
.
mark
.
gpu_2
@
pytest
.
mark
.
e2e
@
pytest
.
mark
.
e2e
@
pytest
.
mark
.
model
(
FAULT_TOLERANCE_MODEL_NAME
)
@
pytest
.
mark
.
model
(
FAULT_TOLERANCE_MODEL_NAME
)
@
pytest
.
mark
.
skip
(
reason
=
"Broken, temporarily disabled"
)
def
test_etcd_ha_failover_sglang_disaggregated
(
def
test_etcd_ha_failover_sglang_disaggregated
(
request
,
predownload_models
,
set_ucx_tls_no_mm
request
,
predownload_models
,
set_ucx_tls_no_mm
):
):
...
@@ -277,6 +279,7 @@ def test_etcd_ha_failover_sglang_disaggregated(
...
@@ -277,6 +279,7 @@ def test_etcd_ha_failover_sglang_disaggregated(
@
pytest
.
mark
.
gpu_1
@
pytest
.
mark
.
gpu_1
@
pytest
.
mark
.
e2e
@
pytest
.
mark
.
e2e
@
pytest
.
mark
.
model
(
FAULT_TOLERANCE_MODEL_NAME
)
@
pytest
.
mark
.
model
(
FAULT_TOLERANCE_MODEL_NAME
)
@
pytest
.
mark
.
skip
(
reason
=
"Broken, temporarily disabled"
)
def
test_etcd_non_ha_shutdown_sglang_aggregated
(
request
,
predownload_models
):
def
test_etcd_non_ha_shutdown_sglang_aggregated
(
request
,
predownload_models
):
"""
"""
Test that frontend and worker shut down when single ETCD node is terminated using SGLang.
Test that frontend and worker shut down when single ETCD node is terminated using SGLang.
...
@@ -333,6 +336,7 @@ def test_etcd_non_ha_shutdown_sglang_aggregated(request, predownload_models):
...
@@ -333,6 +336,7 @@ def test_etcd_non_ha_shutdown_sglang_aggregated(request, predownload_models):
@
pytest
.
mark
.
gpu_2
@
pytest
.
mark
.
gpu_2
@
pytest
.
mark
.
e2e
@
pytest
.
mark
.
e2e
@
pytest
.
mark
.
model
(
FAULT_TOLERANCE_MODEL_NAME
)
@
pytest
.
mark
.
model
(
FAULT_TOLERANCE_MODEL_NAME
)
@
pytest
.
mark
.
skip
(
reason
=
"Broken, temporarily disabled"
)
def
test_etcd_non_ha_shutdown_sglang_disaggregated
(
def
test_etcd_non_ha_shutdown_sglang_disaggregated
(
request
,
predownload_models
,
set_ucx_tls_no_mm
request
,
predownload_models
,
set_ucx_tls_no_mm
):
):
...
...
tests/fault_tolerance/etcd_ha/test_trtllm.py
View file @
560bb2fc
...
@@ -135,6 +135,7 @@ class DynamoWorkerProcess(ManagedProcess):
...
@@ -135,6 +135,7 @@ class DynamoWorkerProcess(ManagedProcess):
@
pytest
.
mark
.
gpu_1
@
pytest
.
mark
.
gpu_1
@
pytest
.
mark
.
e2e
@
pytest
.
mark
.
e2e
@
pytest
.
mark
.
model
(
FAULT_TOLERANCE_MODEL_NAME
)
@
pytest
.
mark
.
model
(
FAULT_TOLERANCE_MODEL_NAME
)
@
pytest
.
mark
.
skip
(
reason
=
"Broken, temporarily disabled"
)
def
test_etcd_ha_failover_trtllm_aggregated
(
request
,
predownload_models
):
def
test_etcd_ha_failover_trtllm_aggregated
(
request
,
predownload_models
):
"""
"""
Test ETCD High Availability with leader failover for TRT-LLM in aggregated mode.
Test ETCD High Availability with leader failover for TRT-LLM in aggregated mode.
...
@@ -195,6 +196,7 @@ def test_etcd_ha_failover_trtllm_aggregated(request, predownload_models):
...
@@ -195,6 +196,7 @@ def test_etcd_ha_failover_trtllm_aggregated(request, predownload_models):
@
pytest
.
mark
.
gpu_1
@
pytest
.
mark
.
gpu_1
@
pytest
.
mark
.
e2e
@
pytest
.
mark
.
e2e
@
pytest
.
mark
.
model
(
FAULT_TOLERANCE_MODEL_NAME
)
@
pytest
.
mark
.
model
(
FAULT_TOLERANCE_MODEL_NAME
)
@
pytest
.
mark
.
skip
(
reason
=
"Broken, temporarily disabled"
)
def
test_etcd_ha_failover_trtllm_disaggregated
(
def
test_etcd_ha_failover_trtllm_disaggregated
(
request
,
predownload_models
,
set_ucx_tls_no_mm
request
,
predownload_models
,
set_ucx_tls_no_mm
):
):
...
@@ -262,6 +264,7 @@ def test_etcd_ha_failover_trtllm_disaggregated(
...
@@ -262,6 +264,7 @@ def test_etcd_ha_failover_trtllm_disaggregated(
@
pytest
.
mark
.
gpu_1
@
pytest
.
mark
.
gpu_1
@
pytest
.
mark
.
e2e
@
pytest
.
mark
.
e2e
@
pytest
.
mark
.
model
(
FAULT_TOLERANCE_MODEL_NAME
)
@
pytest
.
mark
.
model
(
FAULT_TOLERANCE_MODEL_NAME
)
@
pytest
.
mark
.
skip
(
reason
=
"Broken, temporarily disabled"
)
def
test_etcd_non_ha_shutdown_trtllm_aggregated
(
request
,
predownload_models
):
def
test_etcd_non_ha_shutdown_trtllm_aggregated
(
request
,
predownload_models
):
"""
"""
Test that frontend and worker shut down when single ETCD node is terminated for TRT-LLM in aggregated mode.
Test that frontend and worker shut down when single ETCD node is terminated for TRT-LLM in aggregated mode.
...
@@ -321,6 +324,7 @@ def test_etcd_non_ha_shutdown_trtllm_aggregated(request, predownload_models):
...
@@ -321,6 +324,7 @@ def test_etcd_non_ha_shutdown_trtllm_aggregated(request, predownload_models):
@
pytest
.
mark
.
gpu_1
@
pytest
.
mark
.
gpu_1
@
pytest
.
mark
.
e2e
@
pytest
.
mark
.
e2e
@
pytest
.
mark
.
model
(
FAULT_TOLERANCE_MODEL_NAME
)
@
pytest
.
mark
.
model
(
FAULT_TOLERANCE_MODEL_NAME
)
@
pytest
.
mark
.
skip
(
reason
=
"Broken, temporarily disabled"
)
def
test_etcd_non_ha_shutdown_trtllm_disaggregated
(
def
test_etcd_non_ha_shutdown_trtllm_disaggregated
(
request
,
predownload_models
,
set_ucx_tls_no_mm
request
,
predownload_models
,
set_ucx_tls_no_mm
):
):
...
...
tests/fault_tolerance/etcd_ha/test_vllm.py
View file @
560bb2fc
...
@@ -117,6 +117,7 @@ class DynamoWorkerProcess(ManagedProcess):
...
@@ -117,6 +117,7 @@ class DynamoWorkerProcess(ManagedProcess):
@
pytest
.
mark
.
gpu_1
@
pytest
.
mark
.
gpu_1
@
pytest
.
mark
.
e2e
@
pytest
.
mark
.
e2e
@
pytest
.
mark
.
model
(
FAULT_TOLERANCE_MODEL_NAME
)
@
pytest
.
mark
.
model
(
FAULT_TOLERANCE_MODEL_NAME
)
@
pytest
.
mark
.
skip
(
reason
=
"Broken, temporarily disabled"
)
def
test_etcd_ha_failover_vllm_aggregated
(
request
,
predownload_models
):
def
test_etcd_ha_failover_vllm_aggregated
(
request
,
predownload_models
):
"""
"""
Test ETCD High Availability with leader failover.
Test ETCD High Availability with leader failover.
...
@@ -175,6 +176,7 @@ def test_etcd_ha_failover_vllm_aggregated(request, predownload_models):
...
@@ -175,6 +176,7 @@ def test_etcd_ha_failover_vllm_aggregated(request, predownload_models):
@
pytest
.
mark
.
gpu_1
@
pytest
.
mark
.
gpu_1
@
pytest
.
mark
.
e2e
@
pytest
.
mark
.
e2e
@
pytest
.
mark
.
model
(
FAULT_TOLERANCE_MODEL_NAME
)
@
pytest
.
mark
.
model
(
FAULT_TOLERANCE_MODEL_NAME
)
@
pytest
.
mark
.
skip
(
reason
=
"Broken, temporarily disabled"
)
def
test_etcd_ha_failover_vllm_disaggregated
(
def
test_etcd_ha_failover_vllm_disaggregated
(
request
,
predownload_models
,
set_ucx_tls_no_mm
request
,
predownload_models
,
set_ucx_tls_no_mm
):
):
...
@@ -239,6 +241,7 @@ def test_etcd_ha_failover_vllm_disaggregated(
...
@@ -239,6 +241,7 @@ def test_etcd_ha_failover_vllm_disaggregated(
@
pytest
.
mark
.
gpu_1
@
pytest
.
mark
.
gpu_1
@
pytest
.
mark
.
e2e
@
pytest
.
mark
.
e2e
@
pytest
.
mark
.
model
(
FAULT_TOLERANCE_MODEL_NAME
)
@
pytest
.
mark
.
model
(
FAULT_TOLERANCE_MODEL_NAME
)
@
pytest
.
mark
.
skip
(
reason
=
"Broken, temporarily disabled"
)
def
test_etcd_non_ha_shutdown_vllm_aggregated
(
request
,
predownload_models
):
def
test_etcd_non_ha_shutdown_vllm_aggregated
(
request
,
predownload_models
):
"""
"""
Test that frontend and worker shut down when single ETCD node is terminated.
Test that frontend and worker shut down when single ETCD node is terminated.
...
@@ -293,6 +296,7 @@ def test_etcd_non_ha_shutdown_vllm_aggregated(request, predownload_models):
...
@@ -293,6 +296,7 @@ def test_etcd_non_ha_shutdown_vllm_aggregated(request, predownload_models):
@
pytest
.
mark
.
gpu_1
@
pytest
.
mark
.
gpu_1
@
pytest
.
mark
.
e2e
@
pytest
.
mark
.
e2e
@
pytest
.
mark
.
model
(
FAULT_TOLERANCE_MODEL_NAME
)
@
pytest
.
mark
.
model
(
FAULT_TOLERANCE_MODEL_NAME
)
@
pytest
.
mark
.
skip
(
reason
=
"Broken, temporarily disabled"
)
def
test_etcd_non_ha_shutdown_vllm_disaggregated
(
def
test_etcd_non_ha_shutdown_vllm_disaggregated
(
request
,
predownload_models
,
set_ucx_tls_no_mm
request
,
predownload_models
,
set_ucx_tls_no_mm
):
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment