Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
4ca1679c
Unverified
Commit
4ca1679c
authored
Dec 08, 2025
by
Jacky
Committed by
GitHub
Dec 09, 2025
Browse files
test: Pre-download models before tests are ran (#4811)
Signed-off-by:
Jacky
<
18255193+kthui@users.noreply.github.com
>
parent
e6de33f8
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
42 additions
and
35 deletions
+42
-35
tests/conftest.py
tests/conftest.py
+21
-0
tests/fault_tolerance/cancellation/test_sglang.py
tests/fault_tolerance/cancellation/test_sglang.py
+2
-6
tests/fault_tolerance/cancellation/test_trtllm.py
tests/fault_tolerance/cancellation/test_trtllm.py
+4
-12
tests/fault_tolerance/cancellation/test_vllm.py
tests/fault_tolerance/cancellation/test_vllm.py
+3
-5
tests/fault_tolerance/migration/test_sglang.py
tests/fault_tolerance/migration/test_sglang.py
+4
-4
tests/fault_tolerance/migration/test_trtllm.py
tests/fault_tolerance/migration/test_trtllm.py
+4
-4
tests/fault_tolerance/migration/test_vllm.py
tests/fault_tolerance/migration/test_vllm.py
+4
-4
No files found.
tests/conftest.py
View file @
4ca1679c
...
...
@@ -17,6 +17,7 @@ import logging
import
os
import
shutil
import
tempfile
import
time
from
pathlib
import
Path
from
typing
import
Optional
...
...
@@ -226,6 +227,26 @@ def pytest_collection_modifyitems(config, items):
config
.
models_to_download
=
models_to_download
def pytest_runtestloop(session):
    """Download models after collection but before any tests run.

    This hook runs after pytest_collection_modifyitems (so models are collected)
    but before any test execution, ensuring model downloads don't count against
    test timeouts.

    Args:
        session: The pytest session; ``session.config.models_to_download`` is
            read if present (it is assigned in pytest_collection_modifyitems).

    Returns:
        None, so that pytest continues on to the remaining runtestloop
        implementations (including the built-in one that executes the tests).
    """
    # The runtestloop hook is also invoked for `--collect-only`, where pytest's
    # built-in loop returns without executing anything. Downloading models in
    # that mode is wasted work, so bail out early.
    if session.config.getoption("collectonly", default=False):
        return None

    models = getattr(session.config, "models_to_download", None)
    if not models:
        # Nothing was requested by the collected tests (or the attribute was
        # never set).
        return None

    logging.info(
        f"Downloading {len(models)} models before test execution\nModels: {models}"
    )
    start_time = time.time()
    download_models(model_list=list(models))
    download_duration = time.time() - start_time
    logging.info(f"Model download completed in {download_duration:.1f}s")
    return None
class
EtcdServer
(
ManagedProcess
):
def
__init__
(
self
,
request
,
port
=
2379
,
timeout
=
300
):
port_string
=
str
(
port
)
...
...
tests/fault_tolerance/cancellation/test_sglang.py
View file @
4ca1679c
...
...
@@ -161,9 +161,7 @@ class DynamoWorkerProcess(ManagedProcess):
@
pytest
.
mark
.
timeout
(
160
)
# 3x average
@
pytest
.
mark
.
gpu_1
@
pytest
.
mark
.
xfail
(
strict
=
False
)
def
test_request_cancellation_sglang_aggregated
(
request
,
runtime_services
,
predownload_models
):
def
test_request_cancellation_sglang_aggregated
(
request
,
runtime_services
):
"""
End-to-end test for request cancellation functionality in aggregated mode.
...
...
@@ -247,9 +245,7 @@ def test_request_cancellation_sglang_aggregated(
@
pytest
.
mark
.
timeout
(
185
)
# 3x average
@
pytest
.
mark
.
gpu_2
def
test_request_cancellation_sglang_decode_cancel
(
request
,
runtime_services
,
predownload_models
):
def
test_request_cancellation_sglang_decode_cancel
(
request
,
runtime_services
):
"""
End-to-end test for request cancellation during decode phase.
...
...
tests/fault_tolerance/cancellation/test_trtllm.py
View file @
4ca1679c
...
...
@@ -141,9 +141,7 @@ class DynamoWorkerProcess(ManagedProcess):
@
pytest
.
mark
.
timeout
(
140
)
# 3x average
def
test_request_cancellation_trtllm_aggregated
(
request
,
runtime_services
,
predownload_models
):
def
test_request_cancellation_trtllm_aggregated
(
request
,
runtime_services
):
"""
End-to-end test for request cancellation functionality in aggregated mode.
...
...
@@ -215,9 +213,7 @@ def test_request_cancellation_trtllm_aggregated(
@
pytest
.
mark
.
timeout
(
350
)
# 3x average
def
test_request_cancellation_trtllm_decode_cancel
(
request
,
runtime_services
,
predownload_models
):
def
test_request_cancellation_trtllm_decode_cancel
(
request
,
runtime_services
):
"""
End-to-end test for request cancellation during decode phase with unified frontend.
...
...
@@ -288,9 +284,7 @@ def test_request_cancellation_trtllm_decode_cancel(
@
pytest
.
mark
.
timeout
(
350
)
# 3x average
def
test_request_cancellation_trtllm_prefill_cancel
(
request
,
runtime_services
,
predownload_models
):
def
test_request_cancellation_trtllm_prefill_cancel
(
request
,
runtime_services
):
"""
End-to-end test for request cancellation during prefill phase with unified frontend.
...
...
@@ -375,9 +369,7 @@ def test_request_cancellation_trtllm_prefill_cancel(
reason
=
"May fail due to unknown reason with TRT-LLM or backend implementation"
,
strict
=
False
,
)
def
test_request_cancellation_trtllm_kv_transfer_cancel
(
request
,
runtime_services
,
predownload_models
):
def
test_request_cancellation_trtllm_kv_transfer_cancel
(
request
,
runtime_services
):
"""
End-to-end test for request cancellation during prefill to decode KV transfer phase.
...
...
tests/fault_tolerance/cancellation/test_vllm.py
View file @
4ca1679c
...
...
@@ -134,9 +134,7 @@ class DynamoWorkerProcess(ManagedProcess):
@
pytest
.
mark
.
timeout
(
110
)
# 3x average
def
test_request_cancellation_vllm_aggregated
(
request
,
runtime_services
,
predownload_models
):
def
test_request_cancellation_vllm_aggregated
(
request
,
runtime_services
):
"""
End-to-end test for request cancellation functionality in aggregated mode.
...
...
@@ -209,7 +207,7 @@ def test_request_cancellation_vllm_aggregated(
@
pytest
.
mark
.
timeout
(
150
)
# 3x average
def
test_request_cancellation_vllm_decode_cancel
(
request
,
runtime_services
,
predownload_models
,
set_ucx_tls_no_mm
request
,
runtime_services
,
set_ucx_tls_no_mm
):
"""
End-to-end test for request cancellation during decode phase.
...
...
@@ -279,7 +277,7 @@ def test_request_cancellation_vllm_decode_cancel(
@
pytest
.
mark
.
timeout
(
150
)
# 3x average
def
test_request_cancellation_vllm_prefill_cancel
(
request
,
runtime_services
,
predownload_models
,
set_ucx_tls_no_mm
request
,
runtime_services
,
set_ucx_tls_no_mm
):
"""
End-to-end test for request cancellation during prefill phase.
...
...
tests/fault_tolerance/migration/test_sglang.py
View file @
4ca1679c
...
...
@@ -115,7 +115,7 @@ class DynamoWorkerProcess(ManagedProcess):
@
pytest
.
mark
.
timeout
(
235
)
# 3x average
def
test_request_migration_sglang_worker_failure
(
request
,
runtime_services
,
predownload_models
,
set_ucx_tls_no_mm
request
,
runtime_services
,
set_ucx_tls_no_mm
):
"""
End-to-end test for worker fault tolerance with migration support using SGLang.
...
...
@@ -159,7 +159,7 @@ def test_request_migration_sglang_worker_failure(
@
pytest
.
mark
.
skip
(
reason
=
"SGLang graceful shutdown not yet implemented"
)
def
test_request_migration_sglang_graceful_shutdown
(
request
,
runtime_services
,
predownload_models
,
set_ucx_tls_no_mm
request
,
runtime_services
,
set_ucx_tls_no_mm
):
"""
End-to-end test for worker fault tolerance with graceful shutdown and migration support using SGLang.
...
...
@@ -207,7 +207,7 @@ def test_request_migration_sglang_graceful_shutdown(
@
pytest
.
mark
.
timeout
(
135
)
# 3x average
def
test_no_request_migration_sglang_worker_failure
(
request
,
runtime_services
,
predownload_models
,
set_ucx_tls_no_mm
request
,
runtime_services
,
set_ucx_tls_no_mm
):
"""
End-to-end test for worker fault tolerance with migration disabled using SGLang.
...
...
@@ -267,7 +267,7 @@ def test_no_request_migration_sglang_worker_failure(
@
pytest
.
mark
.
skip
(
reason
=
"SGLang graceful shutdown not yet implemented"
)
def
test_no_request_migration_sglang_graceful_shutdown
(
request
,
runtime_services
,
predownload_models
,
set_ucx_tls_no_mm
request
,
runtime_services
,
set_ucx_tls_no_mm
):
"""
End-to-end test for worker fault tolerance with graceful shutdown and migration disabled using SGLang.
...
...
tests/fault_tolerance/migration/test_trtllm.py
View file @
4ca1679c
...
...
@@ -111,7 +111,7 @@ class DynamoWorkerProcess(ManagedProcess):
@
pytest
.
mark
.
timeout
(
290
)
# 3x average
def
test_request_migration_trtllm_worker_failure
(
request
,
runtime_services
,
predownload_models
,
set_ucx_tls_no_mm
request
,
runtime_services
,
set_ucx_tls_no_mm
):
"""
End-to-end test for worker fault tolerance with migration support using TRT-LLM.
...
...
@@ -155,7 +155,7 @@ def test_request_migration_trtllm_worker_failure(
@
pytest
.
mark
.
skip
(
reason
=
"TRT-LLM graceful shutdown not yet implemented"
)
def
test_request_migration_trtllm_graceful_shutdown
(
request
,
runtime_services
,
predownload_models
,
set_ucx_tls_no_mm
request
,
runtime_services
,
set_ucx_tls_no_mm
):
"""
End-to-end test for worker fault tolerance with graceful shutdown and migration support using TRT-LLM.
...
...
@@ -203,7 +203,7 @@ def test_request_migration_trtllm_graceful_shutdown(
@
pytest
.
mark
.
timeout
(
185
)
# 3x average
def
test_no_request_migration_trtllm_worker_failure
(
request
,
runtime_services
,
predownload_models
,
set_ucx_tls_no_mm
request
,
runtime_services
,
set_ucx_tls_no_mm
):
"""
End-to-end test for worker fault tolerance with migration disabled using TRT-LLM.
...
...
@@ -263,7 +263,7 @@ def test_no_request_migration_trtllm_worker_failure(
@
pytest
.
mark
.
skip
(
reason
=
"TRT-LLM graceful shutdown not yet implemented"
)
def
test_no_request_migration_trtllm_graceful_shutdown
(
request
,
runtime_services
,
predownload_models
,
set_ucx_tls_no_mm
request
,
runtime_services
,
set_ucx_tls_no_mm
):
"""
End-to-end test for worker fault tolerance with graceful shutdown and migration disabled using TRT-LLM.
...
...
tests/fault_tolerance/migration/test_vllm.py
View file @
4ca1679c
...
...
@@ -115,7 +115,7 @@ class DynamoWorkerProcess(ManagedProcess):
@
pytest
.
mark
.
timeout
(
290
)
# 3x average
def
test_request_migration_vllm_worker_failure
(
request
,
runtime_services
,
predownload_models
,
set_ucx_tls_no_mm
request
,
runtime_services
,
set_ucx_tls_no_mm
):
"""
End-to-end test for worker fault tolerance with migration support.
...
...
@@ -159,7 +159,7 @@ def test_request_migration_vllm_worker_failure(
@
pytest
.
mark
.
timeout
(
280
)
# 3x average
def
test_request_migration_vllm_graceful_shutdown
(
request
,
runtime_services
,
predownload_models
,
set_ucx_tls_no_mm
request
,
runtime_services
,
set_ucx_tls_no_mm
):
"""
End-to-end test for worker fault tolerance with graceful shutdown and migration support.
...
...
@@ -207,7 +207,7 @@ def test_request_migration_vllm_graceful_shutdown(
@
pytest
.
mark
.
timeout
(
150
)
# 3x average
def
test_no_request_migration_vllm_worker_failure
(
request
,
runtime_services
,
predownload_models
,
set_ucx_tls_no_mm
request
,
runtime_services
,
set_ucx_tls_no_mm
):
"""
End-to-end test for worker fault tolerance with migration disabled.
...
...
@@ -267,7 +267,7 @@ def test_no_request_migration_vllm_worker_failure(
@
pytest
.
mark
.
timeout
(
140
)
# 3x average
def
test_no_request_migration_vllm_graceful_shutdown
(
request
,
runtime_services
,
predownload_models
,
set_ucx_tls_no_mm
request
,
runtime_services
,
set_ucx_tls_no_mm
):
"""
End-to-end test for worker fault tolerance with graceful shutdown and migration disabled.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment