Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
15422ed3
Unverified
Commit
15422ed3
authored
Jan 14, 2026
by
Ryan Rock
Committed by
GitHub
Jan 15, 2026
Browse files
[CI/Build][Hardware][AMD] Fix v1/shutdown (#31997)
Signed-off-by:
Ryan Rock
<
ryan.rock@amd.com
>
parent
8471b27d
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
64 additions
and
3 deletions
+64
-3
tests/v1/shutdown/conftest.py
tests/v1/shutdown/conftest.py
+26
-0
tests/v1/shutdown/test_forward_error.py
tests/v1/shutdown/test_forward_error.py
+18
-2
tests/v1/shutdown/test_startup_error.py
tests/v1/shutdown/test_startup_error.py
+20
-1
No files found.
tests/v1/shutdown/conftest.py
0 → 100644
View file @
15422ed3
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
os
from
collections.abc
import
Iterable
from
pathlib
import
Path
import
pytest
from
vllm.platforms
import
current_platform
@pytest.fixture
def rocm_sitecustomize_factory(monkeypatch, tmp_path: Path):
    """Return a function that installs a given sitecustomize payload.

    When the current platform is not ROCm, the returned callable accepts its
    single argument and does nothing. On ROCm, the returned callable writes
    the given lines to ``sitecustomize.py`` inside ``tmp_path`` and puts
    ``tmp_path`` at the front of ``PYTHONPATH`` via ``monkeypatch.setenv``.
    """
    if not current_platform.is_rocm():
        return lambda _: None

    def install(lines: Iterable[str]) -> None:
        """Write ``lines`` to sitecustomize.py and expose it on PYTHONPATH."""
        sc = tmp_path / "sitecustomize.py"
        # Python decodes source files as UTF-8 by default, so write the payload
        # explicitly as UTF-8 instead of relying on the locale's encoding.
        sc.write_text("\n".join(lines) + "\n", encoding="utf-8")
        # filter(None, ...) drops an unset or empty pre-existing PYTHONPATH so
        # the joined value never gains a stray separator.
        monkeypatch.setenv(
            "PYTHONPATH",
            os.pathsep.join(filter(None, [str(tmp_path), os.getenv("PYTHONPATH")])),
        )

    return install
tests/v1/shutdown/test_forward_error.py
View file @
15422ed3
...
...
@@ -3,6 +3,7 @@
"""Test that we handle an Error in model forward and shutdown."""
import
asyncio
import
inspect
import
pytest
...
...
@@ -38,11 +39,22 @@ def evil_forward(self, *args, **kwargs):
return
self
.
model
(
*
args
,
**
kwargs
)
@pytest.fixture
def rocm_evil_forward(rocm_sitecustomize_factory):
    """Install a sitecustomize payload that rebinds LlamaForCausalLM.forward.

    The payload consists of the imports the patch needs, the source text of
    ``evil_forward``, and an assignment that points
    ``LlamaForCausalLM.forward`` at it.
    """
    # NOTE(review): presumably needed so freshly started interpreter processes
    # also see the patched forward -- confirm against the test body.
    payload = [
        "from vllm.distributed import get_tensor_model_parallel_rank",
        "from vllm.model_executor.models.llama import LlamaForCausalLM",
    ]
    payload.append(inspect.getsource(evil_forward))
    payload.append(f"LlamaForCausalLM.forward = {evil_forward.__name__}")
    rocm_sitecustomize_factory(payload)
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"tensor_parallel_size"
,
[
2
,
1
])
@
pytest
.
mark
.
parametrize
(
"model"
,
MODELS
)
async
def
test_async_llm_model_error
(
monkeypatch
,
tensor_parallel_size
:
int
,
model
:
str
monkeypatch
,
rocm_evil_forward
,
tensor_parallel_size
:
int
,
model
:
str
)
->
None
:
"""Test that AsyncLLM propagates a forward pass error and frees memory.
...
...
@@ -104,7 +116,11 @@ async def test_async_llm_model_error(
@
pytest
.
mark
.
parametrize
(
"tensor_parallel_size"
,
[
2
,
1
])
@
pytest
.
mark
.
parametrize
(
"model"
,
MODELS
)
def
test_llm_model_error
(
monkeypatch
,
tensor_parallel_size
:
int
,
enable_multiprocessing
:
bool
,
model
:
str
monkeypatch
,
rocm_evil_forward
,
tensor_parallel_size
:
int
,
enable_multiprocessing
:
bool
,
model
:
str
,
)
->
None
:
"""Test that LLM propagates a forward pass error and frees memory.
TODO(andy) - LLM without multiprocessing; LLM with multiprocessing
...
...
tests/v1/shutdown/test_startup_error.py
View file @
15422ed3
...
...
@@ -2,6 +2,8 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Test that we handle a startup Error and shutdown."""
import
inspect
import
pytest
from
tests.utils
import
wait_for_gpu_memory_to_clear
...
...
@@ -28,12 +30,28 @@ def evil_method(self, *args, **kwargs):
return
self
.
model
(
*
args
,
**
kwargs
,
intermediate_tensors
=
None
)
@pytest.fixture
def rocm_evil_method(rocm_sitecustomize_factory, request):
    """Install a sitecustomize payload that rebinds a LlamaForCausalLM method.

    The attribute to replace is taken from the ``failing_method`` fixture
    value of the requesting test. The payload consists of the imports the
    patch needs, the source text of ``evil_method``, and an assignment that
    points the chosen attribute at it.
    """
    target_attr = request.getfixturevalue("failing_method")
    payload = [
        "from vllm.distributed import get_tensor_model_parallel_rank",
        "from vllm.model_executor.models.llama import LlamaForCausalLM",
    ]
    payload.append(inspect.getsource(evil_method))
    payload.append(f"LlamaForCausalLM.{target_attr} = {evil_method.__name__}")
    rocm_sitecustomize_factory(payload)
@
pytest
.
mark
.
timeout
(
SHUTDOWN_TEST_TIMEOUT_SEC
)
@
pytest
.
mark
.
parametrize
(
"model"
,
MODELS
)
@
pytest
.
mark
.
parametrize
(
"tensor_parallel_size"
,
[
2
,
1
])
@
pytest
.
mark
.
parametrize
(
"failing_method"
,
[
"forward"
,
"load_weights"
])
def
test_async_llm_startup_error
(
monkeypatch
,
model
:
str
,
tensor_parallel_size
:
int
,
failing_method
:
str
monkeypatch
,
rocm_evil_method
,
model
:
str
,
tensor_parallel_size
:
int
,
failing_method
:
str
,
)
->
None
:
"""Test that AsyncLLM propagates an __init__ error & frees memory.
Test profiling (forward()) and load weights failures.
...
...
@@ -67,6 +85,7 @@ def test_async_llm_startup_error(
@
pytest
.
mark
.
parametrize
(
"failing_method"
,
[
"forward"
,
"load_weights"
])
def
test_llm_startup_error
(
monkeypatch
,
rocm_evil_method
,
model
:
str
,
tensor_parallel_size
:
int
,
enable_multiprocessing
:
bool
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment