Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
afb1e5b3
Unverified
Commit afb1e5b3 authored Dec 02, 2025 by Divakar Verma; committed by GitHub on Dec 02, 2025
Browse files
[CI][ROCm][tests/v1/e2e] Fix multiprocessing launch for the test (#29123)
Signed-off-by: Divakar Verma <divakar.verma@amd.com>
Parent commit: 1c593e11
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing 1 changed file with 19 additions and 3 deletions
+19
-3
tests/v1/e2e/test_kv_sharing_fast_prefill.py
tests/v1/e2e/test_kv_sharing_fast_prefill.py
+19
-3
No files found.
tests/v1/e2e/test_kv_sharing_fast_prefill.py
View file @
afb1e5b3
...
...
@@ -7,6 +7,7 @@ import pytest
from
vllm
import
LLM
,
SamplingParams
from
vllm.config
import
CompilationConfig
,
CompilationMode
from
vllm.platforms
import
current_platform
from
...utils
import
check_answers
,
fork_new_process_for_each_test
,
prep_prompts
...
...
@@ -43,15 +44,26 @@ def test_prompts():
return
prompts
@
fork_new_process_for_each_test
use_fork_for_test
=
(
fork_new_process_for_each_test
if
not
current_platform
.
is_rocm
()
else
lambda
x
:
x
)
@
use_fork_for_test
@
pytest
.
mark
.
parametrize
(
"kv_sharing_fast_prefill"
,
[
False
,
True
])
@
pytest
.
mark
.
parametrize
(
"enforce_eager"
,
[
True
,
False
])
def
test_kv_sharing_fast_prefill
(
monkeypatch
:
pytest
.
MonkeyPatch
,
kv_sharing_fast_prefill
:
bool
,
enforce_eager
:
bool
,
test_prompts
:
list
[
str
],
):
if
not
enforce_eager
and
current_platform
.
is_rocm
():
# Relevant context: https://github.com/vllm-project/vllm/pull/29244
pytest
.
skip
(
"ROCm: torch.compile produces incorrect output for gemma-3n's GELU "
"with tanh approximation. Use enforce_eager=True instead."
)
sampling_params
=
SamplingParams
(
temperature
=
0.0
,
max_tokens
=
100
)
compilation_config
=
CompilationConfig
(
# This allows vLLM compilation backend to handle allocating and
...
...
@@ -65,6 +77,10 @@ def test_kv_sharing_fast_prefill(
with
monkeypatch
.
context
()
as
m
:
# Make scheduling deterministic for reproducibility
if
current_platform
.
is_rocm
():
# Use spawn to prevent cuda re-initialization error
m
.
setenv
(
"VLLM_WORKER_MULTIPROC_METHOD"
,
"spawn"
)
else
:
m
.
setenv
(
"VLLM_ENABLE_V1_MULTIPROCESSING"
,
"0"
)
prompts
,
answer
,
indices
=
prep_prompts
(
batch_size
)
...
...
Write
Preview
Markdown
is supported
0%
Try again or attach a new file.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment.