Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
c0a350ca
Unverified
Commit
c0a350ca
authored
Jan 19, 2026
by
Andreas Karatzas
Committed by
GitHub
Jan 19, 2026
Browse files
[ROCm][CI] Add ROCm attention backend support for EAGLE DP tests (#32363)
Signed-off-by:
Andreas Karatzas
<
akaratza@amd.com
>
parent
71832ba7
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
28 additions
and
6 deletions
+28
-6
tests/v1/distributed/test_eagle_dp.py
tests/v1/distributed/test_eagle_dp.py
+28
-6
No files found.
tests/v1/distributed/test_eagle_dp.py
View file @
c0a350ca
...
@@ -9,17 +9,39 @@ import pytest
...
@@ -9,17 +9,39 @@ import pytest
from
vllm
import
SamplingParams
from
vllm
import
SamplingParams
from
vllm.engine.arg_utils
import
AsyncEngineArgs
from
vllm.engine.arg_utils
import
AsyncEngineArgs
from
vllm.platforms
import
current_platform
from
vllm.sampling_params
import
RequestOutputKind
from
vllm.sampling_params
import
RequestOutputKind
from
vllm.v1.engine.async_llm
import
AsyncLLM
from
vllm.v1.engine.async_llm
import
AsyncLLM
DP_SIZE
=
int
(
os
.
getenv
(
"DP_SIZE"
,
2
))
DP_SIZE
=
int
(
os
.
getenv
(
"DP_SIZE"
,
2
))
if
current_platform
.
is_rocm
():
ATTN_BACKENDS
=
[
"ROCM_ATTN"
,
"TRITON_ATTN"
,
"FLEX_ATTENTION"
]
else
:
ATTN_BACKENDS
=
[
"FLASH_ATTN"
]
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
asyncio
async
def
test_run_eagle_dp
(
monkeypatch
:
pytest
.
MonkeyPatch
):
@
pytest
.
mark
.
parametrize
(
"attn_backend"
,
ATTN_BACKENDS
)
@
pytest
.
mark
.
xfail
(
current_platform
.
is_rocm
(),
reason
=
"Test may fail on ROCm until batch invariance is enabled. "
"See: https://github.com/vllm-project/vllm/issues/27433"
,
strict
=
False
,
)
async
def
test_run_eagle_dp
(
monkeypatch
:
pytest
.
MonkeyPatch
,
attn_backend
:
str
):
if
not
current_platform
.
is_rocm
():
# This test checks that running a model with and without eagle
# This test checks that running a model with and without eagle
# leads to identical tokens. This is only true in batch invariant mode
# leads to identical tokens.
# (because the target model verifies all draft tokens in one big forward pass)
#
# NOTE: This is only true in batch invariant mode
# (because the target model verifies all draft tokens in one big
# forward pass)
#
# TODO[ROCm]: Test is passing on ROCm CI but may break in the future.
# Enable batch invariance for ROCm when possible. See:
# https://github.com/vllm-project/vllm/issues/27433
monkeypatch
.
setenv
(
"VLLM_BATCH_INVARIANT"
,
"1"
)
monkeypatch
.
setenv
(
"VLLM_BATCH_INVARIANT"
,
"1"
)
target_model
=
"meta-llama/Llama-3.1-8B-Instruct"
target_model
=
"meta-llama/Llama-3.1-8B-Instruct"
...
@@ -34,7 +56,7 @@ async def test_run_eagle_dp(monkeypatch: pytest.MonkeyPatch):
...
@@ -34,7 +56,7 @@ async def test_run_eagle_dp(monkeypatch: pytest.MonkeyPatch):
data_parallel_backend
=
"mp"
,
# ray takes more time
data_parallel_backend
=
"mp"
,
# ray takes more time
trust_remote_code
=
True
,
trust_remote_code
=
True
,
max_model_len
=
16384
,
max_model_len
=
16384
,
attention_config
=
{
"backend"
:
"FLASH_ATTN"
},
attention_config
=
{
"backend"
:
attn_backend
},
)
)
eagle_engine_args
=
replace
(
eagle_engine_args
=
replace
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment