Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
ef1f7030
Unverified
Commit
ef1f7030
authored
Nov 25, 2025
by
Micah Williamson
Committed by
GitHub
Nov 25, 2025
Browse files
[ROCm][CI] Fix test_cudagraph_mode failure in AMD CI (#29367)
Signed-off-by:
Micah Williamson
<
micah.williamson@amd.com
>
parent
12c007e2
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
51 additions
and
22 deletions
+51
-22
tests/v1/attention/utils.py
tests/v1/attention/utils.py
+7
-0
tests/v1/cudagraph/test_cudagraph_mode.py
tests/v1/cudagraph/test_cudagraph_mode.py
+42
-20
vllm/platforms/rocm.py
vllm/platforms/rocm.py
+2
-2
No files found.
tests/v1/attention/utils.py
View file @
ef1f7030
...
@@ -340,4 +340,11 @@ full_cg_backend_configs = {
...
@@ -340,4 +340,11 @@ full_cg_backend_configs = {
"cudagraph_mode"
:
"FULL_AND_PIECEWISE"
,
"cudagraph_mode"
:
"FULL_AND_PIECEWISE"
,
},
},
),
),
"RocmAttn"
:
BackendConfig
(
name
=
"RocmAttn"
,
env_vars
=
{
"VLLM_V1_USE_PREFILL_DECODE_ATTENTION"
:
"1"
},
comp_config
=
{
"cudagraph_mode"
:
"FULL"
,
},
),
}
}
tests/v1/cudagraph/test_cudagraph_mode.py
View file @
ef1f7030
...
@@ -35,14 +35,22 @@ def temporary_environ(env_vars):
...
@@ -35,14 +35,22 @@ def temporary_environ(env_vars):
# test attention backend and cudagraph_mode combo
# test attention backend and cudagraph_mode combo
# (backend_name, cudagraph_mode, supported)
# (backend_name, cudagraph_mode, supported)
combo_cases_1
=
[
if
current_platform
.
is_rocm
():
combo_cases_1
=
[
(
"RocmAttn"
,
"FULL"
,
True
),
(
"RocmAttn"
,
"FULL_AND_PIECEWISE"
,
True
),
(
"TritonAttn"
,
"FULL"
,
True
),
(
"TritonAttn"
,
"FULL_AND_PIECEWISE"
,
True
),
]
else
:
combo_cases_1
=
[
(
"FA3"
,
"FULL"
,
True
),
(
"FA3"
,
"FULL"
,
True
),
(
"FA3"
,
"FULL_AND_PIECEWISE"
,
True
),
(
"FA3"
,
"FULL_AND_PIECEWISE"
,
True
),
(
"FA2"
,
"FULL"
,
True
),
# Should fallback to FULL_AND_PIECEWISE
(
"FA2"
,
"FULL"
,
True
),
# Should fallback to FULL_AND_PIECEWISE
(
"FA2"
,
"FULL_AND_PIECEWISE"
,
True
),
(
"FA2"
,
"FULL_AND_PIECEWISE"
,
True
),
(
"FlashInfer"
,
"FULL"
,
True
),
# Should fallback to FULL_AND_PIECEWISE
(
"FlashInfer"
,
"FULL"
,
True
),
# Should fallback to FULL_AND_PIECEWISE
(
"FlashInfer"
,
"FULL_AND_PIECEWISE"
,
True
),
(
"FlashInfer"
,
"FULL_AND_PIECEWISE"
,
True
),
]
]
@
pytest
.
mark
.
parametrize
(
"backend_name, cudagraph_mode, supported"
,
combo_cases_1
)
@
pytest
.
mark
.
parametrize
(
"backend_name, cudagraph_mode, supported"
,
combo_cases_1
)
...
@@ -92,7 +100,21 @@ def test_backend_and_cudagraph_mode_combo(backend_name, cudagraph_mode, supporte
...
@@ -92,7 +100,21 @@ def test_backend_and_cudagraph_mode_combo(backend_name, cudagraph_mode, supporte
# test cudagraph_mode with different compilation mode.
# test cudagraph_mode with different compilation mode.
# (backend_name, cudagraph_mode, compilation_mode, supported)
# (backend_name, cudagraph_mode, compilation_mode, supported)
combo_cases_2
=
[
if
current_platform
.
is_rocm
():
combo_cases_2
=
[
(
"RocmAttn"
,
"FULL"
,
CompilationMode
.
NONE
,
True
),
(
"RocmAttn"
,
"FULL"
,
CompilationMode
.
VLLM_COMPILE
,
True
),
(
"RocmAttn"
,
"PIECEWISE"
,
CompilationMode
.
NONE
,
False
),
(
"RocmAttn"
,
"PIECEWISE"
,
CompilationMode
.
VLLM_COMPILE
,
True
),
(
"RocmAttn"
,
"FULL_AND_PIECEWISE"
,
CompilationMode
.
NONE
,
False
),
(
"RocmAttn"
,
"FULL_AND_PIECEWISE"
,
CompilationMode
.
VLLM_COMPILE
,
True
),
(
"RocmAttn"
,
"FULL_DECODE_ONLY"
,
CompilationMode
.
NONE
,
True
),
(
"RocmAttn"
,
"FULL_DECODE_ONLY"
,
CompilationMode
.
VLLM_COMPILE
,
True
),
(
"RocmAttn"
,
"NONE"
,
CompilationMode
.
NONE
,
True
),
(
"RocmAttn"
,
"NONE"
,
CompilationMode
.
VLLM_COMPILE
,
True
),
]
else
:
combo_cases_2
=
[
(
"FA2"
,
"FULL"
,
CompilationMode
.
NONE
,
True
),
(
"FA2"
,
"FULL"
,
CompilationMode
.
NONE
,
True
),
(
"FA2"
,
"FULL"
,
CompilationMode
.
VLLM_COMPILE
,
True
),
(
"FA2"
,
"FULL"
,
CompilationMode
.
VLLM_COMPILE
,
True
),
(
"FA2"
,
"PIECEWISE"
,
CompilationMode
.
NONE
,
False
),
(
"FA2"
,
"PIECEWISE"
,
CompilationMode
.
NONE
,
False
),
...
@@ -103,7 +125,7 @@ combo_cases_2 = [
...
@@ -103,7 +125,7 @@ combo_cases_2 = [
(
"FA2"
,
"FULL_DECODE_ONLY"
,
CompilationMode
.
VLLM_COMPILE
,
True
),
(
"FA2"
,
"FULL_DECODE_ONLY"
,
CompilationMode
.
VLLM_COMPILE
,
True
),
(
"FA2"
,
"NONE"
,
CompilationMode
.
NONE
,
True
),
(
"FA2"
,
"NONE"
,
CompilationMode
.
NONE
,
True
),
(
"FA2"
,
"NONE"
,
CompilationMode
.
VLLM_COMPILE
,
True
),
(
"FA2"
,
"NONE"
,
CompilationMode
.
VLLM_COMPILE
,
True
),
]
]
@
pytest
.
mark
.
parametrize
(
@
pytest
.
mark
.
parametrize
(
...
...
vllm/platforms/rocm.py
View file @
ef1f7030
...
@@ -321,8 +321,8 @@ class RocmPlatform(Platform):
...
@@ -321,8 +321,8 @@ class RocmPlatform(Platform):
return
AttentionBackendEnum
.
TRITON_ATTN
.
get_path
()
return
AttentionBackendEnum
.
TRITON_ATTN
.
get_path
()
raise
RuntimeError
(
raise
RuntimeError
(
"V0 a
ttention backend
s have been removed. Set VLLM_USE_V1=1
"
f
"A
ttention backend
{
selected_backend
.
name
}
is not supported on
"
"
to select a supported backen
d."
"
ROCm. Note that V0 attention backends have been remove
d."
)
)
@
classmethod
@
classmethod
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment