Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
176a95c6
Unverified
Commit
176a95c6
authored
May 13, 2025
by
Luka Govedič
Committed by
GitHub
May 13, 2025
Browse files
[Fix] Support CUDAGraph capture for encoder-decoder on ROCm (#18104)
Signed-off-by:
Luka Govedič
<
lgovedic@redhat.com
>
parent
f2ae883b
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
8 additions
and
8 deletions
+8
-8
vllm/attention/backends/utils.py
vllm/attention/backends/utils.py
+8
-8
No files found.
vllm/attention/backends/utils.py
View file @
176a95c6
...
@@ -345,10 +345,10 @@ class CommonAttentionState(AttentionState):
...
@@ -345,10 +345,10 @@ class CommonAttentionState(AttentionState):
if
is_encoder_decoder_model
:
if
is_encoder_decoder_model
:
# The encoder decoder model works only with XFormers and
# The encoder decoder model works only with XFormers and
# Flash Attention backend. Assert the same.
# Flash Attention backend. Assert the same.
assert
self
.
runner
.
attn_backend
.
get_name
()
in
\
assert
self
.
runner
.
attn_backend
.
get_name
()
in
\
[
"XFORMERS"
,
"FLASH_ATTN"
],
\
[
"XFORMERS"
,
"FLASH_ATTN"
,
"ROCM_FLASH"
],
\
f
"Expected attn_backend name to be either 'XFORMERS'
or
"
\
f
"Expected attn_backend name to be either 'XFORMERS'
,
"
\
f
"'FLASH_ATTN', but "
\
f
"
'ROCM_FLASH', or
'FLASH_ATTN', but "
\
f
"got '
{
self
.
runner
.
attn_backend
.
get_name
()
}
'"
f
"got '
{
self
.
runner
.
attn_backend
.
get_name
()
}
'"
self
.
_update_captured_metadata_for_enc_dec_model
(
self
.
_update_captured_metadata_for_enc_dec_model
(
batch_size
=
batch_size
,
attn_metadata
=
attn_metadata
)
batch_size
=
batch_size
,
attn_metadata
=
attn_metadata
)
...
@@ -367,10 +367,10 @@ class CommonAttentionState(AttentionState):
...
@@ -367,10 +367,10 @@ class CommonAttentionState(AttentionState):
if
is_encoder_decoder_model
:
if
is_encoder_decoder_model
:
# The encoder decoder model works only with XFormers and
# The encoder decoder model works only with XFormers and
# Flash Attention backend. Assert the same.
# Flash Attention backend. Assert the same.
assert
self
.
runner
.
attn_backend
.
get_name
()
in
\
assert
self
.
runner
.
attn_backend
.
get_name
()
in
\
[
"XFORMERS"
,
"FLASH_ATTN"
],
\
[
"XFORMERS"
,
"FLASH_ATTN"
,
"ROCM_FLASH"
],
\
f
"Expected attn_backend name to be either 'XFORMERS'
or "
\
f
"Expected attn_backend name to be either 'XFORMERS'
,"
\
f
"'FLASH_ATTN', but "
\
f
"
'ROCM_FLASH', or
'FLASH_ATTN', but "
\
f
"got '
{
self
.
runner
.
attn_backend
.
get_name
()
}
'"
f
"got '
{
self
.
runner
.
attn_backend
.
get_name
()
}
'"
self
.
_add_additonal_input_buffers_for_enc_dec_model
(
self
.
_add_additonal_input_buffers_for_enc_dec_model
(
attn_metadata
=
attn_metadata
,
input_buffers
=
input_buffers
)
attn_metadata
=
attn_metadata
,
input_buffers
=
input_buffers
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment