Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
0adf9cda
Commit
0adf9cda
authored
Oct 16, 2025
by
zhuwenwen
Browse files
fix v0 eager fa-pa acc error
parent
aed7c9f8
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
6 additions
and
3 deletions
+6
-3
vllm/attention/backends/rocm_flash_attn.py
vllm/attention/backends/rocm_flash_attn.py
+6
-3
No files found.
vllm/attention/backends/rocm_flash_attn.py
View file @
0adf9cda
...
...
@@ -944,11 +944,11 @@ class ROCmFlashAttentionImpl(AttentionImpl):
decode_meta
.
max_decode_seq_len
,
self
.
sliding_window
,
self
.
kv_cache_dtype
,
self
.
alibi_slopes
)
if
use_custom
:
max_seq_len
=
(
decode_meta
.
max_decode_seq_len
if
self
.
attn_type
!=
AttentionType
.
ENCODER_DECODER
else
decode_meta
.
max_encoder_seq_len
)
assert
max_seq_len
is
not
None
if
use_custom
:
max_num_partitions
=
(
(
max_seq_len
+
_PARTITION_SIZE_ROCM
-
1
)
//
_PARTITION_SIZE_ROCM
)
...
...
@@ -1002,6 +1002,8 @@ class ROCmFlashAttentionImpl(AttentionImpl):
tree_attention_masks_tensor
=
decode_meta
.
tree_attention_masks_tensor
if
envs
.
VLLM_USE_FLASH_ATTN_PA
:
from
flash_attn
import
vllm_flash_attn_with_kvcache
if
decode_meta
.
use_cuda_graph
:
max_seq_len
=
0
if
envs
.
VLLM_USE_PA_PRINT_PARAM
:
print
(
"PA SIZE:"
)
print
(
f
"q.shape =
{
decode_query
.
unsqueeze
(
1
).
shape
}
, key_cache.shape =
{
key_cache
.
shape
}
, value_cache.shape =
{
value_cache
.
shape
}
, kv_cache_dtype =
{
self
.
kv_cache_dtype
}
"
)
...
...
@@ -1024,6 +1026,7 @@ class ROCmFlashAttentionImpl(AttentionImpl):
k_scale
=
layer
.
_k_scale
,
v_scale
=
layer
.
_v_scale
,
kv_cache_dtype
=
self
.
kv_cache_dtype
,
max_seqlen_k
=
max_seq_len
,
).
squeeze
(
1
)
else
:
out_pa
[:]
=
paged_attn
.
forward_decode
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment