Unverified commit 60e089f0, authored by Xiake Sun and committed by GitHub
Browse files

[ROCm][Qwen3-32B] Fix AITER MHA accuracy issue caused by #25763 (#28670)


Signed-off-by: Xiake Sun <xiake.sun@amd.com>
parent d64429bb
......@@ -729,7 +729,7 @@ class AiterFlashAttentionImpl(AttentionImpl):
cu_seqlens_k=attn_metadata.prefill_metadata.query_start_loc,
max_seqlen_q=attn_metadata.prefill_metadata.max_query_len,
max_seqlen_k=attn_metadata.prefill_metadata.max_seq_len,
-min_seqlen_q=attn_metadata.prefill_metadata.min_query_len,
+min_seqlen_q=1,
dropout_p=0.0,
softmax_scale=self.scale,
causal=True,
......@@ -759,7 +759,7 @@ class AiterFlashAttentionImpl(AttentionImpl):
cu_seqlens_q=attn_metadata.extend_metadata.query_start_loc,
max_seqlen_q=attn_metadata.extend_metadata.max_query_len,
max_seqlen_k=attn_metadata.extend_metadata.max_seq_len,
-min_seqlen_q=attn_metadata.extend_metadata.min_query_len,
+min_seqlen_q=1,
block_table=attn_metadata.block_table[
num_decodes : num_decodes + num_extends
],
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment