Unverified commit eea22a56, authored by Shiyan Deng, committed by GitHub
Browse files

fix amd triton mla path (#17871)

parent 91121552
...@@ -1063,7 +1063,7 @@ class MLACommonImpl(MLAAttentionImpl[T], Generic[T]): ...@@ -1063,7 +1063,7 @@ class MLACommonImpl(MLAAttentionImpl[T], Generic[T]):
softmax_scale, softmax_scale,
None, # bias None, # bias
) )
if is_vllm_fa: elif is_vllm_fa:
attn_out = self.flash_attn_varlen_func( attn_out = self.flash_attn_varlen_func(
q=q, q=q,
k=k, k=k,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment