Commit 2da6fd72 authored by zhuwenwen
Browse files

update common.py

parent f246ee95
...@@ -1031,7 +1031,6 @@ class MLACommonImpl(MLAAttentionImpl[T], Generic[T]): ...@@ -1031,7 +1031,6 @@ class MLACommonImpl(MLAAttentionImpl[T], Generic[T]):
# and the one from vllm_flash_attn. The former is used on RoCM and the # and the one from vllm_flash_attn. The former is used on RoCM and the
# latter has an additional parameter to control FA2 vs FA3 # latter has an additional parameter to control FA2 vs FA3
self.flash_attn_varlen_func = flash_attn_varlen_func self.flash_attn_varlen_func = flash_attn_varlen_func
# self.vllm_flash_attn_version = None
self.vllm_flash_attn_version = get_flash_attn_version() self.vllm_flash_attn_version = get_flash_attn_version()
if self.vllm_flash_attn_version is not None: if self.vllm_flash_attn_version is not None:
self.flash_attn_varlen_func = \ self.flash_attn_varlen_func = \
......
...@@ -656,7 +656,7 @@ class MLACommonImpl(MLAAttentionImpl[M], Generic[M]): ...@@ -656,7 +656,7 @@ class MLACommonImpl(MLAAttentionImpl[M], Generic[M]):
q=q, q=q,
k=k, k=k,
v=maybe_padded_v, v=maybe_padded_v,
return_softmax_lse=return_softmax_lse, # return_softmax_lse=return_softmax_lse,
softmax_scale=softmax_scale, softmax_scale=softmax_scale,
**kwargs, **kwargs,
) )
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment