Commit cc7715fd authored by zhuwenwen
Browse files

Pass attention sinks (s_aux=self.sinks) to vllm_flash_attn_varlen_func

parent 965934b8
...@@ -644,7 +644,7 @@ class FlashAttentionImpl(AttentionImpl): ...@@ -644,7 +644,7 @@ class FlashAttentionImpl(AttentionImpl):
# k_descale=layer._k_scale.expand(descale_shape), # k_descale=layer._k_scale.expand(descale_shape),
# v_descale=layer._v_scale.expand(descale_shape), # v_descale=layer._v_scale.expand(descale_shape),
# num_splits=attn_metadata.max_num_splits, # num_splits=attn_metadata.max_num_splits,
# s_aux=self.sinks, s_aux=self.sinks,
is_prefix_cache=True, is_prefix_cache=True,
) )
return output return output
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment