Commit 4080ac85 authored by zhuwenwen
Browse files

update flash_attn.py

parent 64fc5a29
...@@ -717,7 +717,11 @@ class FlashAttentionImpl(AttentionImpl): ...@@ -717,7 +717,11 @@ class FlashAttentionImpl(AttentionImpl):
window_size=self.sliding_window, window_size=self.sliding_window,
block_table=block_table, block_table=block_table,
softcap=self.logits_soft_cap, softcap=self.logits_soft_cap,
# scheduler_metadata=scheduler_metadata, scheduler_metadata=scheduler_metadata,
# fa_version=self.vllm_flash_attn_version,
# q_descale=layer._q_scale.expand(descale_shape),
# k_descale=layer._k_scale.expand(descale_shape),
# v_descale=layer._v_scale.expand(descale_shape),
) )
return output return output
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment