Commit 58042164 authored by zhuwenwen
Browse files

update flash_attn.py

parent dbd62f84
......@@ -977,7 +977,6 @@ class FlashAttentionImpl(AttentionImpl):
v_descale=layer._v_scale.expand(descale_shape),
)
else:
decode_output = decode_output.unsqueeze(1)
decode_output = flash_attn_with_kvcache(
q=decode_query.unsqueeze(1),
k_cache=key_cache,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment