Fix _forward_encoder_attention: pass is_prefix_cache=False instead of True

b949b805 · zhuwenwen · d10ac4af · b949b805
Commit b949b805 authored Jan 10, 2026 by zhuwenwen
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 1 addition and 1 deletion

vllm/v1/attention/backends/flash_attn.py vllm/v1/attention/backends/flash_attn.py +1 -1

No files found.
--- a/vllm/v1/attention/backends/flash_attn.py
+++ b/vllm/v1/attention/backends/flash_attn.py
@@ -776,7 +776,7 @@ class FlashAttentionImpl(AttentionImpl):
                # q_descale=layer._q_scale.expand(descale_shape),
                # k_descale=layer._k_scale.expand(descale_shape),
                # v_descale=layer._v_scale.expand(descale_shape),
-                is_prefix_cache=True,
+                is_prefix_cache=False,
            )

        return output