Commit 8d0b2f15 authored by lizhigong's avatar lizhigong
Browse files

Fix flash attention bug affecting chunked prefill and radix cache

parent d297cda2
......@@ -86,9 +86,10 @@ def flash_attn_varlen_func(
k=k,
v=v,
cu_seqlens_q=cu_seqlens_q,
cu_seqlens_k=cu_seqlens_q,
cu_seqlens_k=cu_seqlens_k,
max_seqlen_q=max_seqlen_q,
max_seqlen_k=max_seqlen_q,
max_seqlen_k=max_seqlen_k,
softmax_scale=softmax_scale,
causal=causal,
return_attn_probs=return_softmax_lse,
)
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment