Unverified Commit 69af3ec3 authored by JieXin Liang's avatar JieXin Liang Committed by GitHub
Browse files

[doc] add note for get_num_kv_splits in triton_backend (#6444)

parent 32cc66ef
......@@ -155,6 +155,9 @@ class TritonAttnBackend(AttentionBackend):
seq_lens: torch.Tensor,
):
num_token, num_seq = num_kv_splits.shape[0], seq_lens.shape[0]
# NOTE(alcanderian): With speculative decoding,
# num_kv_splits.shape[0] will be topk * real_num_token,
# and real_num_token equals num_seq in the decoding phase.
num_group = num_token // num_seq
assert (
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment