Unverified commit 7811bfda, authored by Yineng Zhang and committed by GitHub
Browse files

compatible with flashinfer v0.2 (#3235)

parent 656f7fc1
@@ -800,7 +800,9 @@ class FlashInferIndicesUpdaterPrefill:
 kv_indptr[1 : bs + 1] = torch.cumsum(paged_kernel_lens, dim=0)
 kv_indptr = kv_indptr[: bs + 1]
 kv_indices = torch.empty(
-    paged_kernel_lens_sum, dtype=torch.int32, device="cuda"
+    paged_kernel_lens_sum + 256,
+    dtype=torch.int32,
+    device=req_pool_indices.device,
 )
 create_flashinfer_kv_indices_triton[(bs,)](
     self.req_to_token,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment