"docs/git@developer.sourcefind.cn:change/sglang.git" did not exist on "1ec976975330a0e8df7fae564ac795286817b62d"
Unverified commit 24eaebeb, authored by Nathan Wang and committed by GitHub
Browse files

Fix FlashInfer GPU <-> CPU sync (#9409)

parent a91e90d9
......@@ -1372,7 +1372,14 @@ def fast_decode_plan(
if self.use_tensor_cores:
# ALSO convert last_page_len to CPU
- last_page_len_host = last_page_len.cpu()
+ if page_size == 1:
+ # When page size is 1, last_page_len is always 1.
+ # Directly construct the host tensor rather than executing a device-to-host copy.
+ last_page_len_host = torch.ones(
+ (batch_size,), dtype=torch.int32, device="cpu"
+ )
+ else:
+ last_page_len_host = last_page_len.cpu()
kv_lens_arr_host = get_seq_lens(indptr_host, last_page_len_host, page_size)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment