Commit 12047f5e (unverified), authored by Zhiqiang Xie, committed by GitHub
Browse files

Prevent memory leak of retract_decode when page_size > 1 (#4977)

parent fda6bb78
@@ -1220,10 +1220,8 @@ class ScheduleBatch(ScheduleBatchDisaggregationDecodeMixin):
 else:
     # TODO: apply more fine-grained retraction
     last_uncached_pos = (
-        (len(req.prefix_indices) + server_args.page_size - 1)
-        // server_args.page_size
-        * server_args.page_size
-    )
+        len(req.prefix_indices) // server_args.page_size
+    ) * server_args.page_size
     token_indices = self.req_to_token_pool.req_to_token[
         req.req_pool_idx, last_uncached_pos : seq_lens_cpu[idx]
     ]
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment