Commit 27e327b4 (unverified), authored by pansicheng, committed by GitHub
Browse files

fix new_page_count_next_decode (#6671)

parent ff00895c
@@ -1333,7 +1333,9 @@ class ScheduleBatch(ScheduleBatchDisaggregationDecodeMixin):
         page_size = self.token_to_kv_pool_allocator.page_size
         if page_size == 1:
             return len(self.reqs)
-        return sum(1 for req in self.reqs if req.seqlen % page_size == 0)
+        # In the decoding phase, the length of a request's KV cache should be
+        # the total length of the request minus 1
+        return sum(1 for req in self.reqs if (req.seqlen - 1) % page_size == 0)
 
     def check_decode_mem(self, buf_multiplier=1):
         tokens_required = (
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment