Unverified commit 90b78ec5, authored by Jinghui Zhang, committed by GitHub
Browse files

[v1][P/D] Fix an edge case in kv cache schedule (#19182)


Co-authored-by: jinghui <jinghui@fb.com>
parent 91a2ef98
...@@ -1009,6 +1009,8 @@ class Scheduler(SchedulerInterface): ...@@ -1009,6 +1009,8 @@ class Scheduler(SchedulerInterface):
# Now that the blocks are ready, actually cache them. # Now that the blocks are ready, actually cache them.
block_ids = self.kv_cache_manager.get_block_ids(request.request_id)[0] block_ids = self.kv_cache_manager.get_block_ids(request.request_id)[0]
num_computed_tokens = len(block_ids) * self.block_size num_computed_tokens = len(block_ids) * self.block_size
# Handle the case where the number of request tokens is less than one block.
num_computed_tokens = min(num_computed_tokens, request.num_tokens)
if num_computed_tokens == request.num_tokens: if num_computed_tokens == request.num_tokens:
num_computed_tokens -= 1 num_computed_tokens -= 1
self.kv_cache_manager.cache_blocks( self.kv_cache_manager.cache_blocks(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment