Unverified commit 0b51c9bd, authored by Jialin Ouyang and committed by GitHub
Browse files

[Core] Early return in SlidingWindowManager.remove_skipped_blocks (#27673)


Signed-off-by: Jialin Ouyang <Jialin.Ouyang@gmail.com>
parent d3ab240f
......@@ -394,7 +394,13 @@ class SlidingWindowManager(SingleTypeKVCacheManager):
# skipped during the attention computation.
last_useful_token = num_computed_tokens - self.sliding_window + 1
last_useful_block = last_useful_token // self.block_size
if last_useful_block <= 0:
# Early return if tokens are not enough to fill the sliding window
return
blocks = self.req_to_blocks[request_id]
if blocks[last_useful_block - 1] == self._null_block:
# Early return if there are no blocks to remove
return
removed_blocks: list[KVCacheBlock] = []
for i in range(last_useful_block - 1, -1, -1):
if blocks[i] == self._null_block:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment