"csrc/vscode:/vscode.git/clone" did not exist on "06a760d6e8bcd60dc98775678b5b12eef01d82bb"
Unverified commit 7e8d97dd, authored by Nick Hill, committed by GitHub
Browse files

[BugFix] Honor `enable_caching` in connector-delayed kvcache load case (#19435)


Signed-off-by: Nick Hill <nhill@redhat.com>
parent d70bc7c0
......@@ -381,10 +381,11 @@ class KVCacheManager:
self.coordinator.get_blocks(request_id)).get_block_ids()
def cache_blocks(self, request: Request, num_computed_tokens: int) -> None:
    """Cache the blocks for the request, if enabled.

    Does nothing when ``self.enable_caching`` is falsy. Otherwise the block
    hashes stored under ``request.request_id`` in
    ``self.req_to_block_hashes`` are looked up and handed to the
    coordinator exactly once.

    Args:
        request: The request whose blocks should be cached.
        num_computed_tokens: Forwarded unchanged to
            ``self.coordinator.cache_blocks``.
    """
    # Guard first: with caching disabled, neither the hash lookup nor the
    # coordinator call may run.
    if not self.enable_caching:
        return
    block_hashes = self.req_to_block_hashes[request.request_id]
    self.coordinator.cache_blocks(request, block_hashes,
                                  num_computed_tokens)
def create_empty_block_list(self) -> KVCacheBlocks:
"""Creates a new KVCacheBlocks instance with no blocks."""
......
......@@ -1015,6 +1015,7 @@ class Scheduler(SchedulerInterface):
num_computed_tokens = min(num_computed_tokens, request.num_tokens)
if num_computed_tokens == request.num_tokens:
num_computed_tokens -= 1
# This will cache the blocks iff caching is enabled.
self.kv_cache_manager.cache_blocks(request, num_computed_tokens)
# Update the request state for scheduling.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment