[BugFix] Honor `enable_caching` in connector-delayed kvcache load case (#19435)

Signed-off-by: Nick Hill <nhill@redhat.com>

[BugFix] Honor `enable_caching` in connector-delayed kvcache load case (#19435)
Signed-off-by: Nick Hill <nhill@redhat.com>
7e8d97dd · Nick Hill · GitHub · d70bc7c0 · 7e8d97dd · 7e8d97dd
Unverified commit 7e8d97dd, authored Jun 13, 2025 by Nick Hill; committed by GitHub on Jun 13, 2025.
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 6 additions and 4 deletions

vllm/v1/core/kv_cache_manager.py +5 -4

vllm/v1/core/sched/scheduler.py +1 -0

No files found.
--- a/vllm/v1/core/kv_cache_manager.py
+++ b/vllm/v1/core/kv_cache_manager.py
@@ -381,10 +381,11 @@ class KVCacheManager:
            self.coordinator.get_blocks(request_id)).get_block_ids()

    def cache_blocks(self, request: Request, num_computed_tokens: int) -> None:
-        """Cache the blocks for the request."""
-        block_hashes = self.req_to_block_hashes[request.request_id]
-        self.coordinator.cache_blocks(request, block_hashes,
-                                      num_computed_tokens)
+        """Cache the blocks for the request, if enabled."""
+        if self.enable_caching:
+            block_hashes = self.req_to_block_hashes[request.request_id]
+            self.coordinator.cache_blocks(request, block_hashes,
+                                          num_computed_tokens)

    def create_empty_block_list(self) -> KVCacheBlocks:
        """Creates a new KVCacheBlocks instance with no blocks."""

--- a/vllm/v1/core/sched/scheduler.py
+++ b/vllm/v1/core/sched/scheduler.py
@@ -1015,6 +1015,7 @@ class Scheduler(SchedulerInterface):
        num_computed_tokens = min(num_computed_tokens, request.num_tokens)
        if num_computed_tokens == request.num_tokens:
            num_computed_tokens -= 1
+        # This will cache the blocks iff caching is enabled.
        self.kv_cache_manager.cache_blocks(request, num_computed_tokens)

        # Update the request state for scheduling.