fix: 仅当 kv block 中不含 MTP 的假数据时才对其进行缓存，以修复 cache_full_blocks 将同一个 kv block 保存两次的 bug

f7cb8c7b · jujl1 · 2c1de3fa · f7cb8c7b
Commit f7cb8c7b authored Jan 04, 2026 by jujl1
Show whitespace changes
Inline Side-by-side

Showing with 4 additions and 2 deletions

vllm/v1/core/single_type_kv_cache_manager.py vllm/v1/core/single_type_kv_cache_manager.py +4 -2

No files found.
--- a/vllm/v1/core/single_type_kv_cache_manager.py
+++ b/vllm/v1/core/single_type_kv_cache_manager.py
@@ -10,7 +10,7 @@ from vllm.v1.core.kv_cache_utils import BlockHash, KVCacheBlock
 from vllm.v1.kv_cache_interface import (FullAttentionSpec, KVCacheSpec,
                                        MambaSpec, SlidingWindowSpec)
 from vllm.v1.request import Request
-
+from vllm import envs

 class SingleTypeKVCacheManager(ABC):
    """
@@ -141,7 +141,9 @@ class SingleTypeKVCacheManager(ABC):
        """
        num_cached_blocks = self.num_cached_block[request.request_id]
        num_full_blocks = num_tokens // self.block_size
-
+        if envs.VLLM_ZERO_OVERHEAD_ENHANCE:
+            if num_full_blocks > num_cached_blocks and num_tokens % self.block_size < len(request.spec_token_ids):
+                num_full_blocks -= 1
        self.block_pool.cache_full_blocks(
            request=request,
            blocks=self.req_to_blocks[request.request_id],