Commit f7cb8c7b authored by jujl1
Browse files

fix: 只有当 kv block 中不含有 MTP 的假数据时,该 block 才会被缓存,以修复 cache_full_blocks 将同一个 kv block 保存两次的 bug

parent 2c1de3fa
......@@ -10,7 +10,7 @@ from vllm.v1.core.kv_cache_utils import BlockHash, KVCacheBlock
from vllm.v1.kv_cache_interface import (FullAttentionSpec, KVCacheSpec,
MambaSpec, SlidingWindowSpec)
from vllm.v1.request import Request
from vllm import envs
class SingleTypeKVCacheManager(ABC):
"""
......@@ -141,7 +141,9 @@ class SingleTypeKVCacheManager(ABC):
"""
num_cached_blocks = self.num_cached_block[request.request_id]
num_full_blocks = num_tokens // self.block_size
if envs.VLLM_ZERO_OVERHEAD_ENHANCE:
if num_full_blocks > num_cached_blocks and num_tokens % self.block_size < len(request.spec_token_ids):
num_full_blocks -= 1
self.block_pool.cache_full_blocks(
request=request,
blocks=self.req_to_blocks[request.request_id],
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment