Unverified commit 79a5b632, authored by Or Ozeri, committed by GitHub
Browse files

[kv_offload]: Fix num CPU blocks for UniformTypeKVCacheSpecs (#39617)


Signed-off-by: Or Ozeri <oro@il.ibm.com>
parent c0c98b8b
...@@ -26,17 +26,13 @@ class CPUOffloadingSpec(OffloadingSpec): ...@@ -26,17 +26,13 @@ class CPUOffloadingSpec(OffloadingSpec):
# calculate kv_bytes_per_offloaded_block # calculate kv_bytes_per_offloaded_block
assert kv_cache_config is not None assert kv_cache_config is not None
page_sizes = { if kv_cache_config.num_blocks > 0:
kv_cache_group.kv_cache_spec.page_size_bytes total_gpu_kv_bytes = sum(t.size for t in kv_cache_config.kv_cache_tensors)
for kv_cache_group in kv_cache_config.kv_cache_groups kv_bytes_per_block = (
} total_gpu_kv_bytes // kv_cache_config.num_blocks
assert len(page_sizes) == 1 ) * vllm_config.parallel_config.world_size
page_size_bytes = page_sizes.pop() else:
kv_bytes_per_block = ( kv_bytes_per_block = 0
page_size_bytes
* len(kv_cache_config.kv_cache_tensors)
* vllm_config.parallel_config.world_size
)
kv_bytes_per_offloaded_block = kv_bytes_per_block * self.block_size_factor kv_bytes_per_offloaded_block = kv_bytes_per_block * self.block_size_factor
self.num_blocks = ( self.num_blocks = (
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment