Unverified commit 79a5b632, authored by Or Ozeri, committed by GitHub
Browse files

[kv_offload]: Fix num CPU blocks for UniformTypeKVCacheSpecs (#39617)


Signed-off-by: Or Ozeri <oro@il.ibm.com>
parent c0c98b8b
......@@ -26,17 +26,13 @@ class CPUOffloadingSpec(OffloadingSpec):
# calculate kv_bytes_per_offloaded_block
assert kv_cache_config is not None
page_sizes = {
kv_cache_group.kv_cache_spec.page_size_bytes
for kv_cache_group in kv_cache_config.kv_cache_groups
}
assert len(page_sizes) == 1
page_size_bytes = page_sizes.pop()
kv_bytes_per_block = (
page_size_bytes
* len(kv_cache_config.kv_cache_tensors)
* vllm_config.parallel_config.world_size
)
if kv_cache_config.num_blocks > 0:
total_gpu_kv_bytes = sum(t.size for t in kv_cache_config.kv_cache_tensors)
kv_bytes_per_block = (
total_gpu_kv_bytes // kv_cache_config.num_blocks
) * vllm_config.parallel_config.world_size
else:
kv_bytes_per_block = 0
kv_bytes_per_offloaded_block = kv_bytes_per_block * self.block_size_factor
self.num_blocks = (
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment