Unverified commit c9280e63, authored by jmswen and committed by GitHub
Browse files

[Bugfix] Respect num-gpu-blocks-override in v1 (#19503)


Signed-off-by: Jon Swenson <jmswen@gmail.com>
parent af09b3f0
......@@ -900,3 +900,19 @@ def test_get_kv_cache_config():
with pytest.raises(NotImplementedError):
get_kv_cache_config(vllm_config, kv_cache_specs_hybrid,
mem_per_block_per_layer * 2 * 32)
# Test num_gpu_blocks_override
vllm_config.cache_config.num_gpu_blocks_override = 16
kv_cache_config_override_blocks = get_kv_cache_config(
vllm_config, kv_cache_specs_full, mem_per_block_per_layer * 2 * 32)
assert kv_cache_config_override_blocks == KVCacheConfig(
num_blocks=16,
kv_cache_tensors=[
KVCacheTensor(size=mem_per_block_per_layer * 16,
shared_by=["layer_1"]),
KVCacheTensor(size=mem_per_block_per_layer * 16,
shared_by=["layer_2"]),
],
kv_cache_groups=[
KVCacheGroupSpec(["layer_1", "layer_2"], new_kv_cache_spec())
])
\ No newline at end of file
......@@ -660,6 +660,7 @@ def get_num_blocks(vllm_config: VllmConfig, num_layers: int,
logger.info(
"Overriding num_gpu_blocks=%d with "
"num_gpu_blocks_override=%d", num_blocks, num_gpu_blocks_override)
num_blocks = num_gpu_blocks_override
return num_blocks
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment