Unverified commit 6f019e6e, authored by Harry Huang, committed by GitHub
Browse files

[BugFix] Add block_size validation for mamba cache align mode (#34445)


Signed-off-by: huanghaoyan.hhy <huanghaoyan.hhy@alibaba-inc.com>
parent d707678d
...@@ -1110,6 +1110,15 @@ class VllmConfig: ...@@ -1110,6 +1110,15 @@ class VllmConfig:
self.scheduler_config.disable_hybrid_kv_cache_manager = False self.scheduler_config.disable_hybrid_kv_cache_manager = False
if self.cache_config.mamba_cache_mode == "align": if self.cache_config.mamba_cache_mode == "align":
assert (
self.cache_config.block_size
<= self.scheduler_config.max_num_batched_tokens
), (
"In Mamba cache align mode, block_size "
f"({self.cache_config.block_size}) must be <= "
"max_num_batched_tokens "
f"({self.scheduler_config.max_num_batched_tokens})."
)
if self.scheduler_config.long_prefill_token_threshold > 0: if self.scheduler_config.long_prefill_token_threshold > 0:
assert ( assert (
self.scheduler_config.long_prefill_token_threshold self.scheduler_config.long_prefill_token_threshold
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment