Unverified commit 8041b730 authored by Woosuk Kwon, committed by GitHub

[BugFix] Raise error when max_model_len is larger than KV cache (#2163)

parent 3ec8c25c
@@ -227,6 +227,14 @@ class LLMEngine:
             raise ValueError("No available memory for the cache blocks. "
                              "Try increasing `gpu_memory_utilization` when "
                              "initializing the engine.")
+        max_seq_len = self.cache_config.block_size * num_gpu_blocks
+        if self.model_config.max_model_len > max_seq_len:
+            raise ValueError(
+                f"The model's max seq len ({self.model_config.max_model_len}) "
+                "is larger than the maximum number of tokens that can be "
+                f"stored in KV cache ({max_seq_len}). Try increasing "
+                "`gpu_memory_utilization` or decreasing `max_model_len` when "
+                "initializing the engine.")
 
         self.cache_config.num_gpu_blocks = num_gpu_blocks
         self.cache_config.num_cpu_blocks = num_cpu_blocks
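
The check itself is simple arithmetic: the paged KV cache is carved into fixed-size blocks, so it can hold at most block_size * num_gpu_blocks tokens, and a max_model_len beyond that capacity could never be served. Below is a minimal standalone sketch of the same validation; check_kv_cache_capacity is a hypothetical helper for illustration, not part of vLLM's actual API.

    # Minimal sketch of the capacity check added in this commit.
    # `check_kv_cache_capacity` is an illustrative stand-in; in vLLM the
    # check lives inside LLMEngine's cache initialization.

    def check_kv_cache_capacity(block_size: int, num_gpu_blocks: int,
                                max_model_len: int) -> None:
        # The KV cache is allocated in fixed-size blocks, so its total
        # token capacity is block_size * num_gpu_blocks.
        max_seq_len = block_size * num_gpu_blocks
        if max_model_len > max_seq_len:
            raise ValueError(
                f"The model's max seq len ({max_model_len}) is larger than "
                "the maximum number of tokens that can be stored in KV cache "
                f"({max_seq_len}). Try increasing `gpu_memory_utilization` "
                "or decreasing `max_model_len` when initializing the engine.")

    # Example: 16-token blocks * 1024 GPU blocks = 16384 tokens of capacity.
    check_kv_cache_capacity(block_size=16, num_gpu_blocks=1024,
                            max_model_len=16384)   # fits exactly, passes
    check_kv_cache_capacity(block_size=16, num_gpu_blocks=1024,
                            max_model_len=32768)   # raises ValueError

Failing fast here turns a confusing runtime scheduling failure into a clear configuration error at engine startup, with the two knobs (gpu_memory_utilization, max_model_len) named in the message.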