Unverified Commit 5b2dcbf0, authored by inkcherry, committed by GitHub
Browse files

Fix Whisper crash caused by invalid `max_num_batched_tokens` config (#17853)


Signed-off-by: inkcherry <mingzhi.liu@intel.com>
parent 6e4a93e3
......@@ -2050,6 +2050,13 @@ class SchedulerConfig:
_MULTIMODAL_MODEL_MAX_NUM_BATCHED_TOKENS,
)
# When using default settings,
# Ensure max_num_batched_tokens does not exceed model limit.
# Some models (e.g., Whisper) have embeddings tied to max length.
self.max_num_batched_tokens = min(
self.max_num_seqs * self.max_model_len,
self.max_num_batched_tokens)
self.max_num_encoder_input_tokens = self.max_num_batched_tokens
self.encoder_cache_size = self.max_num_batched_tokens
......@@ -2090,6 +2097,13 @@ class SchedulerConfig:
"be greater than or equal to max_num_seqs "
f"({self.max_num_seqs}).")
if self.max_num_batched_tokens > self.max_num_seqs * self.max_model_len:
logger.warning(
"max_num_batched_tokens (%d) exceeds max_num_seqs"
"* max_model_len (%d). This may lead to unexpected behavior.",
self.max_num_batched_tokens,
self.max_num_seqs * self.max_model_len)
if self.num_lookahead_slots < 0:
raise ValueError(
"num_lookahead_slots "
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment