Unverified commit ad434d4c, authored by Gregory Shtrasberg, committed by GitHub
Browse files

Print the warning only once (#16193)


Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com>
parent 66d433b9
...@@ -216,17 +216,18 @@ class MultiModalProfiler(Generic[_I]): ...@@ -216,17 +216,18 @@ class MultiModalProfiler(Generic[_I]):
# Encoder-decoder multimodal models only support v0 # Encoder-decoder multimodal models only support v0
if total_len > seq_len: if total_len > seq_len:
# `max_num_batched_tokens` is defined by `SchedulerConfig` # `max_num_batched_tokens` is defined by `SchedulerConfig`
logger.warning( logger.warning_once(
"The encoder sequence length used for profiling (" "The encoder sequence length used for profiling ("
"max_num_batched_tokens / max_num_seqs = %d) is too short " f"max_num_batched_tokens / max_num_seqs = {seq_len}) "
" is too short "
"to hold the multi-modal embeddings in the worst case " "to hold the multi-modal embeddings in the worst case "
"(%d tokens in total, out of which %s are reserved for " f"({total_len} tokens in total, out of which "
f"{total_placeholders_by_modality} are reserved for "
"multi-modal embeddings). This may cause certain " "multi-modal embeddings). This may cause certain "
"multi-modal inputs to fail during inference, even when " "multi-modal inputs to fail during inference, even when "
"the input text is short. To avoid this, you should " "the input text is short. To avoid this, you should "
"increase `max_model_len`, reduce `max_num_seqs`, " "increase `max_model_len`, reduce `max_num_seqs`, "
"and/or reduce `mm_counts`.", seq_len, total_len, "and/or reduce `mm_counts`.")
total_placeholders_by_modality)
processor = cast(EncDecMultiModalProcessor, self.processor) processor = cast(EncDecMultiModalProcessor, self.processor)
if processor.pad_dummy_encoder_prompt: if processor.pad_dummy_encoder_prompt:
...@@ -251,17 +252,18 @@ class MultiModalProfiler(Generic[_I]): ...@@ -251,17 +252,18 @@ class MultiModalProfiler(Generic[_I]):
# V0 does not support chunked prefill. # V0 does not support chunked prefill.
if total_len > seq_len and not envs.VLLM_USE_V1: if total_len > seq_len and not envs.VLLM_USE_V1:
# `max_num_batched_tokens` is defined by `SchedulerConfig` # `max_num_batched_tokens` is defined by `SchedulerConfig`
logger.warning( logger.warning_once(
"The sequence length used for profiling (" "The sequence length used for profiling ("
"max_num_batched_tokens / max_num_seqs = %d) is too short " f"max_num_batched_tokens / max_num_seqs = {seq_len}) "
"is too short "
"to hold the multi-modal embeddings in the worst case " "to hold the multi-modal embeddings in the worst case "
"(%d tokens in total, out of which %s are reserved for " f"({total_len} tokens in total, out of which "
f"{total_placeholders_by_modality} are reserved for "
"multi-modal embeddings). This may cause certain " "multi-modal embeddings). This may cause certain "
"multi-modal inputs to fail during inference, even when " "multi-modal inputs to fail during inference, even when "
"the input text is short. To avoid this, you should " "the input text is short. To avoid this, you should "
"increase `max_model_len`, reduce `max_num_seqs`, " "increase `max_model_len`, reduce `max_num_seqs`, "
"and/or reduce `mm_counts`.", seq_len, total_len, "and/or reduce `mm_counts`.")
total_placeholders_by_modality)
if total_len < seq_len: if total_len < seq_len:
prompt_token_ids.extend([0] * (seq_len - total_len)) prompt_token_ids.extend([0] * (seq_len - total_len))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment