Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
ad434d4c
Unverified
Commit
ad434d4c
authored
Apr 07, 2025
by
Gregory Shtrasberg
Committed by
GitHub
Apr 07, 2025
Browse files
Print the warning only once (#16193)
Signed-off-by:
Gregory Shtrasberg
<
Gregory.Shtrasberg@amd.com
>
parent
66d433b9
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
12 additions
and
10 deletions
+12
-10
vllm/multimodal/profiling.py
vllm/multimodal/profiling.py
+12
-10
No files found.
vllm/multimodal/profiling.py
View file @
ad434d4c
...
...
@@ -216,17 +216,18 @@ class MultiModalProfiler(Generic[_I]):
# Encoder-decoder multimodal models only support v0
if
total_len
>
seq_len
:
# `max_num_batched_tokens` is defined by `SchedulerConfig`
logger
.
warning
(
logger
.
warning
_once
(
"The encoder sequence length used for profiling ("
"max_num_batched_tokens / max_num_seqs = %d) is too short "
f
"max_num_batched_tokens / max_num_seqs =
{
seq_len
}
) "
" is too short "
"to hold the multi-modal embeddings in the worst case "
"(%d tokens in total, out of which %s are reserved for "
f
"(
{
total_len
}
tokens in total, out of which "
f
"
{
total_placeholders_by_modality
}
are reserved for "
"multi-modal embeddings). This may cause certain "
"multi-modal inputs to fail during inference, even when "
"the input text is short. To avoid this, you should "
"increase `max_model_len`, reduce `max_num_seqs`, "
"and/or reduce `mm_counts`."
,
seq_len
,
total_len
,
total_placeholders_by_modality
)
"and/or reduce `mm_counts`."
)
processor
=
cast
(
EncDecMultiModalProcessor
,
self
.
processor
)
if
processor
.
pad_dummy_encoder_prompt
:
...
...
@@ -251,17 +252,18 @@ class MultiModalProfiler(Generic[_I]):
# V0 does not support chunked prefill.
if
total_len
>
seq_len
and
not
envs
.
VLLM_USE_V1
:
# `max_num_batched_tokens` is defined by `SchedulerConfig`
logger
.
warning
(
logger
.
warning
_once
(
"The sequence length used for profiling ("
"max_num_batched_tokens / max_num_seqs = %d) is too short "
f
"max_num_batched_tokens / max_num_seqs =
{
seq_len
}
) "
"is too short "
"to hold the multi-modal embeddings in the worst case "
"(%d tokens in total, out of which %s are reserved for "
f
"(
{
total_len
}
tokens in total, out of which "
f
"
{
total_placeholders_by_modality
}
are reserved for "
"multi-modal embeddings). This may cause certain "
"multi-modal inputs to fail during inference, even when "
"the input text is short. To avoid this, you should "
"increase `max_model_len`, reduce `max_num_seqs`, "
"and/or reduce `mm_counts`."
,
seq_len
,
total_len
,
total_placeholders_by_modality
)
"and/or reduce `mm_counts`."
)
if
total_len
<
seq_len
:
prompt_token_ids
.
extend
([
0
]
*
(
seq_len
-
total_len
))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment