@@ -234,19 +234,6 @@ class MultiModalProfiler(Generic[_I]):
...
@@ -234,19 +234,6 @@ class MultiModalProfiler(Generic[_I]):
prompt_token_ids=mm_inputs["prompt_token_ids"]
prompt_token_ids=mm_inputs["prompt_token_ids"]
total_len=len(prompt_token_ids)
total_len=len(prompt_token_ids)
# V0 does not support chunked prefill.
iftotal_len>seq_lenandnotenvs.VLLM_USE_V1:
# `max_num_batched_tokens` is defined by `SchedulerConfig`
logger.warning_once(
"The sequence length used for profiling (max_num_batched_tokens / max_num_seqs = %d) "# noqa: E501
"is too short to hold the multi-modal embeddings in the worst case (%d tokens in total, out of which %s are reserved for multi-modal embeddings). "# noqa: E501
"This may cause certain multi-modal inputs to fail during inference, even when the input text is short. "# noqa: E501
"To avoid this, you should increase `max_model_len`, reduce `max_num_seqs`, and/or reduce `mm_counts`.",# noqa: E501
seq_len,
total_len,
str(self._get_mm_num_tokens(mm_inputs)),
)
iftotal_len<seq_len:
iftotal_len<seq_len:
prompt_token_ids.extend([0]*(seq_len-total_len))
prompt_token_ids.extend([0]*(seq_len-total_len))
...
@@ -270,22 +257,6 @@ class MultiModalProfiler(Generic[_I]):
...
@@ -270,22 +257,6 @@ class MultiModalProfiler(Generic[_I]):