Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
d30aa7e9
Unverified
Commit
d30aa7e9
authored
Mar 16, 2025
by
Kyle Sayers
Committed by
GitHub
Mar 16, 2025
Browse files
[Bugfix] Limit profiling run sequence length by max_model_len (#14785)
Signed-off-by:
Kyle Sayers
<
kylesayrs@gmail.com
>
parent
d1ad2a57
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
9 additions
and
0 deletions
+9
-0
vllm/inputs/registry.py
vllm/inputs/registry.py
+5
-0
vllm/worker/enc_dec_model_runner.py
vllm/worker/enc_dec_model_runner.py
+1
-0
vllm/worker/model_runner.py
vllm/worker/model_runner.py
+1
-0
vllm/worker/openvino_model_runner.py
vllm/worker/openvino_model_runner.py
+1
-0
vllm/worker/xpu_model_runner.py
vllm/worker/xpu_model_runner.py
+1
-0
No files found.
vllm/inputs/registry.py
View file @
d30aa7e9
...
...
@@ -330,6 +330,11 @@ class InputRegistry:
from
vllm.multimodal
import
MultiModalKwargs
from
vllm.multimodal.profiling
import
MultiModalProfiler
if
seq_len
>
model_config
.
max_model_len
:
raise
AssertionError
(
f
"Profiling attempted with sequence length (
{
seq_len
}
) "
f
"greater than model length (
{
model_config
.
max_model_len
}
)"
)
if
mm_registry
.
has_processor
(
model_config
):
tokenizer
=
cached_tokenizer_from_config
(
model_config
)
processor
=
mm_registry
.
create_processor
(
model_config
,
...
...
vllm/worker/enc_dec_model_runner.py
View file @
d30aa7e9
...
...
@@ -281,6 +281,7 @@ class EncoderDecoderModelRunner(GPUModelRunnerBase[EncoderDecoderModelInput]):
for
group_id
in
range
(
max_num_seqs
):
seq_len
=
(
max_num_batched_tokens
//
max_num_seqs
+
(
group_id
<
max_num_batched_tokens
%
max_num_seqs
))
seq_len
=
min
(
seq_len
,
self
.
model_config
.
max_model_len
)
batch_size
+=
seq_len
decoder_dummy_data
=
self
.
input_registry
\
...
...
vllm/worker/model_runner.py
View file @
d30aa7e9
...
...
@@ -1302,6 +1302,7 @@ class GPUModelRunnerBase(ModelRunnerBase[TModelInputForGPU]):
for
group_id
in
range
(
max_num_seqs
):
seq_len
=
(
max_num_batched_tokens
//
max_num_seqs
+
(
group_id
<
max_num_batched_tokens
%
max_num_seqs
))
seq_len
=
min
(
seq_len
,
self
.
model_config
.
max_model_len
)
batch_size
+=
seq_len
dummy_data
=
self
.
input_registry
\
...
...
vllm/worker/openvino_model_runner.py
View file @
d30aa7e9
...
...
@@ -148,6 +148,7 @@ class OpenVINOModelRunner(ModelRunnerBase):
seq_len
=
min
(
seq_data
.
get_len
(),
computed_len
+
seq_group_metadata
.
token_chunk_size
,
self
.
model_config
.
max_model_len
,
)
if
is_prompt
:
tokens
=
seq_data
.
get_token_ids
()[
computed_len
:
seq_len
]
...
...
vllm/worker/xpu_model_runner.py
View file @
d30aa7e9
...
...
@@ -466,6 +466,7 @@ class XPUModelRunner(ModelRunnerBase[ModelInputForXPUWithSamplingMetadata]):
for
group_id
in
range
(
max_num_seqs
):
seq_len
=
(
max_num_batched_tokens
//
max_num_seqs
+
(
group_id
<
max_num_batched_tokens
%
max_num_seqs
))
seq_len
=
min
(
seq_len
,
self
.
model_config
.
max_model_len
)
batch_size
+=
seq_len
dummy_data
=
self
.
input_registry
\
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment