Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
6bbf1795
Unverified
Commit
6bbf1795
authored
Jul 09, 2025
by
B-201
Committed by
GitHub
Jul 08, 2025
Browse files
[Misc] Fix the size of batched_dummy_mm_inputs in profile_run (#20434)
Signed-off-by:
bk-201
<
joy25810@foxmail.com
>
parent
9e0ef888
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
9 additions
and
6 deletions
+9
-6
tests/models/registry.py
tests/models/registry.py
+2
-1
vllm/v1/worker/gpu_model_runner.py
vllm/v1/worker/gpu_model_runner.py
+7
-5
No files found.
tests/models/registry.py
View file @
6bbf1795
...
@@ -412,7 +412,8 @@ _MULTIMODAL_EXAMPLE_MODELS = {
...
@@ -412,7 +412,8 @@ _MULTIMODAL_EXAMPLE_MODELS = {
hf_overrides
=
{
"architectures"
:
[
"QwenVLForConditionalGeneration"
]}),
# noqa: E501
hf_overrides
=
{
"architectures"
:
[
"QwenVLForConditionalGeneration"
]}),
# noqa: E501
"Qwen2AudioForConditionalGeneration"
:
_HfExamplesInfo
(
"Qwen/Qwen2-Audio-7B-Instruct"
),
# noqa: E501
"Qwen2AudioForConditionalGeneration"
:
_HfExamplesInfo
(
"Qwen/Qwen2-Audio-7B-Instruct"
),
# noqa: E501
"Qwen2VLForConditionalGeneration"
:
_HfExamplesInfo
(
"Qwen/Qwen2-VL-2B-Instruct"
),
# noqa: E501
"Qwen2VLForConditionalGeneration"
:
_HfExamplesInfo
(
"Qwen/Qwen2-VL-2B-Instruct"
),
# noqa: E501
"Qwen2_5_VLForConditionalGeneration"
:
_HfExamplesInfo
(
"Qwen/Qwen2.5-VL-3B-Instruct"
),
# noqa: E501
"Qwen2_5_VLForConditionalGeneration"
:
_HfExamplesInfo
(
"Qwen/Qwen2.5-VL-3B-Instruct"
,
# noqa: E501
max_model_len
=
4096
),
"Qwen2_5OmniModel"
:
_HfExamplesInfo
(
"Qwen/Qwen2.5-Omni-3B"
),
"Qwen2_5OmniModel"
:
_HfExamplesInfo
(
"Qwen/Qwen2.5-Omni-3B"
),
"Qwen2_5OmniForConditionalGeneration"
:
_HfExamplesInfo
(
"Qwen/Qwen2.5-Omni-7B-AWQ"
),
# noqa: E501
"Qwen2_5OmniForConditionalGeneration"
:
_HfExamplesInfo
(
"Qwen/Qwen2.5-Omni-7B-AWQ"
),
# noqa: E501
"SkyworkR1VChatModel"
:
_HfExamplesInfo
(
"Skywork/Skywork-R1V-38B"
),
"SkyworkR1VChatModel"
:
_HfExamplesInfo
(
"Skywork/Skywork-R1V-38B"
),
...
...
vllm/v1/worker/gpu_model_runner.py
View file @
6bbf1795
...
@@ -2219,8 +2219,8 @@ class GPUModelRunner(LoRAModelRunnerMixin):
...
@@ -2219,8 +2219,8 @@ class GPUModelRunner(LoRAModelRunnerMixin):
encoder_budget
=
min
(
self
.
max_num_encoder_input_tokens
,
encoder_budget
=
min
(
self
.
max_num_encoder_input_tokens
,
self
.
encoder_cache_size
)
self
.
encoder_cache_size
)
max_num_mm_items_encoder_budget
=
cdiv
(
encoder_budget
,
max_num_mm_items_encoder_budget
=
encoder_budget
//
\
max_tokens_per_mm_item
)
max_tokens_per_mm_item
# Check how many items of this modality can be supported by
# Check how many items of this modality can be supported by
# the decoder budget.
# the decoder budget.
...
@@ -2233,8 +2233,10 @@ class GPUModelRunner(LoRAModelRunnerMixin):
...
@@ -2233,8 +2233,10 @@ class GPUModelRunner(LoRAModelRunnerMixin):
max_num_mm_items_decoder_budget
=
self
.
max_num_reqs
*
\
max_num_mm_items_decoder_budget
=
self
.
max_num_reqs
*
\
max_mm_items_per_req
max_mm_items_per_req
max_num_mm_items
=
min
(
max_num_mm_items_encoder_budget
,
max_num_mm_items
=
max
(
max_num_mm_items_decoder_budget
)
1
,
min
(
max_num_mm_items_encoder_budget
,
max_num_mm_items_decoder_budget
))
logger
.
info
(
logger
.
info
(
"Encoder cache will be initialized with a budget of %s tokens,"
"Encoder cache will be initialized with a budget of %s tokens,"
...
@@ -2244,7 +2246,7 @@ class GPUModelRunner(LoRAModelRunnerMixin):
...
@@ -2244,7 +2246,7 @@ class GPUModelRunner(LoRAModelRunnerMixin):
# Create dummy batch of multimodal inputs.
# Create dummy batch of multimodal inputs.
dummy_mm_kwargs
=
self
.
mm_registry
.
get_decoder_dummy_data
(
dummy_mm_kwargs
=
self
.
mm_registry
.
get_decoder_dummy_data
(
model_config
=
self
.
model_config
,
model_config
=
self
.
model_config
,
seq_len
=
self
.
max_num_tokens
,
seq_len
=
max_tokens_per_mm_item
,
mm_counts
=
{
mm_counts
=
{
dummy_data_modality
:
1
dummy_data_modality
:
1
},
},
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment