Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
04139ade
Unverified
Commit
04139ade
authored
Dec 20, 2024
by
Roger Wang
Committed by
GitHub
Dec 20, 2024
Browse files
[V1] Fix profiling for models with merged input processor (#11370)
Signed-off-by:
ywang96
<
ywang@roblox.com
>
parent
1ecc645b
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
32 additions
and
12 deletions
+32
-12
vllm/v1/worker/gpu_model_runner.py
vllm/v1/worker/gpu_model_runner.py
+32
-12
No files found.
vllm/v1/worker/gpu_model_runner.py
View file @
04139ade
...
...
@@ -635,17 +635,6 @@ class GPUModelRunner:
)
dummy_mm_data
=
dummy_request_data
.
multi_modal_data
# Compute MM hashes (if enabled)
mm_hashes
=
None
if
self
.
use_hash
:
mm_hashes
=
self
.
mm_hasher
.
hash_dummy_mm_data
(
dummy_mm_data
)
dummy_mm_kwargs
=
self
.
mm_input_mapper_client
.
process_inputs
(
mm_data
=
dummy_mm_data
,
mm_hashes
=
mm_hashes
,
mm_processor_kwargs
=
None
,
precomputed_mm_inputs
=
None
)
# NOTE: Currently the model is profiled with a single non-text
# modality even when it supports multiple modalities.
max_tokens_per_mm_item
=
max
(
...
...
@@ -660,8 +649,39 @@ class GPUModelRunner:
# (e.g., multiple images) for a single request, therefore here we
# always replicate the first item max_num_mm_items times, since in V1
# they are scheduled to be processed separately.
# When a model has a merged processor, its dummy data is already a
# batched `MultiModalKwargs`, so we need to "unbatch" it by taking
# the first item of each batched tensor.
# TODO (ywang96): This is somewhat hacky. Refactor this to be
# consistent with the other case.
if
isinstance
(
dummy_mm_data
,
MultiModalKwargs
):
dummy_mm_kwargs
=
{
k
:
v
[
0
].
unsqueeze
(
0
)
for
k
,
v
in
dummy_mm_data
.
items
()
}
# Otherwise the model's dummy data is explicitly defined as a
# `MultiModalDataDict`, so it needs to be processed through the input
# mapper.
else
:
# Compute MM hashes (if enabled)
mm_hashes
=
None
if
self
.
use_hash
:
mm_hashes
=
self
.
mm_hasher
.
hash_dummy_mm_data
(
dummy_mm_data
)
mm_kwargs_list
=
self
.
mm_input_mapper_client
.
process_inputs
(
mm_data
=
dummy_mm_data
,
mm_hashes
=
mm_hashes
,
mm_processor_kwargs
=
None
,
precomputed_mm_inputs
=
None
)
# Take the first `MultiModalKwargs`
dummy_mm_kwargs
=
mm_kwargs_list
[
0
]
batched_dummy_mm_inputs
=
MultiModalKwargs
.
batch
(
[
dummy_mm_kwargs
[
0
]
]
*
max_num_mm_items
)
[
dummy_mm_kwargs
]
*
max_num_mm_items
)
batched_dummy_mm_inputs
=
MultiModalKwargs
.
as_kwargs
(
batched_dummy_mm_inputs
,
device
=
self
.
device
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment