Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
b50602d5
Unverified
Commit
b50602d5
authored
May 27, 2025
by
Lukas Geiger
Committed by
GitHub
May 27, 2025
Browse files
[Model][Gemma3] Cast image pixel values already on CPU (#18732)
Signed-off-by:
Lukas Geiger
<
lukas.geiger94@gmail.com
>
parent
1f1b1bc0
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
6 additions
and
3 deletions
+6
-3
vllm/model_executor/models/gemma3_mm.py
vllm/model_executor/models/gemma3_mm.py
+6
-3
No files found.
vllm/model_executor/models/gemma3_mm.py
View file @
b50602d5
...
...
@@ -263,6 +263,11 @@ class Gemma3MultiModalProcessor(BaseMultiModalProcessor[Gemma3ProcessingInfo]):
mm_data
,
mm_kwargs
,
)
if
"pixel_values"
in
processed_outputs
:
# Cast pixel values to model dtype already here,
# so we need to transfer less data to the GPU
processed_outputs
[
"pixel_values"
]
=
processed_outputs
[
"pixel_values"
].
to
(
self
.
info
.
ctx
.
model_config
.
dtype
)
# HF processor pops the `num_crops` kwarg, which is needed by vLLM
if
(
images
:
=
mm_data
.
get
(
"images"
))
is
not
None
:
...
...
@@ -543,9 +548,7 @@ class Gemma3ForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP,
vision_tower
:
SiglipVisionModel
,
pixel_values
:
torch
.
Tensor
,
)
->
torch
.
Tensor
:
target_dtype
=
vision_tower
.
get_input_embeddings
().
weight
.
dtype
image_features
=
vision_tower
(
pixel_values
.
to
(
dtype
=
target_dtype
))
return
image_features
return
vision_tower
(
pixel_values
)
def
_process_image_input
(
self
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment