Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
e30859df
Unverified
Commit
e30859df
authored
Nov 26, 2025
by
Cyrus Leung
Committed by
GitHub
Nov 26, 2025
Browse files
[Bugfix] Fix handling of image embeds in models (#29480)
Signed-off-by:
DarkLight1337
<
tlleungac@connect.ust.hk
>
parent
452a7c9f
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
4 additions
and
15 deletions
+4
-15
vllm/model_executor/models/deepseek_vl2.py
vllm/model_executor/models/deepseek_vl2.py
+2
-13
vllm/model_executor/models/llava_next.py
vllm/model_executor/models/llava_next.py
+1
-1
vllm/model_executor/models/llava_onevision.py
vllm/model_executor/models/llava_onevision.py
+1
-1
No files found.
vllm/model_executor/models/deepseek_vl2.py
View file @
e30859df
...
@@ -48,7 +48,6 @@ from vllm.transformers_utils.configs.deepseek_vl2 import (
...
@@ -48,7 +48,6 @@ from vllm.transformers_utils.configs.deepseek_vl2 import (
)
)
from
vllm.transformers_utils.processors.deepseek_vl2
import
DeepseekVLV2Processor
from
vllm.transformers_utils.processors.deepseek_vl2
import
DeepseekVLV2Processor
from
vllm.transformers_utils.tokenizer
import
cached_tokenizer_from_config
from
vllm.transformers_utils.tokenizer
import
cached_tokenizer_from_config
from
vllm.utils.collection_utils
import
is_list_of
from
vllm.utils.tensor_schema
import
TensorSchema
,
TensorShape
from
vllm.utils.tensor_schema
import
TensorSchema
,
TensorShape
from
vllm.utils.torch_utils
import
set_default_torch_dtype
from
vllm.utils.torch_utils
import
set_default_torch_dtype
...
@@ -595,19 +594,9 @@ class DeepseekVLV2ForCausalLM(nn.Module, SupportsMultiModal, SupportsPP):
...
@@ -595,19 +594,9 @@ class DeepseekVLV2ForCausalLM(nn.Module, SupportsMultiModal, SupportsPP):
def
_process_image_input
(
def
_process_image_input
(
self
,
image_input
:
DeepseekVL2ImageInputs
self
,
image_input
:
DeepseekVL2ImageInputs
)
->
list
[
torch
.
Tensor
]:
)
->
torch
.
Tensor
|
list
[
torch
.
Tensor
]:
if
image_input
[
"type"
]
==
"image_embeds"
:
if
image_input
[
"type"
]
==
"image_embeds"
:
image_data
=
image_input
[
"data"
]
return
image_input
[
"data"
]
if
is_list_of
(
image_data
,
torch
.
Tensor
):
# it's already a list of tensors
return
image_data
if
len
(
image_data
.
shape
)
==
3
:
# 3D tensor
return
list
(
torch
.
unbind
(
image_data
,
dim
=
0
))
raise
ValueError
(
"We expect batched 2D tensors; "
"this can be either a list of 2D tensors or a single 3D tensor."
)
pixel_values
=
image_input
[
"data"
]
pixel_values
=
image_input
[
"data"
]
images_spatial_crop
=
image_input
[
"images_spatial_crop"
]
images_spatial_crop
=
image_input
[
"images_spatial_crop"
]
...
...
vllm/model_executor/models/llava_next.py
View file @
e30859df
...
@@ -460,7 +460,7 @@ class LlavaNextForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsP
...
@@ -460,7 +460,7 @@ class LlavaNextForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsP
image_input
:
LlavaNextImageInputs
,
image_input
:
LlavaNextImageInputs
,
)
->
torch
.
Tensor
|
list
[
torch
.
Tensor
]:
)
->
torch
.
Tensor
|
list
[
torch
.
Tensor
]:
if
image_input
[
"type"
]
==
"image_embeds"
:
if
image_input
[
"type"
]
==
"image_embeds"
:
return
[
image_input
[
"data"
]
]
return
image_input
[
"data"
]
patch_embeddings
=
self
.
_process_image_pixels
(
image_input
)
patch_embeddings
=
self
.
_process_image_pixels
(
image_input
)
...
...
vllm/model_executor/models/llava_onevision.py
View file @
e30859df
...
@@ -763,7 +763,7 @@ class LlavaOnevisionForConditionalGeneration(nn.Module, SupportsMultiModal, Supp
...
@@ -763,7 +763,7 @@ class LlavaOnevisionForConditionalGeneration(nn.Module, SupportsMultiModal, Supp
image_input
:
LlavaOnevisionImageInputs
,
image_input
:
LlavaOnevisionImageInputs
,
)
->
torch
.
Tensor
|
list
[
torch
.
Tensor
]:
)
->
torch
.
Tensor
|
list
[
torch
.
Tensor
]:
if
image_input
[
"type"
]
==
"image_embeds"
:
if
image_input
[
"type"
]
==
"image_embeds"
:
return
[
image_input
[
"data"
]
]
return
image_input
[
"data"
]
patch_embeddings
=
self
.
_process_image_pixels
(
image_input
)
patch_embeddings
=
self
.
_process_image_pixels
(
image_input
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment