Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
4fb349f6
Unverified
Commit
4fb349f6
authored
May 18, 2025
by
Lifu Huang
Committed by
GitHub
May 18, 2025
Browse files
Fix copy-paste error in phi4mm image processing (#18315)
Signed-off-by:
Lifu Huang
<
lifu.hlf@gmail.com
>
parent
908733ac
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
7 additions
and
19 deletions
+7
-19
vllm/model_executor/models/phi4mm.py
vllm/model_executor/models/phi4mm.py
+7
-19
No files found.
vllm/model_executor/models/phi4mm.py
View file @
4fb349f6
...
@@ -415,15 +415,6 @@ class Phi4MMImagePixelInputs(TypedDict):
...
@@ -415,15 +415,6 @@ class Phi4MMImagePixelInputs(TypedDict):
"""Shape: `(batch_size * num_images, H_mask, W_mask)`"""
"""Shape: `(batch_size * num_images, H_mask, W_mask)`"""
class
Phi4MMImageEmbeddingInputs
(
TypedDict
):
type
:
Literal
[
"image_embeds"
]
data
:
Union
[
torch
.
Tensor
,
list
[
torch
.
Tensor
]]
"""Shape: `(batch_size * num_images, image_feature_size, hidden_size)`
`hidden_size` must match the hidden size of language model backbone.
"""
class
Phi4MMAudioFeatureInputs
(
TypedDict
):
class
Phi4MMAudioFeatureInputs
(
TypedDict
):
type
:
Literal
[
"audio_features"
]
type
:
Literal
[
"audio_features"
]
data
:
Union
[
torch
.
Tensor
,
list
[
torch
.
Tensor
]]
data
:
Union
[
torch
.
Tensor
,
list
[
torch
.
Tensor
]]
...
@@ -436,7 +427,6 @@ class Phi4MMAudioEmbeddingInputs(TypedDict):
...
@@ -436,7 +427,6 @@ class Phi4MMAudioEmbeddingInputs(TypedDict):
"""Shape: `(batch_size, num_audios, audio_feature_size, hidden_size)"""
"""Shape: `(batch_size, num_audios, audio_feature_size, hidden_size)"""
Phi4MMImageInput
=
Union
[
Phi4MMImagePixelInputs
,
Phi4MMImageEmbeddingInputs
]
Phi4MMAudioInputs
=
Union
[
Phi4MMAudioFeatureInputs
,
Phi4MMAudioEmbeddingInputs
]
Phi4MMAudioInputs
=
Union
[
Phi4MMAudioFeatureInputs
,
Phi4MMAudioEmbeddingInputs
]
...
@@ -1112,15 +1102,13 @@ class Phi4MMForCausalLM(nn.Module, SupportsLoRA, SupportsMultiModal):
...
@@ -1112,15 +1102,13 @@ class Phi4MMForCausalLM(nn.Module, SupportsLoRA, SupportsMultiModal):
def
_process_image_input
(
def
_process_image_input
(
self
,
image_input
:
Phi4MMImagePixelInputs
)
->
list
[
torch
.
Tensor
]:
self
,
image_input
:
Phi4MMImagePixelInputs
)
->
list
[
torch
.
Tensor
]:
if
image_input
[
"type"
]
==
"image_embeds"
:
image_embeds
=
image_input
[
"image_embeds"
].
type
(
self
.
visual
.
dtype
)
dtype
=
next
(
self
.
vision_encoder
.
parameters
()).
dtype
else
:
pixel_values
=
image_input
[
'data'
].
to
(
dtype
)
dtype
=
next
(
self
.
vision_encoder
.
parameters
()).
dtype
image_sizes
=
image_input
[
'image_sizes'
]
pixel_values
=
image_input
[
'data'
].
to
(
dtype
)
image_attention_mask
=
image_input
[
'image_attention_mask'
]
image_sizes
=
image_input
[
'image_sizes'
]
image_embeds
=
self
.
vision_encoder
(
pixel_values
,
image_sizes
,
image_attention_mask
=
image_input
[
'image_attention_mask'
]
image_attention_mask
)
image_embeds
=
self
.
vision_encoder
(
pixel_values
,
image_sizes
,
image_attention_mask
)
return
image_embeds
return
image_embeds
def
get_multimodal_embeddings
(
def
get_multimodal_embeddings
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment