Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
787b84a9
Unverified
Commit
787b84a9
authored
Dec 03, 2025
by
Roger Wang
Committed by
GitHub
Dec 03, 2025
Browse files
[Bugfix] Follow-up fix on MediaWithBytes (#29951)
Signed-off-by:
Roger Wang
<
hey@rogerw.io
>
parent
42c19496
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
5 additions
and
2 deletions
+5
-2
vllm/multimodal/base.py
vllm/multimodal/base.py
+2
-0
vllm/multimodal/inputs.py
vllm/multimodal/inputs.py
+2
-1
vllm/multimodal/parse.py
vllm/multimodal/parse.py
+1
-1
No files found.
vllm/multimodal/base.py
View file @
787b84a9
...
@@ -21,6 +21,8 @@ class MediaWithBytes(Generic[_T]):
...
@@ -21,6 +21,8 @@ class MediaWithBytes(Generic[_T]):
The wrapper delegates attribute access to the underlying media object,
The wrapper delegates attribute access to the underlying media object,
making it behave transparently like the wrapped type (e.g., PIL.Image).
making it behave transparently like the wrapped type (e.g., PIL.Image).
NOTE: Currently, this wrapper is used only for the image modality.
"""
"""
media
:
_T
media
:
_T
...
...
vllm/multimodal/inputs.py
View file @
787b84a9
...
@@ -32,6 +32,7 @@ if TYPE_CHECKING:
...
@@ -32,6 +32,7 @@ if TYPE_CHECKING:
from
PIL.Image
import
Image
from
PIL.Image
import
Image
from
transformers.feature_extraction_utils
import
BatchFeature
from
transformers.feature_extraction_utils
import
BatchFeature
from
.base
import
MediaWithBytes
from
.processing
import
MultiModalHashes
from
.processing
import
MultiModalHashes
else
:
else
:
...
@@ -59,7 +60,7 @@ Represents a single audio
...
@@ -59,7 +60,7 @@ Represents a single audio
item, which can be passed to a HuggingFace `AudioProcessor`.
item, which can be passed to a HuggingFace `AudioProcessor`.
"""
"""
ImageItem
:
TypeAlias
=
Union
[
HfImageItem
,
"torch.Tensor"
]
ImageItem
:
TypeAlias
=
Union
[
HfImageItem
,
"torch.Tensor"
,
"MediaWithBytes[HfImageItem]"
]
"""
"""
A `transformers.image_utils.ImageInput` representing a single image
A `transformers.image_utils.ImageInput` representing a single image
item, which can be passed to a HuggingFace `ImageProcessor`.
item, which can be passed to a HuggingFace `ImageProcessor`.
...
...
vllm/multimodal/parse.py
View file @
787b84a9
...
@@ -484,7 +484,7 @@ class MultiModalDataParser:
...
@@ -484,7 +484,7 @@ class MultiModalDataParser:
return
ImageEmbeddingItems
(
data
)
return
ImageEmbeddingItems
(
data
)
if
(
if
(
isinstance
(
data
,
PILImage
.
Image
)
isinstance
(
data
,
(
PILImage
.
Image
,
MediaWithBytes
)
)
or
isinstance
(
data
,
(
np
.
ndarray
,
torch
.
Tensor
))
or
isinstance
(
data
,
(
np
.
ndarray
,
torch
.
Tensor
))
and
data
.
ndim
==
3
and
data
.
ndim
==
3
):
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment