Commit 787b84a9 (unverified), authored by Roger Wang, committed by GitHub
Browse files

[Bugfix] Follow-up fix on MediaWithBytes (#29951)


Signed-off-by: Roger Wang <hey@rogerw.io>
parent 42c19496
@@ -21,6 +21,8 @@ class MediaWithBytes(Generic[_T]):
  The wrapper delegates attribute access to the underlying media object,
  making it behave transparently like the wrapped type (e.g., PIL.Image).
+ NOTE: Currently, this wrapper is used only for the image modality.
  """
  media: _T
......
@@ -32,6 +32,7 @@ if TYPE_CHECKING:
  from PIL.Image import Image
  from transformers.feature_extraction_utils import BatchFeature
+ from .base import MediaWithBytes
  from .processing import MultiModalHashes
  else:
@@ -59,7 +60,7 @@ Represents a single audio
  item, which can be passed to a HuggingFace `AudioProcessor`.
  """
- ImageItem: TypeAlias = Union[HfImageItem, "torch.Tensor"]
+ ImageItem: TypeAlias = Union[HfImageItem, "torch.Tensor", "MediaWithBytes[HfImageItem]"]
  """
  A `transformers.image_utils.ImageInput` representing a single image
  item, which can be passed to a HuggingFace `ImageProcessor`.
......
@@ -484,7 +484,7 @@ class MultiModalDataParser:
  return ImageEmbeddingItems(data)
  if (
- isinstance(data, PILImage.Image)
+ isinstance(data, (PILImage.Image, MediaWithBytes))
  or isinstance(data, (np.ndarray, torch.Tensor))
  and data.ndim == 3
  ):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment