"vscode:/vscode.git/clone" did not exist on "aa3868ecfe65036349412ce54fa5b07cb545d836"
Unverified commit 0a298ea4, authored by Isotr0py, committed by GitHub
Browse files

[Bugfix] Fix no video/image profiling edge case for `MultiModalDataParser` (#15828)


Signed-off-by: Isotr0py <2037008807@qq.com>
parent d330558b
......@@ -295,7 +295,7 @@ class MultiModalDataItems(UserDict[str, ModalityDataItems[Any, Any]]):
ModalityDataParser: TypeAlias = Callable[[ModalityData[Any]],
ModalityDataItems[Any, Any]]
Optional[ModalityDataItems[Any, Any]]]
class MultiModalDataParser:
......@@ -319,7 +319,15 @@ class MultiModalDataParser:
if isinstance(data, torch.Tensor):
return data.ndim == 3
if is_list_of(data, torch.Tensor):
return len(data) == 0 or data[0].ndim == 2
return data[0].ndim == 2
return False
def _is_empty(self, data: object) -> TypeGuard[None]:
if isinstance(data, list):
return len(data) == 0
if isinstance(data, (np.ndarray, torch.Tensor)):
return data.size == 0
return False
......@@ -341,7 +349,12 @@ class MultiModalDataParser:
def _parse_audio_data(
self,
data: ModalityData[AudioItem],
) -> ModalityDataItems[Any, Any]:
) -> Optional[ModalityDataItems[Any, Any]]:
# also check single audio item with sampling rate
if self._is_empty(data) or (isinstance(data, tuple)
and self._is_empty(data[0])):
return None
if self._is_embeddings(data):
return AudioEmbeddingItems(data)
......@@ -378,7 +391,10 @@ class MultiModalDataParser:
def _parse_image_data(
self,
data: ModalityData[ImageItem],
) -> ModalityDataItems[Any, Any]:
) -> Optional[ModalityDataItems[Any, Any]]:
if self._is_empty(data):
return None
if self._is_embeddings(data):
return ImageEmbeddingItems(data)
......@@ -396,7 +412,10 @@ class MultiModalDataParser:
def _parse_video_data(
self,
data: ModalityData[VideoItem],
) -> ModalityDataItems[Any, Any]:
) -> Optional[ModalityDataItems[Any, Any]]:
if self._is_empty(data):
return None
if self._is_embeddings(data):
return VideoEmbeddingItems(data)
......@@ -427,6 +446,8 @@ class MultiModalDataParser:
if k not in subparsers:
raise ValueError(f"Unsupported modality: {k}")
mm_items[k] = subparsers[k](v)
# ignore empty embedding data
if (parsed_data := subparsers[k](v)) is not None:
mm_items[k] = parsed_data
return mm_items
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment