Unverified commit b3cf368d, authored by lkchen, committed by GitHub
Browse files

[V1][Molmo] Fix get_multimodal_embeddings() in molmo.py (#14161)

parent c8525f06
......@@ -476,7 +476,9 @@ class UltravoxModel(nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA):
return result
def get_multimodal_embeddings(self, **kwargs) -> Optional[NestedTensors]:
def get_multimodal_embeddings(
self, **kwargs
) -> Union[list[torch.Tensor], torch.Tensor, tuple[torch.Tensor, ...]]:
audio_input = self._parse_and_validate_audio_input(**kwargs)
if audio_input is None:
return None
......
......@@ -692,7 +692,9 @@ class WhisperForConditionalGeneration(nn.Module, SupportsTranscription,
)
return decoder_outputs
def get_multimodal_embeddings(self, **kwargs) -> Optional[NestedTensors]:
def get_multimodal_embeddings(
self, **kwargs
) -> Union[list[torch.Tensor], torch.Tensor, tuple[torch.Tensor, ...]]:
# TODO: This method does not obey the interface for SupportsMultiModal.
# Refactor this once encoder/decoder support is implemented in V1.
audio_input = self._parse_and_validate_audio_input(**kwargs)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment