Unverified commit 513298f1, authored by Yuekai Zhang, committed by GitHub
Browse files

[Bugfix] fix bf16 multimodal model hash (#23623)


Signed-off-by: Yuekai Zhang <zhangyuekai@foxmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Roger Wang <hey@rogerw.io>
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
parent 379f828f
...@@ -43,7 +43,19 @@ class MultiModalHasher: ...@@ -43,7 +43,19 @@ class MultiModalHasher:
return cls.item_to_bytes( return cls.item_to_bytes(
"image", np.asarray(convert_image_mode(obj, "RGBA"))) "image", np.asarray(convert_image_mode(obj, "RGBA")))
if isinstance(obj, torch.Tensor): if isinstance(obj, torch.Tensor):
return cls.item_to_bytes("tensor", obj.cpu().numpy()) tensor_obj: torch.Tensor = obj.cpu()
tensor_dtype = tensor_obj.dtype
if tensor_dtype == torch.bfloat16:
tensor_obj = tensor_obj.contiguous()
tensor_obj = tensor_obj.view(
(tensor_obj.numel(), )).view(torch.uint8)
return cls.item_to_bytes(
"tensor", {
"original_dtype": str(tensor_dtype),
"original_shape": tuple(tensor_obj.shape),
"data": tensor_obj.numpy()
})
return cls.item_to_bytes("tensor", tensor_obj.numpy())
if isinstance(obj, np.ndarray): if isinstance(obj, np.ndarray):
# If the array is non-contiguous, we need to copy it first # If the array is non-contiguous, we need to copy it first
arr_data = obj.data if obj.flags.c_contiguous else obj.tobytes() arr_data = obj.data if obj.flags.c_contiguous else obj.tobytes()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment