# Using the mean and variance of the ImageNet dataset for all input images can lead to accuracy issues, while using the mean and variance of each input image is a more accurate choice.
mean=img.mean(dim=[1,2],keepdim=True)
# Prevent division by zero; clamp to minimum value of 1e-6
@@ -201,10 +217,6 @@ class InternVLImageProcessor(BaseMultimodalProcessor):
...
@@ -201,10 +217,6 @@ class InternVLImageProcessor(BaseMultimodalProcessor):
else:
else:
tensor=image.cuda()# assume already tensor
tensor=image.cuda()# assume already tensor
# Using the mean and variance of the ImageNet dataset for all input images can lead to accuracy issues, while using the mean and variance of each input image is a more accurate choice.
mean=tensor.mean(dim=[1,2],keepdim=True)
# Prevent division by zero; clamp to minimum value of 1e-6