Unverified Commit 30409af6 authored by amyeroberts, committed by GitHub

Update InstructBLIP & Align values after rescale update (#25209)

* Update InstructBLIP values
Note: the tests are not independent. Running the test independently produces different logits compared to running all the integration tests.

* Update test values after rescale update

* Remove leftover commented-out code

* Revert to previous rescaling logic

* Update rescale tests
parent 15082a9d
@@ -155,10 +155,11 @@ class EfficientNetImageProcessor(BaseImageProcessor):
         """
         Rescale an image by a scale factor.

-        If offset is True, the image is rescaled between [-1, 1].
-            image = image * scale * 2 - 1
+        If `offset` is `True`, the image has its values rescaled by `scale` and then offset by 1. If `scale` is
+        1/127.5, the image is rescaled between [-1, 1].
+            image = image * scale - 1

-        If offset is False, the image is rescaled between [0, 1].
+        If `offset` is `False`, and `scale` is 1/255, the image is rescaled between [0, 1].
             image = image * scale

         Args:
@@ -171,7 +172,6 @@ class EfficientNetImageProcessor(BaseImageProcessor):
             data_format (`str` or `ChannelDimension`, *optional*):
                 The channel dimension format of the image. If not provided, it will be the same as the input image.
         """
-        scale = scale * 2 if offset else scale
         rescaled_image = rescale(image, scale=scale, data_format=data_format, **kwargs)

         if offset:
...
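For context, here is a minimal standalone sketch of the behaviour the updated docstring describes, assuming plain NumPy (`rescale_with_offset` is an illustrative helper, not part of the transformers API); the same change is applied to the ViViT processor below:

```python
import numpy as np


def rescale_with_offset(image: np.ndarray, scale: float, offset: bool = True) -> np.ndarray:
    # Scale the pixel values; when offset=True, shift them down by 1 afterwards.
    # With scale=1/127.5 and offset=True, uint8 images land in roughly [-1, 1];
    # with scale=1/255 and offset=False, they land in [0, 1].
    rescaled = image.astype(np.float32) * scale
    return rescaled - 1 if offset else rescaled


image = np.arange(256, dtype=np.uint8).reshape(16, 16)
out = rescale_with_offset(image, scale=1 / 127.5, offset=True)
print(out.min(), out.max())  # approximately -1.0 and 1.0
```

The PR keeps this logic but removes the internal `scale = scale * 2` doubling, so callers now pass the full factor (e.g. `1/127.5`) themselves.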
@@ -179,10 +179,11 @@ class VivitImageProcessor(BaseImageProcessor):
         """
         Rescale an image by a scale factor.

-        If offset is True, the image is rescaled between [-1, 1].
-            image = image * scale * 2 - 1
+        If `offset` is `True`, the image has its values rescaled by `scale` and then offset by 1. If `scale` is
+        1/127.5, the image is rescaled between [-1, 1].
+            image = image * scale - 1

-        If offset is False, the image is rescaled between [0, 1].
+        If `offset` is `False`, and `scale` is 1/255, the image is rescaled between [0, 1].
             image = image * scale

         Args:
@@ -195,7 +196,6 @@ class VivitImageProcessor(BaseImageProcessor):
             data_format (`str` or `ChannelDimension`, *optional*):
                 The channel dimension format of the image. If not provided, it will be the same as the input image.
         """
-        scale = scale * 2 if offset else scale
         rescaled_image = rescale(image, scale=scale, data_format=data_format, **kwargs)

         if offset:
...
@@ -200,8 +200,8 @@ class EfficientNetImageProcessorTest(ImageProcessingSavingTestMixin, unittest.TestCase):
         image_processor = self.image_processing_class(**self.image_processor_dict)

-        rescaled_image = image_processor.rescale(image, scale=1 / 255)
-        expected_image = (image * (2 / 255.0)).astype(np.float32) - 1
+        rescaled_image = image_processor.rescale(image, scale=1 / 127.5)
+        expected_image = (image * (1 / 127.5)).astype(np.float32) - 1
         self.assertTrue(np.allclose(rescaled_image, expected_image))

         rescaled_image = image_processor.rescale(image, scale=1 / 255, offset=False)
...
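The test update is numerically equivalent to what the old code computed: the removed `scale = scale * 2` doubling turned `1/255` into `2/255`, which is the same factor as `1/127.5`. A quick standalone check (plain NumPy, not part of the test suite; the identical change is made to the ViViT test further down):

```python
import numpy as np

# 2/255 and 1/127.5 are the same factor, so the old expectation
# (image * (2 / 255.0) - 1) and the new one (image * (1 / 127.5) - 1) agree.
assert np.isclose(2 / 255, 1 / 127.5)

image = np.random.randint(0, 256, size=(3, 8, 8), dtype=np.uint8)
old_expected = (image * (2 / 255.0)).astype(np.float32) - 1
new_expected = (image * (1 / 127.5)).astype(np.float32) - 1
assert np.allclose(old_expected, new_expected)
```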
@@ -538,7 +538,7 @@ class InstructBlipModelIntegrationTest(unittest.TestCase):
         logits = model(**inputs).logits

         expected_slice = torch.tensor(
-            [[-3.5020, -12.3281, 8.4453], [-5.1406, -11.9609, 7.8711], [-4.0430, -13.4375, 9.1172]],
+            [[-3.4727, -11.8203, 8.3828], [-5.1172, -11.3438, 7.7656], [-4.0742, -13.4688, 9.1953]],
             device=torch_device,
         )
         self.assertTrue(torch.allclose(logits[0, :3, :3].float(), expected_slice, atol=1e-3))
@@ -548,12 +548,12 @@ class InstructBlipModelIntegrationTest(unittest.TestCase):
         generated_text = processor.batch_decode(outputs, skip_special_tokens=True)[0].strip()

         # fmt: off
-        expected_outputs = [ 2, 450, 22910, 9565, 310, 445, 1967, 338, 393, 263, 767, 338, 13977, 292, 22095, 373, 278, 1250, 310, 263, 13328, 20134, 29963, 1550, 19500, 1623, 263, 19587, 4272, 11952, 29889]
+        expected_outputs = [2, 450, 22910, 9565, 310, 445, 1967, 338, 393, 263, 767, 338, 13977, 292, 22095, 373, 278, 1250, 310, 263, 13328, 20134, 29963, 1550, 19500, 373, 263, 19587, 4272, 11952, 29889]
         # fmt: on
         self.assertEqual(outputs[0].tolist(), expected_outputs)
         self.assertEqual(
             generated_text,
-            "The unusual aspect of this image is that a man is ironing clothes on the back of a yellow SUV while driving down a busy city street.",
+            "The unusual aspect of this image is that a man is ironing clothes on the back of a yellow SUV while driving on a busy city street.",
         )

     def test_inference_flant5_xl(self):
...
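The integration-test comparison itself is unchanged; only the hard-coded expected values move after the rescale update. As a generic illustration of the tolerance check (hypothetical stand-in tensors, not the real model outputs):

```python
import torch

# Stand-in values; the real test compares logits[0, :3, :3] from InstructBLIP
# against a hard-coded expected slice.
logits_slice = torch.tensor([[-3.4727, -11.8203, 8.3828]])
expected = torch.tensor([[-3.4730, -11.8200, 8.3830]])

# atol=1e-3 absorbs small numerical drift across hardware and kernel versions.
assert torch.allclose(logits_slice, expected, atol=1e-3)
```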
@@ -219,8 +219,8 @@ class VivitImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase):
         image_processor = self.image_processing_class(**self.image_processor_dict)

-        rescaled_image = image_processor.rescale(image, scale=1 / 255)
-        expected_image = (image * (2 / 255.0)).astype(np.float32) - 1
+        rescaled_image = image_processor.rescale(image, scale=1 / 127.5)
+        expected_image = (image * (1 / 127.5)).astype(np.float32) - 1
         self.assertTrue(np.allclose(rescaled_image, expected_image))

         rescaled_image = image_processor.rescale(image, scale=1 / 255, offset=False)
...