Unverified Commit 49e44b21 authored by amyeroberts's avatar amyeroberts Committed by GitHub
Browse files

Update feature extractor methods to enable type cast before normalize (#18499)

* Update methods to optionally rescale
This is necessary to allow for casting our images / videos to numpy arrays within the feature extractors' call. We want to do this to make sure the behaviour is as expected when flags like `do_normalize` are False. Without the cast, skipping some transformations could produce an unexpected output type, e.g. a list of PIL images instead of numpy arrays.

* Cast images to numpy arrays in call to enable consistent behaviour with different configs

* Remove accidental clip changes

* Update tests to reflect the scaling logic
We write a generic `rescale` function to handle rescaling of our arrays. In order for the API to be intuitive, we take some factor c and rescale the image values by that. This means the rescaling done in `normalize` and `to_numpy_array` is now done with array * (1/255) instead of array / 255. This leads to small differences in the resulting image. When testing, this was in the order of 1e-8, and so deemed OK.
parent 86d0b26d
...@@ -131,6 +131,13 @@ class ImageFeatureExtractionMixin: ...@@ -131,6 +131,13 @@ class ImageFeatureExtractionMixin:
return image.convert("RGB") return image.convert("RGB")
def rescale(self, image: np.ndarray, scale: Union[float, int]) -> np.ndarray:
"""
Rescale an image by multiplying every value by `scale`.

The image is first passed to `self._ensure_format_supported` (defined elsewhere; presumably it rejects
unsupported input types -- confirm against its definition). No dtype cast happens here, so the dtype of
the result is whatever `image * scale` produces; callers that need a float result cast before calling.

Args:
image (`np.ndarray`):
The image to rescale.
scale (`float` or `int`):
The factor to multiply the image values by, e.g. `1 / 255.0` to map 0-255 values into 0-1.

Returns:
`np.ndarray`: The result of `image * scale`.
"""
self._ensure_format_supported(image)
return image * scale
def to_numpy_array(self, image, rescale=None, channel_first=True): def to_numpy_array(self, image, rescale=None, channel_first=True):
""" """
Converts `image` to a numpy array. Optionally rescales it and puts the channel dimension as the first Converts `image` to a numpy array. Optionally rescales it and puts the channel dimension as the first
...@@ -153,11 +160,10 @@ class ImageFeatureExtractionMixin: ...@@ -153,11 +160,10 @@ class ImageFeatureExtractionMixin:
if is_torch_tensor(image): if is_torch_tensor(image):
image = image.numpy() image = image.numpy()
if rescale is None: rescale = isinstance(image.flat[0], np.integer) if rescale is None else rescale
rescale = isinstance(image.flat[0], np.integer)
if rescale: if rescale:
image = image.astype(np.float32) / 255.0 image = self.rescale(image.astype(np.float32), 1 / 255.0)
if channel_first and image.ndim == 3: if channel_first and image.ndim == 3:
image = image.transpose(2, 0, 1) image = image.transpose(2, 0, 1)
...@@ -184,7 +190,7 @@ class ImageFeatureExtractionMixin: ...@@ -184,7 +190,7 @@ class ImageFeatureExtractionMixin:
image = np.expand_dims(image, axis=0) image = np.expand_dims(image, axis=0)
return image return image
def normalize(self, image, mean, std): def normalize(self, image, mean, std, rescale=False):
""" """
Normalizes `image` with `mean` and `std`. Note that this will trigger a conversion of `image` to a NumPy array Normalizes `image` with `mean` and `std`. Note that this will trigger a conversion of `image` to a NumPy array
if it's a PIL Image. if it's a PIL Image.
...@@ -196,11 +202,21 @@ class ImageFeatureExtractionMixin: ...@@ -196,11 +202,21 @@ class ImageFeatureExtractionMixin:
The mean (per channel) to use for normalization. The mean (per channel) to use for normalization.
std (`List[float]` or `np.ndarray` or `torch.Tensor`): std (`List[float]` or `np.ndarray` or `torch.Tensor`):
The standard deviation (per channel) to use for normalization. The standard deviation (per channel) to use for normalization.
rescale (`bool`, *optional*, defaults to `False`):
Whether or not to rescale the image to be between 0 and 1. If a PIL image is provided, scaling will
happen automatically.
""" """
self._ensure_format_supported(image) self._ensure_format_supported(image)
if isinstance(image, PIL.Image.Image): if isinstance(image, PIL.Image.Image):
image = self.to_numpy_array(image) image = self.to_numpy_array(image, rescale=True)
# If the input image is a PIL image, it automatically gets rescaled. If it's another
# type it may need rescaling.
elif rescale:
if isinstance(image, np.ndarray):
image = self.rescale(image.astype(np.float32), 1 / 255.0)
elif is_torch_tensor(image):
image = self.rescale(image.float(), 1 / 255.0)
if isinstance(image, np.ndarray): if isinstance(image, np.ndarray):
if not isinstance(mean, np.ndarray): if not isinstance(mean, np.ndarray):
......
...@@ -58,13 +58,13 @@ class ImageFeatureExtractionTester(unittest.TestCase): ...@@ -58,13 +58,13 @@ class ImageFeatureExtractionTester(unittest.TestCase):
array3 = feature_extractor.to_numpy_array(image, rescale=False) array3 = feature_extractor.to_numpy_array(image, rescale=False)
self.assertTrue(array3.dtype, np.uint8) self.assertTrue(array3.dtype, np.uint8)
self.assertEqual(array3.shape, (3, 16, 32)) self.assertEqual(array3.shape, (3, 16, 32))
self.assertTrue(np.array_equal(array1, array3.astype(np.float32) / 255.0)) self.assertTrue(np.array_equal(array1, array3.astype(np.float32) * (1 / 255.0)))
# Conversion with no rescale and not channel first # Conversion with no rescale and not channel first
array4 = feature_extractor.to_numpy_array(image, rescale=False, channel_first=False) array4 = feature_extractor.to_numpy_array(image, rescale=False, channel_first=False)
self.assertTrue(array4.dtype, np.uint8) self.assertTrue(array4.dtype, np.uint8)
self.assertEqual(array4.shape, (16, 32, 3)) self.assertEqual(array4.shape, (16, 32, 3))
self.assertTrue(np.array_equal(array2, array4.astype(np.float32) / 255.0)) self.assertTrue(np.array_equal(array2, array4.astype(np.float32) * (1 / 255.0)))
def test_conversion_array_to_array(self): def test_conversion_array_to_array(self):
feature_extractor = ImageFeatureExtractionMixin() feature_extractor = ImageFeatureExtractionMixin()
...@@ -74,13 +74,13 @@ class ImageFeatureExtractionTester(unittest.TestCase): ...@@ -74,13 +74,13 @@ class ImageFeatureExtractionTester(unittest.TestCase):
array1 = feature_extractor.to_numpy_array(array) array1 = feature_extractor.to_numpy_array(array)
self.assertTrue(array1.dtype, np.float32) self.assertTrue(array1.dtype, np.float32)
self.assertEqual(array1.shape, (3, 16, 32)) self.assertEqual(array1.shape, (3, 16, 32))
self.assertTrue(np.array_equal(array1, array.transpose(2, 0, 1).astype(np.float32) / 255.0)) self.assertTrue(np.array_equal(array1, array.transpose(2, 0, 1).astype(np.float32) * (1 / 255.0)))
# Same with no permute # Same with no permute
array2 = feature_extractor.to_numpy_array(array, channel_first=False) array2 = feature_extractor.to_numpy_array(array, channel_first=False)
self.assertTrue(array2.dtype, np.float32) self.assertTrue(array2.dtype, np.float32)
self.assertEqual(array2.shape, (16, 32, 3)) self.assertEqual(array2.shape, (16, 32, 3))
self.assertTrue(np.array_equal(array2, array.astype(np.float32) / 255.0)) self.assertTrue(np.array_equal(array2, array.astype(np.float32) * (1 / 255.0)))
# Force rescale to False # Force rescale to False
array3 = feature_extractor.to_numpy_array(array, rescale=False) array3 = feature_extractor.to_numpy_array(array, rescale=False)
...@@ -110,13 +110,13 @@ class ImageFeatureExtractionTester(unittest.TestCase): ...@@ -110,13 +110,13 @@ class ImageFeatureExtractionTester(unittest.TestCase):
array1 = feature_extractor.to_numpy_array(array) array1 = feature_extractor.to_numpy_array(array)
self.assertTrue(array1.dtype, np.float32) self.assertTrue(array1.dtype, np.float32)
self.assertEqual(array1.shape, (3, 16, 32)) self.assertEqual(array1.shape, (3, 16, 32))
self.assertTrue(np.array_equal(array1, array.transpose(2, 0, 1).astype(np.float32) / 255.0)) self.assertTrue(np.array_equal(array1, array.transpose(2, 0, 1).astype(np.float32) * (1 / 255.0)))
# Same with no permute # Same with no permute
array2 = feature_extractor.to_numpy_array(array, channel_first=False) array2 = feature_extractor.to_numpy_array(array, channel_first=False)
self.assertTrue(array2.dtype, np.float32) self.assertTrue(array2.dtype, np.float32)
self.assertEqual(array2.shape, (16, 32, 3)) self.assertEqual(array2.shape, (16, 32, 3))
self.assertTrue(np.array_equal(array2, array.astype(np.float32) / 255.0)) self.assertTrue(np.array_equal(array2, array.astype(np.float32) * (1 / 255.0)))
# Force rescale to False # Force rescale to False
array3 = feature_extractor.to_numpy_array(array, rescale=False) array3 = feature_extractor.to_numpy_array(array, rescale=False)
...@@ -160,7 +160,7 @@ class ImageFeatureExtractionTester(unittest.TestCase): ...@@ -160,7 +160,7 @@ class ImageFeatureExtractionTester(unittest.TestCase):
self.assertTrue(np.array_equal(np.array(image2), array)) self.assertTrue(np.array_equal(np.array(image2), array))
# If the array has floating type, it's rescaled by default. # If the array has floating type, it's rescaled by default.
image3 = feature_extractor.to_pil_image(array.astype(np.float32) / 255.0) image3 = feature_extractor.to_pil_image(array.astype(np.float32) * (1 / 255.0))
self.assertTrue(isinstance(image3, PIL.Image.Image)) self.assertTrue(isinstance(image3, PIL.Image.Image))
self.assertTrue(np.array_equal(np.array(image3), array)) self.assertTrue(np.array_equal(np.array(image3), array))
...@@ -170,7 +170,7 @@ class ImageFeatureExtractionTester(unittest.TestCase): ...@@ -170,7 +170,7 @@ class ImageFeatureExtractionTester(unittest.TestCase):
self.assertTrue(np.array_equal(np.array(image4), array)) self.assertTrue(np.array_equal(np.array(image4), array))
# And with floats + channel first. # And with floats + channel first.
image5 = feature_extractor.to_pil_image(array.transpose(2, 0, 1).astype(np.float32) / 255.0) image5 = feature_extractor.to_pil_image(array.transpose(2, 0, 1).astype(np.float32) * (1 / 255.0))
self.assertTrue(isinstance(image5, PIL.Image.Image)) self.assertTrue(isinstance(image5, PIL.Image.Image))
self.assertTrue(np.array_equal(np.array(image5), array)) self.assertTrue(np.array_equal(np.array(image5), array))
...@@ -201,7 +201,7 @@ class ImageFeatureExtractionTester(unittest.TestCase): ...@@ -201,7 +201,7 @@ class ImageFeatureExtractionTester(unittest.TestCase):
self.assertTrue(np.array_equal(np.array(image4), array)) self.assertTrue(np.array_equal(np.array(image4), array))
# And with floats + channel first. # And with floats + channel first.
image5 = feature_extractor.to_pil_image(tensor.permute(2, 0, 1).float() / 255.0) image5 = feature_extractor.to_pil_image(tensor.permute(2, 0, 1).float() * (1 / 255.0))
self.assertTrue(isinstance(image5, PIL.Image.Image)) self.assertTrue(isinstance(image5, PIL.Image.Image))
self.assertTrue(np.array_equal(np.array(image5), array)) self.assertTrue(np.array_equal(np.array(image5), array))
...@@ -316,7 +316,7 @@ class ImageFeatureExtractionTester(unittest.TestCase): ...@@ -316,7 +316,7 @@ class ImageFeatureExtractionTester(unittest.TestCase):
self.assertEqual(normalized_image.shape, (3, 16, 32)) self.assertEqual(normalized_image.shape, (3, 16, 32))
# During the conversion rescale and channel first will be applied. # During the conversion rescale and channel first will be applied.
expected = array.transpose(2, 0, 1).astype(np.float32) / 255.0 expected = array.transpose(2, 0, 1).astype(np.float32) * (1 / 255.0)
np_mean = np.array(mean).astype(np.float32)[:, None, None] np_mean = np.array(mean).astype(np.float32)[:, None, None]
np_std = np.array(std).astype(np.float32)[:, None, None] np_std = np.array(std).astype(np.float32)[:, None, None]
expected = (expected - np_mean) / np_std expected = (expected - np_mean) / np_std
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment