Unverified Commit 1b2381c4 authored by amyeroberts's avatar amyeroberts Committed by GitHub
Browse files

ImageProcessor - check if input pixel values between 0-255 (#25688)

* Check if pixel values between 0-255 and add doc clarification

* Add missing docstrings

* _is_scale_image -> is_scaled_image

* Spelling is hard

* Tidy up
parent 7a6efe1e
......@@ -96,6 +96,17 @@ def is_batched(img):
return False
def is_scaled_image(image: np.ndarray) -> bool:
"""
Checks to see whether the pixel values have already been rescaled to [0, 1].
"""
if image.dtype == np.uint8:
return False
# It's possible the image has pixel values in [0, 255] but is of floating type
return np.min(image) >= 0 and np.max(image) <= 1
def make_list_of_images(images, expected_ndims: int = 3) -> List[ImageInput]:
"""
Ensure that the input is a list of images. If the input is a single image, it is converted to a list of length 1.
......
......@@ -28,6 +28,7 @@ from ...image_utils import (
ImageInput,
PILImageResampling,
infer_channel_dimension_format,
is_scaled_image,
make_list_of_images,
to_numpy_array,
valid_images,
......@@ -236,6 +237,11 @@ class BeitImageProcessor(BaseImageProcessor):
"""Preprocesses a single image."""
# All transformations expect numpy arrays.
image = to_numpy_array(image)
if is_scaled_image(image) and do_rescale:
logger.warning_once(
"It looks like you are trying to rescale already rescaled images. If the input"
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
)
if input_data_format is None:
input_data_format = infer_channel_dimension_format(image)
image = self._preprocess(
......@@ -328,7 +334,8 @@ class BeitImageProcessor(BaseImageProcessor):
Args:
images (`ImageInput`):
Image to preprocess.
Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
passing in images with pixel values between 0 and 1, set `do_rescale=False`.
do_resize (`bool`, *optional*, defaults to `self.do_resize`):
Whether to resize the image.
size (`Dict[str, int]`, *optional*, defaults to `self.size`):
......
......@@ -32,6 +32,7 @@ from ...image_utils import (
ImageInput,
PILImageResampling,
infer_channel_dimension_format,
is_scaled_image,
make_list_of_images,
to_numpy_array,
valid_images,
......@@ -184,7 +185,8 @@ class BitImageProcessor(BaseImageProcessor):
Args:
images (`ImageInput`):
Image to preprocess.
Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
passing in images with pixel values between 0 and 1, set `do_rescale=False`.
do_resize (`bool`, *optional*, defaults to `self.do_resize`):
Whether to resize the image.
size (`Dict[str, int]`, *optional*, defaults to `self.size`):
......@@ -270,6 +272,12 @@ class BitImageProcessor(BaseImageProcessor):
# All transformations expect numpy arrays.
images = [to_numpy_array(image) for image in images]
if is_scaled_image(images[0]) and do_rescale:
logger.warning_once(
"It looks like you are trying to rescale already rescaled images. If the input"
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
)
if input_data_format is None:
# We assume that all images have the same channel dimension format.
input_data_format = infer_channel_dimension_format(images[0])
......
......@@ -27,6 +27,7 @@ from ...image_utils import (
ImageInput,
PILImageResampling,
infer_channel_dimension_format,
is_scaled_image,
make_list_of_images,
to_numpy_array,
valid_images,
......@@ -176,7 +177,8 @@ class BlipImageProcessor(BaseImageProcessor):
Args:
images (`ImageInput`):
Image to preprocess.
Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
passing in images with pixel values between 0 and 1, set `do_rescale=False`.
do_resize (`bool`, *optional*, defaults to `self.do_resize`):
Whether to resize the image.
size (`Dict[str, int]`, *optional*, defaults to `self.size`):
......@@ -253,6 +255,12 @@ class BlipImageProcessor(BaseImageProcessor):
# All transformations expect numpy arrays.
images = [to_numpy_array(image) for image in images]
if is_scaled_image(images[0]) and do_rescale:
logger.warning_once(
"It looks like you are trying to rescale already rescaled images. If the input"
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
)
if input_data_format is None:
# We assume that all images have the same channel dimension format.
input_data_format = infer_channel_dimension_format(images[0])
......
......@@ -29,6 +29,7 @@ from ...image_utils import (
get_image_size,
infer_channel_dimension_format,
is_batched,
is_scaled_image,
to_numpy_array,
valid_images,
)
......@@ -387,7 +388,8 @@ class BridgeTowerImageProcessor(BaseImageProcessor):
Args:
images (`ImageInput`):
Image to preprocess.
Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
passing in images with pixel values between 0 and 1, set `do_rescale=False`.
do_resize (`bool`, *optional*, defaults to `self.do_resize`):
Whether to resize the image.
size (`Dict[str, int]`, *optional*, defaults to `self.size`):
......@@ -469,6 +471,12 @@ class BridgeTowerImageProcessor(BaseImageProcessor):
# All transformations expect numpy arrays.
images = [to_numpy_array(image) for image in images]
if is_scaled_image(images[0]) and do_rescale:
logger.warning_once(
"It looks like you are trying to rescale already rescaled images. If the input"
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
)
if do_resize:
images = [
self.resize(
......
......@@ -32,6 +32,7 @@ from ...image_utils import (
ImageInput,
PILImageResampling,
infer_channel_dimension_format,
is_scaled_image,
make_list_of_images,
to_numpy_array,
valid_images,
......@@ -182,7 +183,8 @@ class ChineseCLIPImageProcessor(BaseImageProcessor):
Args:
images (`ImageInput`):
Image to preprocess.
Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
passing in images with pixel values between 0 and 1, set `do_rescale=False`.
do_resize (`bool`, *optional*, defaults to `self.do_resize`):
Whether to resize the image.
size (`Dict[str, int]`, *optional*, defaults to `self.size`):
......@@ -268,6 +270,12 @@ class ChineseCLIPImageProcessor(BaseImageProcessor):
# All transformations expect numpy arrays.
images = [to_numpy_array(image) for image in images]
if is_scaled_image(images[0]) and do_rescale:
logger.warning_once(
"It looks like you are trying to rescale already rescaled images. If the input"
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
)
if input_data_format is None:
# We assume that all images have the same channel dimension format.
input_data_format = infer_channel_dimension_format(images[0])
......
......@@ -32,6 +32,7 @@ from ...image_utils import (
ImageInput,
PILImageResampling,
infer_channel_dimension_format,
is_scaled_image,
make_list_of_images,
to_numpy_array,
valid_images,
......@@ -183,7 +184,8 @@ class CLIPImageProcessor(BaseImageProcessor):
Args:
images (`ImageInput`):
Image to preprocess.
Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
passing in images with pixel values between 0 and 1, set `do_rescale=False`.
do_resize (`bool`, *optional*, defaults to `self.do_resize`):
Whether to resize the image.
size (`Dict[str, int]`, *optional*, defaults to `self.size`):
......@@ -269,6 +271,12 @@ class CLIPImageProcessor(BaseImageProcessor):
# All transformations expect numpy arrays.
images = [to_numpy_array(image) for image in images]
if is_scaled_image(images[0]) and do_rescale:
logger.warning_once(
"It looks like you are trying to rescale already rescaled images. If the input"
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
)
if input_data_format is None:
# We assume that all images have the same channel dimension format.
input_data_format = infer_channel_dimension_format(images[0])
......
......@@ -42,6 +42,7 @@ from ...image_utils import (
PILImageResampling,
get_image_size,
infer_channel_dimension_format,
is_scaled_image,
make_list_of_images,
to_numpy_array,
valid_coco_detection_annotations,
......@@ -1126,7 +1127,8 @@ class ConditionalDetrImageProcessor(BaseImageProcessor):
Args:
images (`ImageInput`):
Image or batch of images to preprocess.
Image or batch of images to preprocess. Expects a single or batch of images with pixel values ranging
from 0 to 255. If passing in images with pixel values between 0 and 1, set `do_rescale=False`.
annotations (`AnnotationType` or `List[AnnotationType]`, *optional*):
List of annotations associated with the image or batch of images. If annotation is for object
detection, the annotations should be a dictionary with the following keys:
......@@ -1259,6 +1261,12 @@ class ConditionalDetrImageProcessor(BaseImageProcessor):
# All transformations expect numpy arrays
images = [to_numpy_array(image) for image in images]
if is_scaled_image(images[0]) and do_rescale:
logger.warning_once(
"It looks like you are trying to rescale already rescaled images. If the input"
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
)
if input_data_format is None:
# We assume that all images have the same channel dimension format.
input_data_format = infer_channel_dimension_format(images[0])
......
......@@ -32,6 +32,7 @@ from ...image_utils import (
ImageInput,
PILImageResampling,
infer_channel_dimension_format,
is_scaled_image,
make_list_of_images,
to_numpy_array,
valid_images,
......@@ -203,7 +204,8 @@ class ConvNextImageProcessor(BaseImageProcessor):
Args:
images (`ImageInput`):
Image to preprocess.
Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
passing in images with pixel values between 0 and 1, set `do_rescale=False`.
do_resize (`bool`, *optional*, defaults to `self.do_resize`):
Whether to resize the image.
size (`Dict[str, int]`, *optional*, defaults to `self.size`):
......@@ -280,6 +282,12 @@ class ConvNextImageProcessor(BaseImageProcessor):
# All transformations expect numpy arrays.
images = [to_numpy_array(image) for image in images]
if is_scaled_image(images[0]) and do_rescale:
logger.warning_once(
"It looks like you are trying to rescale already rescaled images. If the input"
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
)
if input_data_format is None:
# We assume that all images have the same channel dimension format.
input_data_format = infer_channel_dimension_format(images[0])
......
......@@ -42,6 +42,7 @@ from ...image_utils import (
PILImageResampling,
get_image_size,
infer_channel_dimension_format,
is_scaled_image,
make_list_of_images,
to_numpy_array,
valid_coco_detection_annotations,
......@@ -1124,7 +1125,8 @@ class DeformableDetrImageProcessor(BaseImageProcessor):
Args:
images (`ImageInput`):
Image or batch of images to preprocess.
Image or batch of images to preprocess. Expects a single or batch of images with pixel values ranging
from 0 to 255. If passing in images with pixel values between 0 and 1, set `do_rescale=False`.
annotations (`AnnotationType` or `List[AnnotationType]`, *optional*):
List of annotations associated with the image or batch of images. If annotation is for object
detection, the annotations should be a dictionary with the following keys:
......@@ -1257,6 +1259,12 @@ class DeformableDetrImageProcessor(BaseImageProcessor):
# All transformations expect numpy arrays
images = [to_numpy_array(image) for image in images]
if is_scaled_image(images[0]) and do_rescale:
logger.warning_once(
"It looks like you are trying to rescale already rescaled images. If the input"
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
)
if input_data_format is None:
# We assume that all images have the same channel dimension format.
input_data_format = infer_channel_dimension_format(images[0])
......
......@@ -27,6 +27,7 @@ from ...image_utils import (
ImageInput,
PILImageResampling,
infer_channel_dimension_format,
is_scaled_image,
make_list_of_images,
to_numpy_array,
valid_images,
......@@ -180,7 +181,8 @@ class DeiTImageProcessor(BaseImageProcessor):
Args:
images (`ImageInput`):
Image to preprocess.
Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
passing in images with pixel values between 0 and 1, set `do_rescale=False`.
do_resize (`bool`, *optional*, defaults to `self.do_resize`):
Whether to resize the image.
size (`Dict[str, int]`, *optional*, defaults to `self.size`):
......@@ -258,6 +260,12 @@ class DeiTImageProcessor(BaseImageProcessor):
# All transformations expect numpy arrays.
images = [to_numpy_array(image) for image in images]
if is_scaled_image(images[0]) and do_rescale:
logger.warning_once(
"It looks like you are trying to rescale already rescaled images. If the input"
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
)
if input_data_format is None:
# We assume that all images have the same channel dimension format.
input_data_format = infer_channel_dimension_format(images[0])
......
......@@ -40,6 +40,7 @@ from ...image_utils import (
get_image_size,
infer_channel_dimension_format,
is_batched,
is_scaled_image,
to_numpy_array,
valid_coco_detection_annotations,
valid_coco_panoptic_annotations,
......@@ -796,7 +797,8 @@ class DetaImageProcessor(BaseImageProcessor):
Args:
images (`ImageInput`):
Image or batch of images to preprocess.
Image or batch of images to preprocess. Expects a single or batch of images with pixel values ranging
from 0 to 255. If passing in images with pixel values between 0 and 1, set `do_rescale=False`.
annotations (`List[Dict]` or `List[List[Dict]]`, *optional*):
List of annotations associated with the image or batch of images. If annotionation is for object
detection, the annotations should be a dictionary with the following keys:
......@@ -921,6 +923,12 @@ class DetaImageProcessor(BaseImageProcessor):
# All transformations expect numpy arrays
images = [to_numpy_array(image) for image in images]
if is_scaled_image(images[0]) and do_rescale:
logger.warning_once(
"It looks like you are trying to rescale already rescaled images. If the input"
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
)
if input_data_format is None:
# We assume that all images have the same channel dimension format.
input_data_format = infer_channel_dimension_format(images[0])
......
......@@ -41,6 +41,7 @@ from ...image_utils import (
PILImageResampling,
get_image_size,
infer_channel_dimension_format,
is_scaled_image,
make_list_of_images,
to_numpy_array,
valid_coco_detection_annotations,
......@@ -1096,7 +1097,8 @@ class DetrImageProcessor(BaseImageProcessor):
Args:
images (`ImageInput`):
Image or batch of images to preprocess.
Image or batch of images to preprocess. Expects a single or batch of images with pixel values ranging
from 0 to 255. If passing in images with pixel values between 0 and 1, set `do_rescale=False`.
annotations (`AnnotationType` or `List[AnnotationType]`, *optional*):
List of annotations associated with the image or batch of images. If annotation is for object
detection, the annotations should be a dictionary with the following keys:
......@@ -1229,6 +1231,12 @@ class DetrImageProcessor(BaseImageProcessor):
# All transformations expect numpy arrays
images = [to_numpy_array(image) for image in images]
if is_scaled_image(images[0]) and do_rescale:
logger.warning_once(
"It looks like you are trying to rescale already rescaled images. If the input"
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
)
if input_data_format is None:
# We assume that all images have the same channel dimension format.
input_data_format = infer_channel_dimension_format(images[0])
......
......@@ -33,6 +33,7 @@ from ...image_utils import (
PILImageResampling,
get_image_size,
infer_channel_dimension_format,
is_scaled_image,
make_list_of_images,
to_numpy_array,
valid_images,
......@@ -319,7 +320,8 @@ class DonutImageProcessor(BaseImageProcessor):
Args:
images (`ImageInput`):
Image to preprocess.
Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
passing in images with pixel values between 0 and 1, set `do_rescale=False`.
do_resize (`bool`, *optional*, defaults to `self.do_resize`):
Whether to resize the image.
size (`Dict[str, int]`, *optional*, defaults to `self.size`):
......@@ -407,6 +409,12 @@ class DonutImageProcessor(BaseImageProcessor):
# All transformations expect numpy arrays.
images = [to_numpy_array(image) for image in images]
if is_scaled_image(images[0]) and do_rescale:
logger.warning_once(
"It looks like you are trying to rescale already rescaled images. If the input"
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
)
if input_data_format is None:
# We assume that all images have the same channel dimension format.
input_data_format = infer_channel_dimension_format(images[0])
......
......@@ -29,6 +29,7 @@ from ...image_utils import (
PILImageResampling,
get_image_size,
infer_channel_dimension_format,
is_scaled_image,
is_torch_available,
is_torch_tensor,
make_list_of_images,
......@@ -230,7 +231,8 @@ class DPTImageProcessor(BaseImageProcessor):
Args:
images (`ImageInput`):
Image to preprocess.
Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
passing in images with pixel values between 0 and 1, set `do_rescale=False`.
do_resize (`bool`, *optional*, defaults to `self.do_resize`):
Whether to resize the image.
size (`Dict[str, int]`, *optional*, defaults to `self.size`):
......@@ -305,6 +307,12 @@ class DPTImageProcessor(BaseImageProcessor):
# All transformations expect numpy arrays.
images = [to_numpy_array(image) for image in images]
if is_scaled_image(images[0]) and do_rescale:
logger.warning_once(
"It looks like you are trying to rescale already rescaled images. If the input"
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
)
if input_data_format is None:
# We assume that all images have the same channel dimension format.
input_data_format = infer_channel_dimension_format(images[0])
......
......@@ -32,6 +32,7 @@ from ...image_utils import (
PILImageResampling,
infer_channel_dimension_format,
is_batched,
is_scaled_image,
to_numpy_array,
valid_images,
)
......@@ -179,7 +180,8 @@ class EfficientFormerImageProcessor(BaseImageProcessor):
Args:
images (`ImageInput`):
Image to preprocess.
Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
passing in images with pixel values between 0 and 1, set `do_rescale=False`.
do_resize (`bool`, *optional*, defaults to `self.do_resize`):
Whether to resize the image.
size (`Dict[str, int]`, *optional*, defaults to `self.size`):
......@@ -256,6 +258,12 @@ class EfficientFormerImageProcessor(BaseImageProcessor):
# All transformations expect numpy arrays.
images = [to_numpy_array(image) for image in images]
if is_scaled_image(images[0]) and do_rescale:
logger.warning_once(
"It looks like you are trying to rescale already rescaled images. If the input"
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
)
if input_data_format is None:
# We assume that all images have the same channel dimension format.
input_data_format = infer_channel_dimension_format(images[0])
......
......@@ -27,6 +27,7 @@ from ...image_utils import (
ImageInput,
PILImageResampling,
infer_channel_dimension_format,
is_scaled_image,
make_list_of_images,
to_numpy_array,
valid_images,
......@@ -231,7 +232,8 @@ class EfficientNetImageProcessor(BaseImageProcessor):
Args:
images (`ImageInput`):
Image to preprocess.
Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
passing in images with pixel values between 0 and 1, set `do_rescale=False`.
do_resize (`bool`, *optional*, defaults to `self.do_resize`):
Whether to resize the image.
size (`Dict[str, int]`, *optional*, defaults to `self.size`):
......@@ -315,6 +317,12 @@ class EfficientNetImageProcessor(BaseImageProcessor):
# All transformations expect numpy arrays.
images = [to_numpy_array(image) for image in images]
if is_scaled_image(images[0]) and do_rescale:
logger.warning_once(
"It looks like you are trying to rescale already rescaled images. If the input"
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
)
if input_data_format is None:
# We assume that all images have the same channel dimension format.
input_data_format = infer_channel_dimension_format(images[0])
......
......@@ -30,6 +30,7 @@ from ...image_utils import (
ImageInput,
PILImageResampling,
infer_channel_dimension_format,
is_scaled_image,
make_list_of_images,
to_numpy_array,
valid_images,
......@@ -414,6 +415,12 @@ class FlavaImageProcessor(BaseImageProcessor):
# All transformations expect numpy arrays.
image = to_numpy_array(image)
if is_scaled_image(image) and do_rescale:
logger.warning_once(
"It looks like you are trying to rescale already rescaled images. If the input"
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
)
if input_data_format is None:
# We assume that all images have the same channel dimension format.
input_data_format = infer_channel_dimension_format(image)
......@@ -481,7 +488,8 @@ class FlavaImageProcessor(BaseImageProcessor):
Args:
images (`ImageInput`):
Image to preprocess.
Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
passing in images with pixel values between 0 and 1, set `do_rescale=False`.
do_resize (`bool`, *optional*, defaults to `self.do_resize`):
Whether to resize the image.
size (`Dict[str, int]`, *optional*, defaults to `self.size`):
......
......@@ -26,6 +26,7 @@ from ...image_utils import (
PILImageResampling,
get_image_size,
infer_channel_dimension_format,
is_scaled_image,
make_list_of_images,
to_numpy_array,
valid_images,
......@@ -137,7 +138,8 @@ class GLPNImageProcessor(BaseImageProcessor):
Args:
images (`PIL.Image.Image` or `TensorType` or `List[np.ndarray]` or `List[TensorType]`):
The image or images to preprocess.
Images to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
passing in images with pixel values between 0 and 1, set `do_normalize=False`.
do_resize (`bool`, *optional*, defaults to `self.do_resize`):
Whether to resize the input such that the (height, width) dimensions are a multiple of `size_divisor`.
size_divisor (`int`, *optional*, defaults to `self.size_divisor`):
......@@ -182,6 +184,12 @@ class GLPNImageProcessor(BaseImageProcessor):
# All transformations expect numpy arrays.
images = [to_numpy_array(img) for img in images]
if is_scaled_image(images[0]) and do_rescale:
logger.warning_once(
"It looks like you are trying to rescale already rescaled images. If the input"
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
)
if input_data_format is None:
# We assume that all images have the same channel dimension format.
input_data_format = infer_channel_dimension_format(images[0])
......
......@@ -25,6 +25,7 @@ from ...image_utils import (
ImageInput,
PILImageResampling,
infer_channel_dimension_format,
is_scaled_image,
make_list_of_images,
to_numpy_array,
valid_images,
......@@ -190,7 +191,8 @@ class ImageGPTImageProcessor(BaseImageProcessor):
Args:
images (`ImageInput`):
Image to preprocess.
Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
passing in images with pixel values between 0 and 1, set `do_normalize=False`.
do_resize (`bool`, *optional*, defaults to `self.do_resize`):
Whether to resize the image.
size (`Dict[str, int]`, *optional*, defaults to `self.size`):
......@@ -250,6 +252,12 @@ class ImageGPTImageProcessor(BaseImageProcessor):
# All transformations expect numpy arrays.
images = [to_numpy_array(image) for image in images]
if is_scaled_image(images[0]) and do_normalize:
logger.warning_once(
"It looks like you are trying to rescale already rescaled images. If you wish to do this, "
"make sure to set `do_normalize` to `False` and that pixel values are between [-1, 1].",
)
if input_data_format is None:
# We assume that all images have the same channel dimension format.
input_data_format = infer_channel_dimension_format(images[0])
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment