"vscode:/vscode.git/clone" did not exist on "fe5e7cea4ac9c7b0cca8c33b86a24827e8331311"
Unverified Commit 1b2381c4 authored by amyeroberts, committed by GitHub

ImageProcessor - check if input pixel values between 0-255 (#25688)

* Check if pixel values between 0-255 and add doc clarification

* Add missing docstrings

* _is_scale_image -> is_scaled_image

* Spelling is hard

* Tidy up
parent 7a6efe1e
@@ -96,6 +96,17 @@ def is_batched(img):
    return False

def is_scaled_image(image: np.ndarray) -> bool:
    """
    Checks to see whether the pixel values have already been rescaled to [0, 1].
    """
    if image.dtype == np.uint8:
        return False

    # It's possible the image has pixel values in [0, 255] but is of floating type
    return np.min(image) >= 0 and np.max(image) <= 1

def make_list_of_images(images, expected_ndims: int = 3) -> List[ImageInput]:
    """
    Ensure that the input is a list of images. If the input is a single image, it is converted to a list of length 1.
...
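A quick sanity check of how the new helper behaves (a minimal sketch, not part of the commit; it assumes `is_scaled_image` is importable from `transformers.image_utils` once this change is installed):

import numpy as np
from transformers.image_utils import is_scaled_image

# uint8 inputs are always treated as unscaled, even when their values happen to fall inside [0, 1]
print(is_scaled_image(np.zeros((3, 4, 4), dtype=np.uint8)))         # False
# floating-point inputs count as scaled only when every value lies in [0, 1]
print(is_scaled_image(np.random.rand(3, 4, 4).astype(np.float32)))  # True
print(is_scaled_image(np.full((3, 4, 4), 255.0, dtype=np.float32))) # False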
@@ -28,6 +28,7 @@ from ...image_utils import (
    ImageInput,
    PILImageResampling,
    infer_channel_dimension_format,
    is_scaled_image,
    make_list_of_images,
    to_numpy_array,
    valid_images,
@@ -236,6 +237,11 @@ class BeitImageProcessor(BaseImageProcessor):
        """Preprocesses a single image."""
        # All transformations expect numpy arrays.
        image = to_numpy_array(image)

        if is_scaled_image(image) and do_rescale:
            logger.warning_once(
                "It looks like you are trying to rescale already rescaled images. If the input"
                " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
            )

        if input_data_format is None:
            input_data_format = infer_channel_dimension_format(image)

        image = self._preprocess(
@@ -328,7 +334,8 @@ class BeitImageProcessor(BaseImageProcessor):
        Args:
            images (`ImageInput`):
                Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
                passing in images with pixel values between 0 and 1, set `do_rescale=False`.
            do_resize (`bool`, *optional*, defaults to `self.do_resize`):
                Whether to resize the image.
            size (`Dict[str, int]`, *optional*, defaults to `self.size`):
...
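The guard added above only warns; it does not change behaviour. Callers who hand the processor float images that are already in [0, 1] should disable the extra rescale themselves. A minimal illustration, not part of the commit (it assumes the `microsoft/beit-base-patch16-224` checkpoint; any BEiT checkpoint works the same way):

import numpy as np
from transformers import BeitImageProcessor

processor = BeitImageProcessor.from_pretrained("microsoft/beit-base-patch16-224")
already_scaled = np.random.rand(3, 224, 224).astype(np.float32)  # float pixel values in [0, 1]

# Default settings: the new warning fires and the values are divided by 255 a second time
_ = processor(already_scaled, return_tensors="pt")

# Recommended for pre-scaled inputs: skip the processor's own rescaling step
inputs = processor(already_scaled, do_rescale=False, return_tensors="pt")
print(inputs.pixel_values.shape)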
@@ -32,6 +32,7 @@ from ...image_utils import (
    ImageInput,
    PILImageResampling,
    infer_channel_dimension_format,
    is_scaled_image,
    make_list_of_images,
    to_numpy_array,
    valid_images,
@@ -184,7 +185,8 @@ class BitImageProcessor(BaseImageProcessor):
        Args:
            images (`ImageInput`):
                Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
                passing in images with pixel values between 0 and 1, set `do_rescale=False`.
            do_resize (`bool`, *optional*, defaults to `self.do_resize`):
                Whether to resize the image.
            size (`Dict[str, int]`, *optional*, defaults to `self.size`):
@@ -270,6 +272,12 @@ class BitImageProcessor(BaseImageProcessor):
        # All transformations expect numpy arrays.
        images = [to_numpy_array(image) for image in images]

        if is_scaled_image(images[0]) and do_rescale:
            logger.warning_once(
                "It looks like you are trying to rescale already rescaled images. If the input"
                " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
            )

        if input_data_format is None:
            # We assume that all images have the same channel dimension format.
            input_data_format = infer_channel_dimension_format(images[0])
...
@@ -27,6 +27,7 @@ from ...image_utils import (
    ImageInput,
    PILImageResampling,
    infer_channel_dimension_format,
    is_scaled_image,
    make_list_of_images,
    to_numpy_array,
    valid_images,
@@ -176,7 +177,8 @@ class BlipImageProcessor(BaseImageProcessor):
        Args:
            images (`ImageInput`):
                Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
                passing in images with pixel values between 0 and 1, set `do_rescale=False`.
            do_resize (`bool`, *optional*, defaults to `self.do_resize`):
                Whether to resize the image.
            size (`Dict[str, int]`, *optional*, defaults to `self.size`):
@@ -253,6 +255,12 @@ class BlipImageProcessor(BaseImageProcessor):
        # All transformations expect numpy arrays.
        images = [to_numpy_array(image) for image in images]

        if is_scaled_image(images[0]) and do_rescale:
            logger.warning_once(
                "It looks like you are trying to rescale already rescaled images. If the input"
                " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
            )

        if input_data_format is None:
            # We assume that all images have the same channel dimension format.
            input_data_format = infer_channel_dimension_format(images[0])
...
@@ -29,6 +29,7 @@ from ...image_utils import (
    get_image_size,
    infer_channel_dimension_format,
    is_batched,
    is_scaled_image,
    to_numpy_array,
    valid_images,
)
@@ -387,7 +388,8 @@ class BridgeTowerImageProcessor(BaseImageProcessor):
        Args:
            images (`ImageInput`):
                Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
                passing in images with pixel values between 0 and 1, set `do_rescale=False`.
            do_resize (`bool`, *optional*, defaults to `self.do_resize`):
                Whether to resize the image.
            size (`Dict[str, int]`, *optional*, defaults to `self.size`):
@@ -469,6 +471,12 @@ class BridgeTowerImageProcessor(BaseImageProcessor):
        # All transformations expect numpy arrays.
        images = [to_numpy_array(image) for image in images]

        if is_scaled_image(images[0]) and do_rescale:
            logger.warning_once(
                "It looks like you are trying to rescale already rescaled images. If the input"
                " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
            )

        if do_resize:
            images = [
                self.resize(
...
@@ -32,6 +32,7 @@ from ...image_utils import (
    ImageInput,
    PILImageResampling,
    infer_channel_dimension_format,
    is_scaled_image,
    make_list_of_images,
    to_numpy_array,
    valid_images,
@@ -182,7 +183,8 @@ class ChineseCLIPImageProcessor(BaseImageProcessor):
        Args:
            images (`ImageInput`):
                Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
                passing in images with pixel values between 0 and 1, set `do_rescale=False`.
            do_resize (`bool`, *optional*, defaults to `self.do_resize`):
                Whether to resize the image.
            size (`Dict[str, int]`, *optional*, defaults to `self.size`):
@@ -268,6 +270,12 @@ class ChineseCLIPImageProcessor(BaseImageProcessor):
        # All transformations expect numpy arrays.
        images = [to_numpy_array(image) for image in images]

        if is_scaled_image(images[0]) and do_rescale:
            logger.warning_once(
                "It looks like you are trying to rescale already rescaled images. If the input"
                " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
            )

        if input_data_format is None:
            # We assume that all images have the same channel dimension format.
            input_data_format = infer_channel_dimension_format(images[0])
...
@@ -32,6 +32,7 @@ from ...image_utils import (
    ImageInput,
    PILImageResampling,
    infer_channel_dimension_format,
    is_scaled_image,
    make_list_of_images,
    to_numpy_array,
    valid_images,
@@ -183,7 +184,8 @@ class CLIPImageProcessor(BaseImageProcessor):
        Args:
            images (`ImageInput`):
                Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
                passing in images with pixel values between 0 and 1, set `do_rescale=False`.
            do_resize (`bool`, *optional*, defaults to `self.do_resize`):
                Whether to resize the image.
            size (`Dict[str, int]`, *optional*, defaults to `self.size`):
@@ -269,6 +271,12 @@ class CLIPImageProcessor(BaseImageProcessor):
        # All transformations expect numpy arrays.
        images = [to_numpy_array(image) for image in images]

        if is_scaled_image(images[0]) and do_rescale:
            logger.warning_once(
                "It looks like you are trying to rescale already rescaled images. If the input"
                " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
            )

        if input_data_format is None:
            # We assume that all images have the same channel dimension format.
            input_data_format = infer_channel_dimension_format(images[0])
...
@@ -42,6 +42,7 @@ from ...image_utils import (
    PILImageResampling,
    get_image_size,
    infer_channel_dimension_format,
    is_scaled_image,
    make_list_of_images,
    to_numpy_array,
    valid_coco_detection_annotations,
@@ -1126,7 +1127,8 @@ class ConditionalDetrImageProcessor(BaseImageProcessor):
        Args:
            images (`ImageInput`):
                Image or batch of images to preprocess. Expects a single or batch of images with pixel values ranging
                from 0 to 255. If passing in images with pixel values between 0 and 1, set `do_rescale=False`.
            annotations (`AnnotationType` or `List[AnnotationType]`, *optional*):
                List of annotations associated with the image or batch of images. If annotation is for object
                detection, the annotations should be a dictionary with the following keys:
@@ -1259,6 +1261,12 @@ class ConditionalDetrImageProcessor(BaseImageProcessor):
        # All transformations expect numpy arrays
        images = [to_numpy_array(image) for image in images]

        if is_scaled_image(images[0]) and do_rescale:
            logger.warning_once(
                "It looks like you are trying to rescale already rescaled images. If the input"
                " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
            )

        if input_data_format is None:
            # We assume that all images have the same channel dimension format.
            input_data_format = infer_channel_dimension_format(images[0])
...
@@ -32,6 +32,7 @@ from ...image_utils import (
    ImageInput,
    PILImageResampling,
    infer_channel_dimension_format,
    is_scaled_image,
    make_list_of_images,
    to_numpy_array,
    valid_images,
@@ -203,7 +204,8 @@ class ConvNextImageProcessor(BaseImageProcessor):
        Args:
            images (`ImageInput`):
                Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
                passing in images with pixel values between 0 and 1, set `do_rescale=False`.
            do_resize (`bool`, *optional*, defaults to `self.do_resize`):
                Whether to resize the image.
            size (`Dict[str, int]`, *optional*, defaults to `self.size`):
@@ -280,6 +282,12 @@ class ConvNextImageProcessor(BaseImageProcessor):
        # All transformations expect numpy arrays.
        images = [to_numpy_array(image) for image in images]

        if is_scaled_image(images[0]) and do_rescale:
            logger.warning_once(
                "It looks like you are trying to rescale already rescaled images. If the input"
                " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
            )

        if input_data_format is None:
            # We assume that all images have the same channel dimension format.
            input_data_format = infer_channel_dimension_format(images[0])
...
@@ -42,6 +42,7 @@ from ...image_utils import (
    PILImageResampling,
    get_image_size,
    infer_channel_dimension_format,
    is_scaled_image,
    make_list_of_images,
    to_numpy_array,
    valid_coco_detection_annotations,
@@ -1124,7 +1125,8 @@ class DeformableDetrImageProcessor(BaseImageProcessor):
        Args:
            images (`ImageInput`):
                Image or batch of images to preprocess. Expects a single or batch of images with pixel values ranging
                from 0 to 255. If passing in images with pixel values between 0 and 1, set `do_rescale=False`.
            annotations (`AnnotationType` or `List[AnnotationType]`, *optional*):
                List of annotations associated with the image or batch of images. If annotation is for object
                detection, the annotations should be a dictionary with the following keys:
@@ -1257,6 +1259,12 @@ class DeformableDetrImageProcessor(BaseImageProcessor):
        # All transformations expect numpy arrays
        images = [to_numpy_array(image) for image in images]

        if is_scaled_image(images[0]) and do_rescale:
            logger.warning_once(
                "It looks like you are trying to rescale already rescaled images. If the input"
                " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
            )

        if input_data_format is None:
            # We assume that all images have the same channel dimension format.
            input_data_format = infer_channel_dimension_format(images[0])
...
@@ -27,6 +27,7 @@ from ...image_utils import (
    ImageInput,
    PILImageResampling,
    infer_channel_dimension_format,
    is_scaled_image,
    make_list_of_images,
    to_numpy_array,
    valid_images,
@@ -180,7 +181,8 @@ class DeiTImageProcessor(BaseImageProcessor):
        Args:
            images (`ImageInput`):
                Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
                passing in images with pixel values between 0 and 1, set `do_rescale=False`.
            do_resize (`bool`, *optional*, defaults to `self.do_resize`):
                Whether to resize the image.
            size (`Dict[str, int]`, *optional*, defaults to `self.size`):
@@ -258,6 +260,12 @@ class DeiTImageProcessor(BaseImageProcessor):
        # All transformations expect numpy arrays.
        images = [to_numpy_array(image) for image in images]

        if is_scaled_image(images[0]) and do_rescale:
            logger.warning_once(
                "It looks like you are trying to rescale already rescaled images. If the input"
                " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
            )

        if input_data_format is None:
            # We assume that all images have the same channel dimension format.
            input_data_format = infer_channel_dimension_format(images[0])
...
@@ -40,6 +40,7 @@ from ...image_utils import (
    get_image_size,
    infer_channel_dimension_format,
    is_batched,
    is_scaled_image,
    to_numpy_array,
    valid_coco_detection_annotations,
    valid_coco_panoptic_annotations,
@@ -796,7 +797,8 @@ class DetaImageProcessor(BaseImageProcessor):
        Args:
            images (`ImageInput`):
                Image or batch of images to preprocess. Expects a single or batch of images with pixel values ranging
                from 0 to 255. If passing in images with pixel values between 0 and 1, set `do_rescale=False`.
            annotations (`List[Dict]` or `List[List[Dict]]`, *optional*):
                List of annotations associated with the image or batch of images. If annotionation is for object
                detection, the annotations should be a dictionary with the following keys:
@@ -921,6 +923,12 @@ class DetaImageProcessor(BaseImageProcessor):
        # All transformations expect numpy arrays
        images = [to_numpy_array(image) for image in images]

        if is_scaled_image(images[0]) and do_rescale:
            logger.warning_once(
                "It looks like you are trying to rescale already rescaled images. If the input"
                " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
            )

        if input_data_format is None:
            # We assume that all images have the same channel dimension format.
            input_data_format = infer_channel_dimension_format(images[0])
...
@@ -41,6 +41,7 @@ from ...image_utils import (
    PILImageResampling,
    get_image_size,
    infer_channel_dimension_format,
    is_scaled_image,
    make_list_of_images,
    to_numpy_array,
    valid_coco_detection_annotations,
@@ -1096,7 +1097,8 @@ class DetrImageProcessor(BaseImageProcessor):
        Args:
            images (`ImageInput`):
                Image or batch of images to preprocess. Expects a single or batch of images with pixel values ranging
                from 0 to 255. If passing in images with pixel values between 0 and 1, set `do_rescale=False`.
            annotations (`AnnotationType` or `List[AnnotationType]`, *optional*):
                List of annotations associated with the image or batch of images. If annotation is for object
                detection, the annotations should be a dictionary with the following keys:
@@ -1229,6 +1231,12 @@ class DetrImageProcessor(BaseImageProcessor):
        # All transformations expect numpy arrays
        images = [to_numpy_array(image) for image in images]

        if is_scaled_image(images[0]) and do_rescale:
            logger.warning_once(
                "It looks like you are trying to rescale already rescaled images. If the input"
                " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
            )

        if input_data_format is None:
            # We assume that all images have the same channel dimension format.
            input_data_format = infer_channel_dimension_format(images[0])
...
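For the detection processors (DETR, Deformable DETR, Conditional DETR, DETA) the same advice applies when annotations are passed alongside already-rescaled images. A rough sketch, not part of the commit (it assumes the `facebook/detr-resnet-50` checkpoint and an arbitrary COCO-style detection annotation):

import numpy as np
from transformers import DetrImageProcessor

processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")

# Float image already rescaled to [0, 1], channels-first, plus one COCO-style annotation
image = np.random.rand(3, 480, 640).astype(np.float32)
annotation = {
    "image_id": 0,
    "annotations": [
        {"bbox": [10.0, 20.0, 100.0, 80.0], "category_id": 1, "area": 8000.0, "iscrowd": 0}
    ],
}

# do_rescale=False keeps the pixel values from being divided by 255 a second time
inputs = processor(images=image, annotations=annotation, do_rescale=False, return_tensors="pt")
print(inputs.keys())  # pixel_values, pixel_mask, labels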
@@ -33,6 +33,7 @@ from ...image_utils import (
    PILImageResampling,
    get_image_size,
    infer_channel_dimension_format,
    is_scaled_image,
    make_list_of_images,
    to_numpy_array,
    valid_images,
@@ -319,7 +320,8 @@ class DonutImageProcessor(BaseImageProcessor):
        Args:
            images (`ImageInput`):
                Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
                passing in images with pixel values between 0 and 1, set `do_rescale=False`.
            do_resize (`bool`, *optional*, defaults to `self.do_resize`):
                Whether to resize the image.
            size (`Dict[str, int]`, *optional*, defaults to `self.size`):
@@ -407,6 +409,12 @@ class DonutImageProcessor(BaseImageProcessor):
        # All transformations expect numpy arrays.
        images = [to_numpy_array(image) for image in images]

        if is_scaled_image(images[0]) and do_rescale:
            logger.warning_once(
                "It looks like you are trying to rescale already rescaled images. If the input"
                " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
            )

        if input_data_format is None:
            # We assume that all images have the same channel dimension format.
            input_data_format = infer_channel_dimension_format(images[0])
...
@@ -29,6 +29,7 @@ from ...image_utils import (
    PILImageResampling,
    get_image_size,
    infer_channel_dimension_format,
    is_scaled_image,
    is_torch_available,
    is_torch_tensor,
    make_list_of_images,
@@ -230,7 +231,8 @@ class DPTImageProcessor(BaseImageProcessor):
        Args:
            images (`ImageInput`):
                Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
                passing in images with pixel values between 0 and 1, set `do_rescale=False`.
            do_resize (`bool`, *optional*, defaults to `self.do_resize`):
                Whether to resize the image.
            size (`Dict[str, int]`, *optional*, defaults to `self.size`):
@@ -305,6 +307,12 @@ class DPTImageProcessor(BaseImageProcessor):
        # All transformations expect numpy arrays.
        images = [to_numpy_array(image) for image in images]

        if is_scaled_image(images[0]) and do_rescale:
            logger.warning_once(
                "It looks like you are trying to rescale already rescaled images. If the input"
                " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
            )

        if input_data_format is None:
            # We assume that all images have the same channel dimension format.
            input_data_format = infer_channel_dimension_format(images[0])
...
@@ -32,6 +32,7 @@ from ...image_utils import (
    PILImageResampling,
    infer_channel_dimension_format,
    is_batched,
    is_scaled_image,
    to_numpy_array,
    valid_images,
)
@@ -179,7 +180,8 @@ class EfficientFormerImageProcessor(BaseImageProcessor):
        Args:
            images (`ImageInput`):
                Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
                passing in images with pixel values between 0 and 1, set `do_rescale=False`.
            do_resize (`bool`, *optional*, defaults to `self.do_resize`):
                Whether to resize the image.
            size (`Dict[str, int]`, *optional*, defaults to `self.size`):
@@ -256,6 +258,12 @@ class EfficientFormerImageProcessor(BaseImageProcessor):
        # All transformations expect numpy arrays.
        images = [to_numpy_array(image) for image in images]

        if is_scaled_image(images[0]) and do_rescale:
            logger.warning_once(
                "It looks like you are trying to rescale already rescaled images. If the input"
                " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
            )

        if input_data_format is None:
            # We assume that all images have the same channel dimension format.
            input_data_format = infer_channel_dimension_format(images[0])
...
@@ -27,6 +27,7 @@ from ...image_utils import (
    ImageInput,
    PILImageResampling,
    infer_channel_dimension_format,
    is_scaled_image,
    make_list_of_images,
    to_numpy_array,
    valid_images,
@@ -231,7 +232,8 @@ class EfficientNetImageProcessor(BaseImageProcessor):
        Args:
            images (`ImageInput`):
                Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
                passing in images with pixel values between 0 and 1, set `do_rescale=False`.
            do_resize (`bool`, *optional*, defaults to `self.do_resize`):
                Whether to resize the image.
            size (`Dict[str, int]`, *optional*, defaults to `self.size`):
@@ -315,6 +317,12 @@ class EfficientNetImageProcessor(BaseImageProcessor):
        # All transformations expect numpy arrays.
        images = [to_numpy_array(image) for image in images]

        if is_scaled_image(images[0]) and do_rescale:
            logger.warning_once(
                "It looks like you are trying to rescale already rescaled images. If the input"
                " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
            )

        if input_data_format is None:
            # We assume that all images have the same channel dimension format.
            input_data_format = infer_channel_dimension_format(images[0])
...
@@ -30,6 +30,7 @@ from ...image_utils import (
    ImageInput,
    PILImageResampling,
    infer_channel_dimension_format,
    is_scaled_image,
    make_list_of_images,
    to_numpy_array,
    valid_images,
@@ -414,6 +415,12 @@ class FlavaImageProcessor(BaseImageProcessor):
        # All transformations expect numpy arrays.
        image = to_numpy_array(image)

        if is_scaled_image(image) and do_rescale:
            logger.warning_once(
                "It looks like you are trying to rescale already rescaled images. If the input"
                " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
            )

        if input_data_format is None:
            # We assume that all images have the same channel dimension format.
            input_data_format = infer_channel_dimension_format(image)
@@ -481,7 +488,8 @@ class FlavaImageProcessor(BaseImageProcessor):
        Args:
            images (`ImageInput`):
                Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
                passing in images with pixel values between 0 and 1, set `do_rescale=False`.
            do_resize (`bool`, *optional*, defaults to `self.do_resize`):
                Whether to resize the image.
            size (`Dict[str, int]`, *optional*, defaults to `self.size`):
...
@@ -26,6 +26,7 @@ from ...image_utils import (
    PILImageResampling,
    get_image_size,
    infer_channel_dimension_format,
    is_scaled_image,
    make_list_of_images,
    to_numpy_array,
    valid_images,
@@ -137,7 +138,8 @@ class GLPNImageProcessor(BaseImageProcessor):
        Args:
            images (`PIL.Image.Image` or `TensorType` or `List[np.ndarray]` or `List[TensorType]`):
                Images to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
                passing in images with pixel values between 0 and 1, set `do_normalize=False`.
            do_resize (`bool`, *optional*, defaults to `self.do_resize`):
                Whether to resize the input such that the (height, width) dimensions are a multiple of `size_divisor`.
            size_divisor (`int`, *optional*, defaults to `self.size_divisor`):
@@ -182,6 +184,12 @@ class GLPNImageProcessor(BaseImageProcessor):
        # All transformations expect numpy arrays.
        images = [to_numpy_array(img) for img in images]

        if is_scaled_image(images[0]) and do_rescale:
            logger.warning_once(
                "It looks like you are trying to rescale already rescaled images. If the input"
                " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
            )

        if input_data_format is None:
            # We assume that all images have the same channel dimension format.
            input_data_format = infer_channel_dimension_format(images[0])
...
@@ -25,6 +25,7 @@ from ...image_utils import (
    ImageInput,
    PILImageResampling,
    infer_channel_dimension_format,
    is_scaled_image,
    make_list_of_images,
    to_numpy_array,
    valid_images,
@@ -190,7 +191,8 @@ class ImageGPTImageProcessor(BaseImageProcessor):
        Args:
            images (`ImageInput`):
                Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
                passing in images with pixel values between 0 and 1, set `do_normalize=False`.
            do_resize (`bool`, *optional*, defaults to `self.do_resize`):
                Whether to resize the image.
            size (`Dict[str, int]`, *optional*, defaults to `self.size`):
@@ -250,6 +252,12 @@ class ImageGPTImageProcessor(BaseImageProcessor):
        # All transformations expect numpy arrays.
        images = [to_numpy_array(image) for image in images]

        if is_scaled_image(images[0]) and do_normalize:
            logger.warning_once(
                "It looks like you are trying to rescale already rescaled images. If you wish to do this, "
                "make sure to set `do_normalize` to `False` and that pixel values are between [-1, 1].",
            )

        if input_data_format is None:
            # We assume that all images have the same channel dimension format.
            input_data_format = infer_channel_dimension_format(images[0])
...
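Note that the ImageGPT variant above keys the warning off `do_normalize` rather than `do_rescale`, since that processor maps pixel values to [-1, 1] instead of [0, 1]. A rough sketch of the corresponding caller-side fix, not part of the commit (it assumes the `openai/imagegpt-small` checkpoint):

import numpy as np
from transformers import ImageGPTImageProcessor

processor = ImageGPTImageProcessor.from_pretrained("openai/imagegpt-small")

# A float image already normalized to [-1, 1]: skip the processor's own normalization step
prenormalized = np.random.uniform(-1.0, 1.0, size=(3, 32, 32)).astype(np.float32)
encoding = processor(prenormalized, do_normalize=False, return_tensors="pt")

# ImageGPT color-quantizes by default, so the output is `input_ids` rather than `pixel_values`
print(encoding.input_ids.shape)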