"...git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "01734dba842c29408c96caa5c345c9e415c7569b"
Unverified Commit 1b2381c4 authored by amyeroberts's avatar amyeroberts Committed by GitHub
Browse files

ImageProcessor - check if input pixel values between 0-255 (#25688)

* Check if pixel values between 0-255 and add doc clarification

* Add missing docstrings

* _is_scale_image -> is_scaled_image

* Spelling is hard

* Tidy up
parent 7a6efe1e
...@@ -32,6 +32,7 @@ from ...image_utils import ( ...@@ -32,6 +32,7 @@ from ...image_utils import (
ImageInput, ImageInput,
PILImageResampling, PILImageResampling,
infer_channel_dimension_format, infer_channel_dimension_format,
is_scaled_image,
make_list_of_images, make_list_of_images,
to_numpy_array, to_numpy_array,
valid_images, valid_images,
...@@ -184,7 +185,8 @@ class ViTHybridImageProcessor(BaseImageProcessor): ...@@ -184,7 +185,8 @@ class ViTHybridImageProcessor(BaseImageProcessor):
Args: Args:
images (`ImageInput`): images (`ImageInput`):
Image to preprocess. Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
passing in images with pixel values between 0 and 1, set `do_rescale=False`.
do_resize (`bool`, *optional*, defaults to `self.do_resize`): do_resize (`bool`, *optional*, defaults to `self.do_resize`):
Whether to resize the image. Whether to resize the image.
size (`Dict[str, int]`, *optional*, defaults to `self.size`): size (`Dict[str, int]`, *optional*, defaults to `self.size`):
...@@ -270,6 +272,12 @@ class ViTHybridImageProcessor(BaseImageProcessor): ...@@ -270,6 +272,12 @@ class ViTHybridImageProcessor(BaseImageProcessor):
# All transformations expect numpy arrays. # All transformations expect numpy arrays.
images = [to_numpy_array(image) for image in images] images = [to_numpy_array(image) for image in images]
if is_scaled_image(images[0]) and do_rescale:
logger.warning_once(
"It looks like you are trying to rescale already rescaled images. If the input"
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
)
if input_data_format is None: if input_data_format is None:
# We assume that all images have the same channel dimension format. # We assume that all images have the same channel dimension format.
input_data_format = infer_channel_dimension_format(images[0]) input_data_format = infer_channel_dimension_format(images[0])
......
...@@ -34,6 +34,7 @@ from ...image_utils import ( ...@@ -34,6 +34,7 @@ from ...image_utils import (
ImageInput, ImageInput,
PILImageResampling, PILImageResampling,
infer_channel_dimension_format, infer_channel_dimension_format,
is_scaled_image,
is_valid_image, is_valid_image,
to_numpy_array, to_numpy_array,
valid_images, valid_images,
...@@ -257,6 +258,12 @@ class VivitImageProcessor(BaseImageProcessor): ...@@ -257,6 +258,12 @@ class VivitImageProcessor(BaseImageProcessor):
# All transformations expect numpy arrays. # All transformations expect numpy arrays.
image = to_numpy_array(image) image = to_numpy_array(image)
if is_scaled_image(image) and do_rescale:
logger.warning_once(
"It looks like you are trying to rescale already rescaled images. If the input"
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
)
if input_data_format is None: if input_data_format is None:
input_data_format = infer_channel_dimension_format(image) input_data_format = infer_channel_dimension_format(image)
...@@ -299,7 +306,8 @@ class VivitImageProcessor(BaseImageProcessor): ...@@ -299,7 +306,8 @@ class VivitImageProcessor(BaseImageProcessor):
Args: Args:
videos (`ImageInput`): videos (`ImageInput`):
Video frames to preprocess. Video frames to preprocess. Expects a single or batch of video frames with pixel values ranging from 0
to 255. If passing in frames with pixel values between 0 and 1, set `do_rescale=False`.
do_resize (`bool`, *optional*, defaults to `self.do_resize`): do_resize (`bool`, *optional*, defaults to `self.do_resize`):
Whether to resize the image. Whether to resize the image.
size (`Dict[str, int]`, *optional*, defaults to `self.size`): size (`Dict[str, int]`, *optional*, defaults to `self.size`):
......
...@@ -40,6 +40,7 @@ from ...image_utils import ( ...@@ -40,6 +40,7 @@ from ...image_utils import (
PILImageResampling, PILImageResampling,
get_image_size, get_image_size,
infer_channel_dimension_format, infer_channel_dimension_format,
is_scaled_image,
make_list_of_images, make_list_of_images,
to_numpy_array, to_numpy_array,
valid_coco_detection_annotations, valid_coco_detection_annotations,
...@@ -1033,7 +1034,8 @@ class YolosImageProcessor(BaseImageProcessor): ...@@ -1033,7 +1034,8 @@ class YolosImageProcessor(BaseImageProcessor):
Args: Args:
images (`ImageInput`): images (`ImageInput`):
Image or batch of images to preprocess. Image or batch of images to preprocess. Expects a single or batch of images with pixel values ranging
from 0 to 255. If passing in images with pixel values between 0 and 1, set `do_rescale=False`.
annotations (`AnnotationType` or `List[AnnotationType]`, *optional*): annotations (`AnnotationType` or `List[AnnotationType]`, *optional*):
List of annotations associated with the image or batch of images. If annotation is for object List of annotations associated with the image or batch of images. If annotation is for object
detection, the annotations should be a dictionary with the following keys: detection, the annotations should be a dictionary with the following keys:
...@@ -1163,6 +1165,12 @@ class YolosImageProcessor(BaseImageProcessor): ...@@ -1163,6 +1165,12 @@ class YolosImageProcessor(BaseImageProcessor):
# All transformations expect numpy arrays # All transformations expect numpy arrays
images = [to_numpy_array(image) for image in images] images = [to_numpy_array(image) for image in images]
if is_scaled_image(images[0]) and do_rescale:
logger.warning_once(
"It looks like you are trying to rescale already rescaled images. If the input"
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
)
if input_data_format is None: if input_data_format is None:
# We assume that all images have the same channel dimension format. # We assume that all images have the same channel dimension format.
input_data_format = infer_channel_dimension_format(images[0]) input_data_format = infer_channel_dimension_format(images[0])
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment