"tests/vscode:/vscode.git/clone" did not exist on "2e3452af0f53fb1b023c70a4596de48efcbfb819"
Unverified Commit 1b2381c4 authored by amyeroberts's avatar amyeroberts Committed by GitHub
Browse files

ImageProcessor - check if input pixel values between 0-255 (#25688)

* Check if pixel values between 0-255 and add doc clarification

* Add missing docstrings

* _is_scale_image -> is_scaled_image

* Spelling is hard

* Tidy up
parent 7a6efe1e
......@@ -32,6 +32,7 @@ from ...image_utils import (
ImageInput,
PILImageResampling,
infer_channel_dimension_format,
is_scaled_image,
make_list_of_images,
to_numpy_array,
valid_images,
......@@ -184,7 +185,8 @@ class ViTHybridImageProcessor(BaseImageProcessor):
Args:
images (`ImageInput`):
Image to preprocess.
Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
passing in images with pixel values between 0 and 1, set `do_rescale=False`.
do_resize (`bool`, *optional*, defaults to `self.do_resize`):
Whether to resize the image.
size (`Dict[str, int]`, *optional*, defaults to `self.size`):
......@@ -270,6 +272,12 @@ class ViTHybridImageProcessor(BaseImageProcessor):
# All transformations expect numpy arrays.
images = [to_numpy_array(image) for image in images]
if is_scaled_image(images[0]) and do_rescale:
logger.warning_once(
"It looks like you are trying to rescale already rescaled images. If the input"
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
)
if input_data_format is None:
# We assume that all images have the same channel dimension format.
input_data_format = infer_channel_dimension_format(images[0])
......
......@@ -34,6 +34,7 @@ from ...image_utils import (
ImageInput,
PILImageResampling,
infer_channel_dimension_format,
is_scaled_image,
is_valid_image,
to_numpy_array,
valid_images,
......@@ -257,6 +258,12 @@ class VivitImageProcessor(BaseImageProcessor):
# All transformations expect numpy arrays.
image = to_numpy_array(image)
if is_scaled_image(image) and do_rescale:
logger.warning_once(
"It looks like you are trying to rescale already rescaled images. If the input"
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
)
if input_data_format is None:
input_data_format = infer_channel_dimension_format(image)
......@@ -299,7 +306,8 @@ class VivitImageProcessor(BaseImageProcessor):
Args:
videos (`ImageInput`):
Video frames to preprocess.
Video frames to preprocess. Expects a single or batch of video frames with pixel values ranging from 0
to 255. If passing in frames with pixel values between 0 and 1, set `do_rescale=False`.
do_resize (`bool`, *optional*, defaults to `self.do_resize`):
Whether to resize the image.
size (`Dict[str, int]`, *optional*, defaults to `self.size`):
......
......@@ -40,6 +40,7 @@ from ...image_utils import (
PILImageResampling,
get_image_size,
infer_channel_dimension_format,
is_scaled_image,
make_list_of_images,
to_numpy_array,
valid_coco_detection_annotations,
......@@ -1033,7 +1034,8 @@ class YolosImageProcessor(BaseImageProcessor):
Args:
images (`ImageInput`):
Image or batch of images to preprocess.
Image or batch of images to preprocess. Expects a single or batch of images with pixel values ranging
from 0 to 255. If passing in images with pixel values between 0 and 1, set `do_rescale=False`.
annotations (`AnnotationType` or `List[AnnotationType]`, *optional*):
List of annotations associated with the image or batch of images. If annotionation is for object
detection, the annotations should be a dictionary with the following keys:
......@@ -1163,6 +1165,12 @@ class YolosImageProcessor(BaseImageProcessor):
# All transformations expect numpy arrays
images = [to_numpy_array(image) for image in images]
if is_scaled_image(images[0]) and do_rescale:
logger.warning_once(
"It looks like you are trying to rescale already rescaled images. If the input"
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
)
if input_data_format is None:
# We assume that all images have the same channel dimension format.
input_data_format = infer_channel_dimension_format(images[0])
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment