"...git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "01734dba842c29408c96caa5c345c9e415c7569b"
Unverified Commit 1b2381c4 authored by amyeroberts's avatar amyeroberts Committed by GitHub
Browse files

ImageProcessor - check if input pixel values between 0-255 (#25688)

* Check if pixel values between 0-255 and add doc clarification

* Add missing docstrings

* _is_scale_image -> is_scaled_image

* Spelling is hard

* Tidy up
parent 7a6efe1e
...@@ -32,6 +32,7 @@ from ...image_utils import ( ...@@ -32,6 +32,7 @@ from ...image_utils import (
ImageInput, ImageInput,
PILImageResampling, PILImageResampling,
infer_channel_dimension_format, infer_channel_dimension_format,
is_scaled_image,
make_list_of_images, make_list_of_images,
to_numpy_array, to_numpy_array,
valid_images, valid_images,
...@@ -184,7 +185,8 @@ class ViTHybridImageProcessor(BaseImageProcessor): ...@@ -184,7 +185,8 @@ class ViTHybridImageProcessor(BaseImageProcessor):
Args: Args:
images (`ImageInput`): images (`ImageInput`):
Image to preprocess. Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
passing in images with pixel values between 0 and 1, set `do_rescale=False`.
do_resize (`bool`, *optional*, defaults to `self.do_resize`): do_resize (`bool`, *optional*, defaults to `self.do_resize`):
Whether to resize the image. Whether to resize the image.
size (`Dict[str, int]`, *optional*, defaults to `self.size`): size (`Dict[str, int]`, *optional*, defaults to `self.size`):
...@@ -270,6 +272,12 @@ class ViTHybridImageProcessor(BaseImageProcessor): ...@@ -270,6 +272,12 @@ class ViTHybridImageProcessor(BaseImageProcessor):
# All transformations expect numpy arrays. # All transformations expect numpy arrays.
images = [to_numpy_array(image) for image in images] images = [to_numpy_array(image) for image in images]
if is_scaled_image(images[0]) and do_rescale:
logger.warning_once(
"It looks like you are trying to rescale already rescaled images. If the input"
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
)
if input_data_format is None: if input_data_format is None:
# We assume that all images have the same channel dimension format. # We assume that all images have the same channel dimension format.
input_data_format = infer_channel_dimension_format(images[0]) input_data_format = infer_channel_dimension_format(images[0])
......
...@@ -34,6 +34,7 @@ from ...image_utils import ( ...@@ -34,6 +34,7 @@ from ...image_utils import (
ImageInput, ImageInput,
PILImageResampling, PILImageResampling,
infer_channel_dimension_format, infer_channel_dimension_format,
is_scaled_image,
is_valid_image, is_valid_image,
to_numpy_array, to_numpy_array,
valid_images, valid_images,
...@@ -257,6 +258,12 @@ class VivitImageProcessor(BaseImageProcessor): ...@@ -257,6 +258,12 @@ class VivitImageProcessor(BaseImageProcessor):
# All transformations expect numpy arrays. # All transformations expect numpy arrays.
image = to_numpy_array(image) image = to_numpy_array(image)
if is_scaled_image(image) and do_rescale:
logger.warning_once(
"It looks like you are trying to rescale already rescaled images. If the input"
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
)
if input_data_format is None: if input_data_format is None:
input_data_format = infer_channel_dimension_format(image) input_data_format = infer_channel_dimension_format(image)
...@@ -299,7 +306,8 @@ class VivitImageProcessor(BaseImageProcessor): ...@@ -299,7 +306,8 @@ class VivitImageProcessor(BaseImageProcessor):
Args: Args:
videos (`ImageInput`): videos (`ImageInput`):
Video frames to preprocess. Video frames to preprocess. Expects a single or batch of video frames with pixel values ranging from 0
to 255. If passing in frames with pixel values between 0 and 1, set `do_rescale=False`.
do_resize (`bool`, *optional*, defaults to `self.do_resize`): do_resize (`bool`, *optional*, defaults to `self.do_resize`):
Whether to resize the image. Whether to resize the image.
size (`Dict[str, int]`, *optional*, defaults to `self.size`): size (`Dict[str, int]`, *optional*, defaults to `self.size`):
......
...@@ -40,6 +40,7 @@ from ...image_utils import ( ...@@ -40,6 +40,7 @@ from ...image_utils import (
PILImageResampling, PILImageResampling,
get_image_size, get_image_size,
infer_channel_dimension_format, infer_channel_dimension_format,
is_scaled_image,
make_list_of_images, make_list_of_images,
to_numpy_array, to_numpy_array,
valid_coco_detection_annotations, valid_coco_detection_annotations,
...@@ -1033,7 +1034,8 @@ class YolosImageProcessor(BaseImageProcessor): ...@@ -1033,7 +1034,8 @@ class YolosImageProcessor(BaseImageProcessor):
Args: Args:
images (`ImageInput`): images (`ImageInput`):
Image or batch of images to preprocess. Image or batch of images to preprocess. Expects a single or batch of images with pixel values ranging
from 0 to 255. If passing in images with pixel values between 0 and 1, set `do_rescale=False`.
annotations (`AnnotationType` or `List[AnnotationType]`, *optional*): annotations (`AnnotationType` or `List[AnnotationType]`, *optional*):
List of annotations associated with the image or batch of images. If annotation is for object List of annotations associated with the image or batch of images. If annotation is for object
detection, the annotations should be a dictionary with the following keys: detection, the annotations should be a dictionary with the following keys:
...@@ -1163,6 +1165,12 @@ class YolosImageProcessor(BaseImageProcessor): ...@@ -1163,6 +1165,12 @@ class YolosImageProcessor(BaseImageProcessor):
# All transformations expect numpy arrays # All transformations expect numpy arrays
images = [to_numpy_array(image) for image in images] images = [to_numpy_array(image) for image in images]
if is_scaled_image(images[0]) and do_rescale:
logger.warning_once(
"It looks like you are trying to rescale already rescaled images. If the input"
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
)
if input_data_format is None: if input_data_format is None:
# We assume that all images have the same channel dimension format. # We assume that all images have the same channel dimension format.
input_data_format = infer_channel_dimension_format(images[0]) input_data_format = infer_channel_dimension_format(images[0])
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment