Abstract image processor arg checks. (#28843)

* abstract image processor arg checks. * fix signatures and quality * add validate_ method to rescale-prone processors * add more validations * quality * quality * fix formatting Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> * fix formatting Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> * fix formatting Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> * Fix formatting mishap Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> * fix crop_size compatibility * fix default mutable arg * fix segmentation map + image arg validity * remove segmentation check from arg validation * fix quality * fix missing segmap * protect PILImageResampling type * Apply suggestions from code review Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> * add back segmentation maps check --------- Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

Abstract image processor arg checks. (#28843)
* abstract image processor arg checks. * fix signatures and quality * add validate_ method to rescale-prone processors * add more validations * quality * quality * fix formatting Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> * fix formatting Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> * fix formatting Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> * Fix formatting mishap Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> * fix crop_size compatibility * fix default mutable arg * fix segmentation map + image arg validity * remove segmentation check from arg validation * fix quality * fix missing segmap * protect PILImageResampling type * Apply suggestions from code review Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> * add back segmentation maps check --------- Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
1c9134f0 · Pablo Montalvo · GitHub · f7ef7cec · 1c9134f0 · 1c9134f0
Unverified Commit 1c9134f0 authored Feb 20, 2024 by Pablo Montalvo Committed by GitHub Feb 20, 2024
20 changed files
--- a/src/transformers/image_utils.py
+++ b/src/transformers/image_utils.py
@@ -337,6 +337,47 @@ def load_image(image: Union[str, "PIL.Image.Image"], timeout: Optional[float] =
    return image


+def validate_preprocess_arguments(
+    do_rescale: Optional[bool] = None,
+    rescale_factor: Optional[float] = None,
+    do_normalize: Optional[bool] = None,
+    image_mean: Optional[Union[float, List[float]]] = None,
+    image_std: Optional[Union[float, List[float]]] = None,
+    do_pad: Optional[bool] = None,
+    size_divisibility: Optional[int] = None,
+    do_center_crop: Optional[bool] = None,
+    crop_size: Optional[Dict[str, int]] = None,
+    do_resize: Optional[bool] = None,
+    size: Optional[Dict[str, int]] = None,
+    resample: Optional["PILImageResampling"] = None,
+):
+    """
+    Checks the argument pairings typically used by an `ImageProcessor` `preprocess` method.
+
+    Every enabled `do_*` flag must come with its companion value(s): `do_rescale` with `rescale_factor`,
+    `do_normalize` with `image_mean` and `image_std`, `do_pad` with `size_divisibility`, `do_center_crop`
+    with `crop_size`, `do_resize` with `size` and `resample`. Falsy flags are skipped. Raises `ValueError`
+    on the first missing companion. Padding needs are model-specific, so callers forward whichever value
+    their `pad` relies on (e.g. `size_divisor` or `size`) through `size_divisibility`.
+    """
+    if do_rescale and rescale_factor is None:
+        raise ValueError("rescale_factor must be specified if do_rescale is True.")
+
+    if do_pad and size_divisibility is None:
+        raise ValueError(
+            "Depending on model, size_divisibility, size_divisor, pad_size or size must be specified if do_pad is True."
+        )
+
+    if do_normalize and (image_mean is None or image_std is None):
+        raise ValueError("image_mean and image_std must both be specified if do_normalize is True.")
+
+    if do_center_crop and crop_size is None:
+        raise ValueError("crop_size must be specified if do_center_crop is True.")
+
+    if do_resize and (size is None or resample is None):
+        raise ValueError("size and resample must be specified if do_resize is True.")
+
+
 # In the future we can add a TF implementation here when we have TF models.
 class ImageFeatureExtractionMixin:
    """

--- a/src/transformers/models/beit/image_processing_beit.py
+++ b/src/transformers/models/beit/image_processing_beit.py
@@ -32,6 +32,7 @@ from ...image_utils import (
    make_list_of_images,
    to_numpy_array,
    valid_images,
+    validate_preprocess_arguments,
 )
 from ...utils import TensorType, is_torch_available, is_torch_tensor, is_vision_available, logging

@@ -396,32 +397,33 @@ class BeitImageProcessor(BaseImageProcessor):
        do_reduce_labels = do_reduce_labels if do_reduce_labels is not None else self.do_reduce_labels

        images = make_list_of_images(images)
+
        if segmentation_maps is not None:
            segmentation_maps = make_list_of_images(segmentation_maps, expected_ndims=2)

-        if not valid_images(images):
+        if segmentation_maps is not None and not valid_images(segmentation_maps):
            raise ValueError(
-                "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
+                "Invalid segmentation_maps type. Must be of type PIL.Image.Image, numpy.ndarray, "
                "torch.Tensor, tf.Tensor or jax.ndarray."
            )
-
-        if segmentation_maps is not None and not valid_images(segmentation_maps):
+        if not valid_images(images):
            raise ValueError(
-                "Invalid segmentation map type. Must be of type PIL.Image.Image, numpy.ndarray, "
+                "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
                "torch.Tensor, tf.Tensor or jax.ndarray."
            )

-        if do_resize and size is None or resample is None:
-            raise ValueError("Size and resample must be specified if do_resize is True.")
-
-        if do_center_crop and crop_size is None:
-            raise ValueError("Crop size must be specified if do_center_crop is True.")
-
-        if do_rescale and rescale_factor is None:
-            raise ValueError("Rescale factor must be specified if do_rescale is True.")
-
-        if do_normalize and (image_mean is None or image_std is None):
-            raise ValueError("Image mean and std must be specified if do_normalize is True.")
+        validate_preprocess_arguments(
+            do_rescale=do_rescale,
+            rescale_factor=rescale_factor,
+            do_normalize=do_normalize,
+            image_mean=image_mean,
+            image_std=image_std,
+            do_center_crop=do_center_crop,
+            crop_size=crop_size,
+            do_resize=do_resize,
+            size=size,
+            resample=resample,
+        )

        images = [
            self._preprocess_image(

--- a/src/transformers/models/bit/image_processing_bit.py
+++ b/src/transformers/models/bit/image_processing_bit.py
@@ -36,6 +36,7 @@ from ...image_utils import (
    make_list_of_images,
    to_numpy_array,
    valid_images,
+    validate_preprocess_arguments,
 )
 from ...utils import TensorType, is_vision_available, logging

@@ -263,17 +264,18 @@ class BitImageProcessor(BaseImageProcessor):
                "torch.Tensor, tf.Tensor or jax.ndarray."
            )

-        if do_resize and size is None:
-            raise ValueError("Size must be specified if do_resize is True.")
-
-        if do_center_crop and crop_size is None:
-            raise ValueError("Crop size must be specified if do_center_crop is True.")
-
-        if do_rescale and rescale_factor is None:
-            raise ValueError("Rescale factor must be specified if do_rescale is True.")
-
-        if do_normalize and (image_mean is None or image_std is None):
-            raise ValueError("Image mean and std must be specified if do_normalize is True.")
+        validate_preprocess_arguments(
+            do_rescale=do_rescale,
+            rescale_factor=rescale_factor,
+            do_normalize=do_normalize,
+            image_mean=image_mean,
+            image_std=image_std,
+            do_center_crop=do_center_crop,
+            crop_size=crop_size,
+            do_resize=do_resize,
+            size=size,
+            resample=resample,
+        )

        # PIL RGBA images are converted to RGB
        if do_convert_rgb:

--- a/src/transformers/models/blip/image_processing_blip.py
+++ b/src/transformers/models/blip/image_processing_blip.py
@@ -31,6 +31,7 @@ from ...image_utils import (
    make_list_of_images,
    to_numpy_array,
    valid_images,
+    validate_preprocess_arguments,
 )
 from ...utils import TensorType, is_vision_available, logging

@@ -239,15 +240,16 @@ class BlipImageProcessor(BaseImageProcessor):
                "torch.Tensor, tf.Tensor or jax.ndarray."
            )

-        if do_resize and size is None or resample is None:
-            raise ValueError("Size and resample must be specified if do_resize is True.")
-
-        if do_rescale and rescale_factor is None:
-            raise ValueError("Rescale factor must be specified if do_rescale is True.")
-
-        if do_normalize and (image_mean is None or image_std is None):
-            raise ValueError("Image mean and std must be specified if do_normalize is True.")
-
+        validate_preprocess_arguments(
+            do_rescale=do_rescale,
+            rescale_factor=rescale_factor,
+            do_normalize=do_normalize,
+            image_mean=image_mean,
+            image_std=image_std,
+            do_resize=do_resize,
+            size=size,
+            resample=resample,
+        )
        # PIL RGBA images are converted to RGB
        if do_convert_rgb:
            images = [convert_to_rgb(image) for image in images]

--- a/src/transformers/models/bridgetower/image_processing_bridgetower.py
+++ b/src/transformers/models/bridgetower/image_processing_bridgetower.py
@@ -32,6 +32,7 @@ from ...image_utils import (
    is_scaled_image,
    to_numpy_array,
    valid_images,
+    validate_preprocess_arguments,
 )
 from ...utils import TensorType, is_vision_available, logging

@@ -128,7 +129,7 @@ class BridgeTowerImageProcessor(BaseImageProcessor):
        do_resize (`bool`, *optional*, defaults to `True`):
            Whether to resize the image's (height, width) dimensions to the specified `size`. Can be overridden by the
            `do_resize` parameter in the `preprocess` method.
-        size (`Dict[str, int]` *optional*, defaults to 288):
+        size (`Dict[str, int]` *optional*, defaults to `{'shortest_edge': 288}`):
            Resize the shorter side of the input to `size["shortest_edge"]`. The longer side will be limited to under
            `int((1333 / 800) * size["shortest_edge"])` while preserving the aspect ratio. Only has an effect if
            `do_resize` is set to `True`. Can be overridden by the `size` parameter in the `preprocess` method.
@@ -158,6 +159,9 @@ class BridgeTowerImageProcessor(BaseImageProcessor):
        do_center_crop (`bool`, *optional*, defaults to `True`):
            Whether to center crop the image. Can be overridden by the `do_center_crop` parameter in the `preprocess`
            method.
+        crop_size (`Dict[str, int]`, *optional*):
+            Desired output size when applying center-cropping. Only has an effect if `do_center_crop` is set to `True`.
+            Can be overridden by the `crop_size` parameter in the `preprocess` method. If unset, defaults to `size`.
        do_pad (`bool`, *optional*, defaults to `True`):
            Whether to pad the image to the `(max_height, max_width)` of the images in the batch. Can be overridden by
            the `do_pad` parameter in the `preprocess` method.
@@ -168,7 +172,7 @@ class BridgeTowerImageProcessor(BaseImageProcessor):
    def __init__(
        self,
        do_resize: bool = True,
-        size: Dict[str, int] = 288,
+        size: Dict[str, int] = None,
        size_divisor: int = 32,
        resample: PILImageResampling = PILImageResampling.BICUBIC,
        do_rescale: bool = True,
@@ -177,6 +181,7 @@ class BridgeTowerImageProcessor(BaseImageProcessor):
        image_mean: Optional[Union[float, List[float]]] = None,
        image_std: Optional[Union[float, List[float]]] = None,
        do_center_crop: bool = True,
+        crop_size: Dict[str, int] = None,
        do_pad: bool = True,
        **kwargs,
    ) -> None:
@@ -198,6 +203,7 @@ class BridgeTowerImageProcessor(BaseImageProcessor):
        self.image_std = image_std if image_std is not None else OPENAI_CLIP_STD
        self.do_pad = do_pad
        self.do_center_crop = do_center_crop
+        self.crop_size = crop_size

    # Copied from transformers.models.vilt.image_processing_vilt.ViltImageProcessor.resize
    def resize(
@@ -378,6 +384,7 @@ class BridgeTowerImageProcessor(BaseImageProcessor):
        image_std: Optional[Union[float, List[float]]] = None,
        do_pad: Optional[bool] = None,
        do_center_crop: Optional[bool] = None,
+        crop_size: Dict[str, int] = None,
        return_tensors: Optional[Union[str, TensorType]] = None,
        data_format: ChannelDimension = ChannelDimension.FIRST,
        input_data_format: Optional[Union[str, ChannelDimension]] = None,
@@ -417,6 +424,9 @@ class BridgeTowerImageProcessor(BaseImageProcessor):
            do_center_crop (`bool`, *optional*, defaults to `self.do_center_crop`):
                Whether to center crop the image. If the input size is smaller than `crop_size` along any edge, the
                image is padded with 0's and then center cropped.
+            crop_size (`Dict[str, int]`, *optional*, defaults to `self.crop_size`):
+                Size of the image after center crop. If one edge of the image is smaller than `crop_size`, it will be
+                padded with zeros and then cropped.
            return_tensors (`str` or `TensorType`, *optional*):
                The type of tensors to return. Can be one of:
                    - Unset: Return a list of `np.ndarray`.
@@ -446,6 +456,11 @@ class BridgeTowerImageProcessor(BaseImageProcessor):
        image_std = image_std if image_std is not None else self.image_std
        do_pad = do_pad if do_pad is not None else self.do_pad
        do_center_crop if do_center_crop is not None else self.do_center_crop
+        # For backwards compatibility. Initial version of this processor was cropping to the "size" argument, which
+        # it should default to if crop_size is undefined.
+        crop_size = (
+            crop_size if crop_size is not None else (self.crop_size if self.crop_size is not None else self.size)
+        )

        size = size if size is not None else self.size
        size = get_size_dict(size, default_to_square=False)
@@ -458,16 +473,21 @@ class BridgeTowerImageProcessor(BaseImageProcessor):
                "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
                "torch.Tensor, tf.Tensor or jax.ndarray."
            )
-
-        if do_resize and size is None or resample is None:
-            raise ValueError("Size and resample must be specified if do_resize is True.")
-
-        if do_rescale and rescale_factor is None:
-            raise ValueError("Rescale factor must be specified if do_rescale is True.")
-
-        if do_normalize and (image_mean is None or image_std is None):
-            raise ValueError("Image mean and std must be specified if do_normalize is True.")
-
+        # Here, crop_size is used only if it is set, else size will be used.
+        validate_preprocess_arguments(
+            do_rescale=do_rescale,
+            rescale_factor=rescale_factor,
+            do_normalize=do_normalize,
+            image_mean=image_mean,
+            image_std=image_std,
+            do_pad=do_pad,
+            size_divisibility=size_divisor,
+            do_center_crop=do_center_crop,
+            crop_size=crop_size,
+            do_resize=do_resize,
+            size=size,
+            resample=resample,
+        )
        # All transformations expect numpy arrays.
        images = [to_numpy_array(image) for image in images]

@@ -491,7 +511,7 @@ class BridgeTowerImageProcessor(BaseImageProcessor):

        if do_center_crop:
            images = [
-                self.center_crop(image=image, size=size, input_data_format=input_data_format) for image in images
+                self.center_crop(image=image, size=crop_size, input_data_format=input_data_format) for image in images
            ]

        if do_rescale:

--- a/src/transformers/models/chinese_clip/image_processing_chinese_clip.py
+++ b/src/transformers/models/chinese_clip/image_processing_chinese_clip.py
@@ -36,6 +36,7 @@ from ...image_utils import (
    make_list_of_images,
    to_numpy_array,
    valid_images,
+    validate_preprocess_arguments,
 )
 from ...utils import TensorType, is_vision_available, logging

@@ -251,20 +252,18 @@ class ChineseCLIPImageProcessor(BaseImageProcessor):
                "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
                "torch.Tensor, tf.Tensor or jax.ndarray."
            )
-
-        if do_resize and size is None:
-            raise ValueError("Size must be specified if do_resize is True.")
-
-        if do_center_crop and crop_size is None:
-            raise ValueError("Crop size must be specified if do_center_crop is True.")
-
-        if do_rescale and rescale_factor is None:
-            raise ValueError("Rescale factor must be specified if do_rescale is True.")
-
-        if do_normalize and (image_mean is None or image_std is None):
-            raise ValueError("Image mean and std must be specified if do_normalize is True.")
-
-        # PIL RGBA images are converted to RGB
+        validate_preprocess_arguments(
+            do_rescale=do_rescale,
+            rescale_factor=rescale_factor,
+            do_normalize=do_normalize,
+            image_mean=image_mean,
+            image_std=image_std,
+            do_center_crop=do_center_crop,
+            crop_size=crop_size,
+            do_resize=do_resize,
+            size=size,
+            resample=resample,
+        )
        if do_convert_rgb:
            images = [convert_to_rgb(image) for image in images]


--- a/src/transformers/models/clip/image_processing_clip.py
+++ b/src/transformers/models/clip/image_processing_clip.py
@@ -36,6 +36,7 @@ from ...image_utils import (
    make_list_of_images,
    to_numpy_array,
    valid_images,
+    validate_preprocess_arguments,
 )
 from ...utils import TensorType, is_vision_available, logging

@@ -265,20 +266,19 @@ class CLIPImageProcessor(BaseImageProcessor):
                "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
                "torch.Tensor, tf.Tensor or jax.ndarray."
            )
+        validate_preprocess_arguments(
+            do_rescale=do_rescale,
+            rescale_factor=rescale_factor,
+            do_normalize=do_normalize,
+            image_mean=image_mean,
+            image_std=image_std,
+            do_center_crop=do_center_crop,
+            crop_size=crop_size,
+            do_resize=do_resize,
+            size=size,
+            resample=resample,
+        )

-        if do_resize and size is None:
-            raise ValueError("Size must be specified if do_resize is True.")
-
-        if do_center_crop and crop_size is None:
-            raise ValueError("Crop size must be specified if do_center_crop is True.")
-
-        if do_rescale and rescale_factor is None:
-            raise ValueError("Rescale factor must be specified if do_rescale is True.")
-
-        if do_normalize and (image_mean is None or image_std is None):
-            raise ValueError("Image mean and std must be specified if do_normalize is True.")
-
-        # PIL RGBA images are converted to RGB
        if do_convert_rgb:
            images = [convert_to_rgb(image) for image in images]


--- a/src/transformers/models/conditional_detr/image_processing_conditional_detr.py
+++ b/src/transformers/models/conditional_detr/image_processing_conditional_detr.py
@@ -49,6 +49,7 @@ from ...image_utils import (
    to_numpy_array,
    valid_images,
    validate_annotations,
+    validate_preprocess_arguments,
 )
 from ...utils import (
    TensorType,
@@ -1291,16 +1292,27 @@ class ConditionalDetrImageProcessor(BaseImageProcessor):
        do_pad = self.do_pad if do_pad is None else do_pad
        format = self.format if format is None else format

-        if do_resize is not None and size is None:
-            raise ValueError("Size and max_size must be specified if do_resize is True.")
+        images = make_list_of_images(images)

-        if do_rescale is not None and rescale_factor is None:
-            raise ValueError("Rescale factor must be specified if do_rescale is True.")
+        if not valid_images(images):
+            raise ValueError(
+                "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
+                "torch.Tensor, tf.Tensor or jax.ndarray."
+            )

-        if do_normalize is not None and (image_mean is None or image_std is None):
-            raise ValueError("Image mean and std must be specified if do_normalize is True.")
+        # Here, the pad() method pads to the maximum of (width, height). It does not need to be validated.
+
+        validate_preprocess_arguments(
+            do_rescale=do_rescale,
+            rescale_factor=rescale_factor,
+            do_normalize=do_normalize,
+            image_mean=image_mean,
+            image_std=image_std,
+            do_resize=do_resize,
+            size=size,
+            resample=resample,
+        )

-        images = make_list_of_images(images)
        if annotations is not None and isinstance(annotations, dict):
            annotations = [annotations]

@@ -1309,12 +1321,6 @@ class ConditionalDetrImageProcessor(BaseImageProcessor):
                f"The number of images ({len(images)}) and annotations ({len(annotations)}) do not match."
            )

-        if not valid_images(images):
-            raise ValueError(
-                "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
-                "torch.Tensor, tf.Tensor or jax.ndarray."
-            )
-
        format = AnnotationFormat(format)
        if annotations is not None:
            validate_annotations(format, SUPPORTED_ANNOTATION_FORMATS, annotations)

--- a/src/transformers/models/convnext/image_processing_convnext.py
+++ b/src/transformers/models/convnext/image_processing_convnext.py
@@ -36,6 +36,7 @@ from ...image_utils import (
    make_list_of_images,
    to_numpy_array,
    valid_images,
+    validate_preprocess_arguments,
 )
 from ...utils import TensorType, is_vision_available, logging

@@ -267,17 +268,16 @@ class ConvNextImageProcessor(BaseImageProcessor):
                "torch.Tensor, tf.Tensor or jax.ndarray."
            )

-        if do_resize and size is None or resample is None:
-            raise ValueError("Size and resample must be specified if do_resize is True.")
-
-        if do_resize and size["shortest_edge"] < 384 and crop_pct is None:
-            raise ValueError("crop_pct must be specified if size < 384.")
-
-        if do_rescale and rescale_factor is None:
-            raise ValueError("Rescale factor must be specified if do_rescale is True.")
-
-        if do_normalize and (image_mean is None or image_std is None):
-            raise ValueError("Image mean and std must be specified if do_normalize is True.")
+        validate_preprocess_arguments(
+            do_rescale=do_rescale,
+            rescale_factor=rescale_factor,
+            do_normalize=do_normalize,
+            image_mean=image_mean,
+            image_std=image_std,
+            do_resize=do_resize,
+            size=size,
+            resample=resample,
+        )

        # All transformations expect numpy arrays.
        images = [to_numpy_array(image) for image in images]

--- a/src/transformers/models/deformable_detr/image_processing_deformable_detr.py
+++ b/src/transformers/models/deformable_detr/image_processing_deformable_detr.py
@@ -49,6 +49,7 @@ from ...image_utils import (
    to_numpy_array,
    valid_images,
    validate_annotations,
+    validate_preprocess_arguments,
 )
 from ...utils import (
    TensorType,
@@ -1289,16 +1290,27 @@ class DeformableDetrImageProcessor(BaseImageProcessor):
        do_pad = self.do_pad if do_pad is None else do_pad
        format = self.format if format is None else format

-        if do_resize is not None and size is None:
-            raise ValueError("Size and max_size must be specified if do_resize is True.")
+        images = make_list_of_images(images)

-        if do_rescale is not None and rescale_factor is None:
-            raise ValueError("Rescale factor must be specified if do_rescale is True.")
+        if not valid_images(images):
+            raise ValueError(
+                "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
+                "torch.Tensor, tf.Tensor or jax.ndarray."
+            )

-        if do_normalize is not None and (image_mean is None or image_std is None):
-            raise ValueError("Image mean and std must be specified if do_normalize is True.")
+        # Here, the pad() method pads to the maximum of (width, height). It does not need to be validated.
+
+        validate_preprocess_arguments(
+            do_rescale=do_rescale,
+            rescale_factor=rescale_factor,
+            do_normalize=do_normalize,
+            image_mean=image_mean,
+            image_std=image_std,
+            do_resize=do_resize,
+            size=size,
+            resample=resample,
+        )

-        images = make_list_of_images(images)
        if annotations is not None and isinstance(annotations, dict):
            annotations = [annotations]

@@ -1307,12 +1319,6 @@ class DeformableDetrImageProcessor(BaseImageProcessor):
                f"The number of images ({len(images)}) and annotations ({len(annotations)}) do not match."
            )

-        if not valid_images(images):
-            raise ValueError(
-                "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
-                "torch.Tensor, tf.Tensor or jax.ndarray."
-            )
-
        format = AnnotationFormat(format)
        if annotations is not None:
            validate_annotations(format, SUPPORTED_ANNOTATION_FORMATS, annotations)

--- a/src/transformers/models/deit/image_processing_deit.py
+++ b/src/transformers/models/deit/image_processing_deit.py
@@ -31,6 +31,7 @@ from ...image_utils import (
    make_list_of_images,
    to_numpy_array,
    valid_images,
+    validate_preprocess_arguments,
 )
 from ...utils import TensorType, is_vision_available, logging

@@ -244,19 +245,18 @@ class DeiTImageProcessor(BaseImageProcessor):
                "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
                "torch.Tensor, tf.Tensor or jax.ndarray."
            )
-
-        if do_resize and size is None or resample is None:
-            raise ValueError("Size and resample must be specified if do_resize is True.")
-
-        if do_center_crop and crop_size is None:
-            raise ValueError("Crop size must be specified if do_center_crop is True.")
-
-        if do_rescale and rescale_factor is None:
-            raise ValueError("Rescale factor must be specified if do_rescale is True.")
-
-        if do_normalize and (image_mean is None or image_std is None):
-            raise ValueError("Image mean and std must be specified if do_normalize is True.")
-
+        validate_preprocess_arguments(
+            do_rescale=do_rescale,
+            rescale_factor=rescale_factor,
+            do_normalize=do_normalize,
+            image_mean=image_mean,
+            image_std=image_std,
+            do_center_crop=do_center_crop,
+            crop_size=crop_size,
+            do_resize=do_resize,
+            size=size,
+            resample=resample,
+        )
        # All transformations expect numpy arrays.
        images = [to_numpy_array(image) for image in images]


--- a/src/transformers/models/deta/image_processing_deta.py
+++ b/src/transformers/models/deta/image_processing_deta.py
@@ -46,6 +46,7 @@ from ...image_utils import (
    to_numpy_array,
    valid_images,
    validate_annotations,
+    validate_preprocess_arguments,
 )
 from ...utils import (
    is_flax_available,
@@ -955,29 +956,32 @@ class DetaImageProcessor(BaseImageProcessor):
        do_pad = self.do_pad if do_pad is None else do_pad
        format = self.format if format is None else format

-        if do_resize is not None and size is None:
-            raise ValueError("Size and max_size must be specified if do_resize is True.")
-
-        if do_rescale is not None and rescale_factor is None:
-            raise ValueError("Rescale factor must be specified if do_rescale is True.")
-
-        if do_normalize is not None and (image_mean is None or image_std is None):
-            raise ValueError("Image mean and std must be specified if do_normalize is True.")
+        # Here, the pad() method pads to the maximum of (width, height). It does not need to be validated.
+
+        validate_preprocess_arguments(
+            do_rescale=do_rescale,
+            rescale_factor=rescale_factor,
+            do_normalize=do_normalize,
+            image_mean=image_mean,
+            image_std=image_std,
+            do_resize=do_resize,
+            size=size,
+            resample=resample,
+        )

        if not is_batched(images):
            images = [images]
            annotations = [annotations] if annotations is not None else None

-        if annotations is not None and len(images) != len(annotations):
-            raise ValueError(
-                f"The number of images ({len(images)}) and annotations ({len(annotations)}) do not match."
-            )
-
        if not valid_images(images):
            raise ValueError(
                "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
                "torch.Tensor, tf.Tensor or jax.ndarray."
            )
+        if annotations is not None and len(images) != len(annotations):
+            raise ValueError(
+                f"The number of images ({len(images)}) and annotations ({len(annotations)}) do not match."
+            )

        format = AnnotationFormat(format)
        if annotations is not None:

--- a/src/transformers/models/detr/image_processing_detr.py
+++ b/src/transformers/models/detr/image_processing_detr.py
@@ -48,6 +48,7 @@ from ...image_utils import (
    to_numpy_array,
    valid_images,
    validate_annotations,
+    validate_preprocess_arguments,
 )
 from ...utils import (
    TensorType,
@@ -1261,16 +1262,27 @@ class DetrImageProcessor(BaseImageProcessor):
        do_pad = self.do_pad if do_pad is None else do_pad
        format = self.format if format is None else format

-        if do_resize is not None and size is None:
-            raise ValueError("Size and max_size must be specified if do_resize is True.")
+        images = make_list_of_images(images)

-        if do_rescale is not None and rescale_factor is None:
-            raise ValueError("Rescale factor must be specified if do_rescale is True.")
+        if not valid_images(images):
+            raise ValueError(
+                "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
+                "torch.Tensor, tf.Tensor or jax.ndarray."
+            )

-        if do_normalize is not None and (image_mean is None or image_std is None):
-            raise ValueError("Image mean and std must be specified if do_normalize is True.")
+        # Here, the pad() method pads to the maximum of (width, height). It does not need to be validated.
+
+        validate_preprocess_arguments(
+            do_rescale=do_rescale,
+            rescale_factor=rescale_factor,
+            do_normalize=do_normalize,
+            image_mean=image_mean,
+            image_std=image_std,
+            do_resize=do_resize,
+            size=size,
+            resample=resample,
+        )

-        images = make_list_of_images(images)
        if annotations is not None and isinstance(annotations, dict):
            annotations = [annotations]

@@ -1279,12 +1291,6 @@ class DetrImageProcessor(BaseImageProcessor):
                f"The number of images ({len(images)}) and annotations ({len(annotations)}) do not match."
            )

-        if not valid_images(images):
-            raise ValueError(
-                "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
-                "torch.Tensor, tf.Tensor or jax.ndarray."
-            )
-
        format = AnnotationFormat(format)
        if annotations is not None:
            validate_annotations(format, SUPPORTED_ANNOTATION_FORMATS, annotations)

--- a/src/transformers/models/donut/image_processing_donut.py
+++ b/src/transformers/models/donut/image_processing_donut.py
@@ -37,6 +37,7 @@ from ...image_utils import (
    make_list_of_images,
    to_numpy_array,
    valid_images,
+    validate_preprocess_arguments,
 )
 from ...utils import TensorType, logging
 from ...utils.import_utils import is_vision_available
@@ -392,18 +393,18 @@ class DonutImageProcessor(BaseImageProcessor):
                "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
                "torch.Tensor, tf.Tensor or jax.ndarray."
            )
-
-        if do_resize and size is None:
-            raise ValueError("Size must be specified if do_resize is True.")
-
-        if do_rescale and rescale_factor is None:
-            raise ValueError("Rescale factor must be specified if do_rescale is True.")
-
-        if do_pad and size is None:
-            raise ValueError("Size must be specified if do_pad is True.")
-
-        if do_normalize and (image_mean is None or image_std is None):
-            raise ValueError("Image mean and std must be specified if do_normalize is True.")
+        validate_preprocess_arguments(
+            do_rescale=do_rescale,
+            rescale_factor=rescale_factor,
+            do_normalize=do_normalize,
+            image_mean=image_mean,
+            image_std=image_std,
+            do_pad=do_pad,
+            size_divisibility=size,  # There is no pad divisibility in this processor, but pad requires the size arg.
+            do_resize=do_resize,
+            size=size,
+            resample=resample,
+        )

        # All transformations expect numpy arrays.
        images = [to_numpy_array(image) for image in images]

--- a/src/transformers/models/dpt/image_processing_dpt.py
+++ b/src/transformers/models/dpt/image_processing_dpt.py
@@ -35,6 +35,7 @@ from ...image_utils import (
    make_list_of_images,
    to_numpy_array,
    valid_images,
+    validate_preprocess_arguments,
 )
 from ...utils import TensorType, is_vision_available, logging

@@ -354,19 +355,18 @@ class DPTImageProcessor(BaseImageProcessor):
                "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
                "torch.Tensor, tf.Tensor or jax.ndarray."
            )
-
-        if do_resize and size is None or resample is None:
-            raise ValueError("Size and resample must be specified if do_resize is True.")
-
-        if do_rescale and rescale_factor is None:
-            raise ValueError("Rescale factor must be specified if do_rescale is True.")
-
-        if do_normalize and (image_mean is None or image_std is None):
-            raise ValueError("Image mean and std must be specified if do_normalize is True.")
-
-        if do_pad and size_divisor is None:
-            raise ValueError("Size divisibility must be specified if do_pad is True.")
-
+        validate_preprocess_arguments(
+            do_rescale=do_rescale,
+            rescale_factor=rescale_factor,
+            do_normalize=do_normalize,
+            image_mean=image_mean,
+            image_std=image_std,
+            do_pad=do_pad,
+            size_divisibility=size_divisor,
+            do_resize=do_resize,
+            size=size,
+            resample=resample,
+        )
        # All transformations expect numpy arrays.
        images = [to_numpy_array(image) for image in images]


--- a/src/transformers/models/efficientformer/image_processing_efficientformer.py
+++ b/src/transformers/models/efficientformer/image_processing_efficientformer.py
@@ -35,6 +35,7 @@ from ...image_utils import (
    is_scaled_image,
    to_numpy_array,
    valid_images,
+    validate_preprocess_arguments,
 )
 from ...utils import TensorType, logging

@@ -245,16 +246,18 @@ class EfficientFormerImageProcessor(BaseImageProcessor):
                "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
                "torch.Tensor, tf.Tensor or jax.ndarray."
            )
-
-        if do_resize and size is None:
-            raise ValueError("Size must be specified if do_resize is True.")
-
-        if do_center_crop and crop_size is None:
-            raise ValueError("Crop size must be specified if do_center_crop is True.")
-
-        if do_rescale and rescale_factor is None:
-            raise ValueError("Rescale factor must be specified if do_rescale is True.")
-
+        validate_preprocess_arguments(
+            do_rescale=do_rescale,
+            rescale_factor=rescale_factor,
+            do_normalize=do_normalize,
+            image_mean=image_mean,
+            image_std=image_std,
+            do_center_crop=do_center_crop,
+            crop_size=crop_size,
+            do_resize=do_resize,
+            size=size,
+            resample=resample,
+        )
        # All transformations expect numpy arrays.
        images = [to_numpy_array(image) for image in images]


--- a/src/transformers/models/efficientnet/image_processing_efficientnet.py
+++ b/src/transformers/models/efficientnet/image_processing_efficientnet.py
@@ -31,6 +31,7 @@ from ...image_utils import (
    make_list_of_images,
    to_numpy_array,
    valid_images,
+    validate_preprocess_arguments,
 )
 from ...utils import TensorType, is_vision_available, logging

@@ -301,19 +302,18 @@ class EfficientNetImageProcessor(BaseImageProcessor):
                "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
                "torch.Tensor, tf.Tensor or jax.ndarray."
            )
-
-        if do_resize and size is None or resample is None:
-            raise ValueError("Size and resample must be specified if do_resize is True.")
-
-        if do_center_crop and crop_size is None:
-            raise ValueError("Crop size must be specified if do_center_crop is True.")
-
-        if do_rescale and rescale_factor is None:
-            raise ValueError("Rescale factor must be specified if do_rescale is True.")
-
-        if do_normalize and (image_mean is None or image_std is None):
-            raise ValueError("Image mean and std must be specified if do_normalize is True.")
-
+        validate_preprocess_arguments(
+            do_rescale=do_rescale,
+            rescale_factor=rescale_factor,
+            do_normalize=do_normalize,
+            image_mean=image_mean,
+            image_std=image_std,
+            do_center_crop=do_center_crop,
+            crop_size=crop_size,
+            do_resize=do_resize,
+            size=size,
+            resample=resample,
+        )
        # All transformations expect numpy arrays.
        images = [to_numpy_array(image) for image in images]


--- a/src/transformers/models/flava/image_processing_flava.py
+++ b/src/transformers/models/flava/image_processing_flava.py
@@ -34,6 +34,7 @@ from ...image_utils import (
    make_list_of_images,
    to_numpy_array,
    valid_images,
+    validate_preprocess_arguments,
 )
 from ...utils import TensorType, is_vision_available, logging

@@ -403,14 +404,19 @@ class FlavaImageProcessor(BaseImageProcessor):
        input_data_format: Optional[ChannelDimension] = None,
    ) -> np.ndarray:
        """Preprocesses a single image."""
-        if do_resize and size is None or resample is None:
-            raise ValueError("Size and resample must be specified if do_resize is True.")

-        if do_rescale and rescale_factor is None:
-            raise ValueError("Rescale factor must be specified if do_rescale is True.")
-
-        if do_normalize and (image_mean is None or image_std is None):
-            raise ValueError("Image mean and std must be specified if do_normalize is True.")
+        validate_preprocess_arguments(
+            do_rescale=do_rescale,
+            rescale_factor=rescale_factor,
+            do_normalize=do_normalize,
+            image_mean=image_mean,
+            image_std=image_std,
+            do_center_crop=do_center_crop,
+            crop_size=crop_size,
+            do_resize=do_resize,
+            size=size,
+            resample=resample,
+        )

        # All transformations expect numpy arrays.
        image = to_numpy_array(image)

--- a/src/transformers/models/fuyu/image_processing_fuyu.py
+++ b/src/transformers/models/fuyu/image_processing_fuyu.py
@@ -35,6 +35,7 @@ from ...image_utils import (
    is_valid_image,
    make_list_of_images,
    to_numpy_array,
+    validate_preprocess_arguments,
 )
 from ...utils import (
    TensorType,
@@ -446,15 +447,18 @@ class FuyuImageProcessor(BaseImageProcessor):

        batch_images = make_list_of_list_of_images(images)

-        if do_resize and size is None:
-            raise ValueError("Size must be specified if do_resize is True.")
-
-        if do_rescale and rescale_factor is None:
-            raise ValueError("Rescale factor must be specified if do_rescale is True.")
-
-        if do_normalize and image_mean is None or image_std is None:
-            raise ValueError("image_mean and image_std must be specified if do_normalize is True.")
-
+        validate_preprocess_arguments(
+            do_rescale=do_rescale,
+            rescale_factor=rescale_factor,
+            do_normalize=do_normalize,
+            image_mean=image_mean,
+            image_std=image_std,
+            do_pad=do_pad,
+            size_divisibility=size,  # There is no pad divisibility in this processor, but pad requires the size arg.
+            do_resize=do_resize,
+            size=size,
+            resample=resample,
+        )
        # All transformations expect numpy arrays.
        batch_images = [[to_numpy_array(image) for image in images] for images in batch_images]


--- a/src/transformers/models/glpn/image_processing_glpn.py
+++ b/src/transformers/models/glpn/image_processing_glpn.py
@@ -30,6 +30,7 @@ from ...image_utils import (
    make_list_of_images,
    to_numpy_array,
    valid_images,
+    validate_preprocess_arguments,
 )
 from ...utils import TensorType, logging

@@ -173,13 +174,21 @@ class GLPNImageProcessor(BaseImageProcessor):
        size_divisor = size_divisor if size_divisor is not None else self.size_divisor
        resample = resample if resample is not None else self.resample

-        if do_resize and size_divisor is None:
-            raise ValueError("size_divisor is required for resizing")
-
        images = make_list_of_images(images)

        if not valid_images(images):
-            raise ValueError("Invalid image(s)")
+            raise ValueError(
+                "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
+                "torch.Tensor, tf.Tensor or jax.ndarray."
+            )
+
+        # Here, the rescale() method uses a constant rescale_factor, so do_rescale and rescale_factor
+        # are not passed to the validator; only the resize arguments are checked.
+        validate_preprocess_arguments(
+            do_resize=do_resize,
+            size=size_divisor,  # Here, size_divisor is used as a parameter for optimal resizing instead of size.
+            resample=resample,
+        )

        # All transformations expect numpy arrays.
        images = [to_numpy_array(img) for img in images]