Unverified Commit fb66ef81 authored by Pavel Iakubovskii's avatar Pavel Iakubovskii Committed by GitHub
Browse files

Update kwargs validation for `preprocess` with decorator (#32024)

* BLIP preprocess

* BIT preprocess

* BRIDGETOWER preprocess

* CHAMELEON preprocess

* CHINESE_CLIP preprocess

* CONVNEXT preprocess

* DEIT preprocess

* DONUT preprocess

* DPT preprocess

* FLAVA preprocess

* EFFICIENTNET preprocess

* FUYU preprocess

* GLPN preprocess

* IMAGEGPT preprocess

* INSTRUCTBLIPVIDEO preprocess

* VIVIT preprocess

* ZOEDEPTH preprocess

* VITMATTE preprocess

* VIT preprocess

* VILT preprocess

* VIDEOMAE preprocess

* VIDEOLLAVA

* TVP processing

* TVP fixup

* SWIN2SR preprocess

* SIGLIP preprocess

* SAM preprocess

* RT-DETR preprocess

* PVT preprocess

* POOLFORMER preprocess

* PERCEIVER preprocess

* OWLVIT preprocess

* OWLV2 preprocess

* NOUGAT preprocess

* MOBILEVIT preprocess

* MOBILENETV2 preprocess

* MOBILENETV1 preprocess

* LEVIT preprocess

* LAYOUTLMV2 preprocess

* LAYOUTLMV3 preprocess

* Add test

* Update tests
parent e85d8639
...@@ -29,10 +29,16 @@ from ...image_utils import ( ...@@ -29,10 +29,16 @@ from ...image_utils import (
make_list_of_images, make_list_of_images,
to_numpy_array, to_numpy_array,
valid_images, valid_images,
validate_kwargs,
validate_preprocess_arguments, validate_preprocess_arguments,
) )
from ...utils import TensorType, is_torch_available, is_torch_tensor, is_vision_available, logging from ...utils import (
TensorType,
filter_out_non_signature_kwargs,
is_torch_available,
is_torch_tensor,
is_vision_available,
logging,
)
if is_vision_available(): if is_vision_available():
...@@ -105,21 +111,6 @@ class MobileViTImageProcessor(BaseImageProcessor): ...@@ -105,21 +111,6 @@ class MobileViTImageProcessor(BaseImageProcessor):
self.do_center_crop = do_center_crop self.do_center_crop = do_center_crop
self.crop_size = crop_size self.crop_size = crop_size
self.do_flip_channel_order = do_flip_channel_order self.do_flip_channel_order = do_flip_channel_order
self._valid_processor_keys = [
"images",
"segmentation_maps",
"do_resize",
"size",
"resample",
"do_rescale",
"rescale_factor",
"do_center_crop",
"crop_size",
"do_flip_channel_order",
"return_tensors",
"data_format",
"input_data_format",
]
# Copied from transformers.models.mobilenet_v1.image_processing_mobilenet_v1.MobileNetV1ImageProcessor.resize with PILImageResampling.BICUBIC->PILImageResampling.BILINEAR # Copied from transformers.models.mobilenet_v1.image_processing_mobilenet_v1.MobileNetV1ImageProcessor.resize with PILImageResampling.BICUBIC->PILImageResampling.BILINEAR
def resize( def resize(
...@@ -306,6 +297,7 @@ class MobileViTImageProcessor(BaseImageProcessor): ...@@ -306,6 +297,7 @@ class MobileViTImageProcessor(BaseImageProcessor):
segmentation_map = segmentation_map.astype(np.int64) segmentation_map = segmentation_map.astype(np.int64)
return segmentation_map return segmentation_map
@filter_out_non_signature_kwargs()
def preprocess( def preprocess(
self, self,
images: ImageInput, images: ImageInput,
...@@ -321,7 +313,6 @@ class MobileViTImageProcessor(BaseImageProcessor): ...@@ -321,7 +313,6 @@ class MobileViTImageProcessor(BaseImageProcessor):
return_tensors: Optional[Union[str, TensorType]] = None, return_tensors: Optional[Union[str, TensorType]] = None,
data_format: ChannelDimension = ChannelDimension.FIRST, data_format: ChannelDimension = ChannelDimension.FIRST,
input_data_format: Optional[Union[str, ChannelDimension]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None,
**kwargs,
) -> PIL.Image.Image: ) -> PIL.Image.Image:
""" """
Preprocess an image or batch of images. Preprocess an image or batch of images.
...@@ -383,8 +374,6 @@ class MobileViTImageProcessor(BaseImageProcessor): ...@@ -383,8 +374,6 @@ class MobileViTImageProcessor(BaseImageProcessor):
images = make_list_of_images(images) images = make_list_of_images(images)
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
if segmentation_maps is not None: if segmentation_maps is not None:
segmentation_maps = make_list_of_images(segmentation_maps, expected_ndims=2) segmentation_maps = make_list_of_images(segmentation_maps, expected_ndims=2)
......
...@@ -38,10 +38,9 @@ from ...image_utils import ( ...@@ -38,10 +38,9 @@ from ...image_utils import (
make_list_of_images, make_list_of_images,
to_numpy_array, to_numpy_array,
valid_images, valid_images,
validate_kwargs,
validate_preprocess_arguments, validate_preprocess_arguments,
) )
from ...utils import TensorType, logging from ...utils import TensorType, filter_out_non_signature_kwargs, logging
from ...utils.import_utils import is_cv2_available, is_vision_available from ...utils.import_utils import is_cv2_available, is_vision_available
...@@ -126,24 +125,6 @@ class NougatImageProcessor(BaseImageProcessor): ...@@ -126,24 +125,6 @@ class NougatImageProcessor(BaseImageProcessor):
self.do_normalize = do_normalize self.do_normalize = do_normalize
self.image_mean = image_mean if image_mean is not None else IMAGENET_DEFAULT_MEAN self.image_mean = image_mean if image_mean is not None else IMAGENET_DEFAULT_MEAN
self.image_std = image_std if image_std is not None else IMAGENET_DEFAULT_STD self.image_std = image_std if image_std is not None else IMAGENET_DEFAULT_STD
self._valid_processor_keys = [
"images",
"do_crop_margin",
"do_resize",
"size",
"resample",
"do_thumbnail",
"do_align_long_axis",
"do_pad",
"do_rescale",
"rescale_factor",
"do_normalize",
"image_mean",
"image_std",
"return_tensors",
"data_format",
"input_data_format",
]
def python_find_non_zero(self, image: np.array): def python_find_non_zero(self, image: np.array):
"""This is a reimplementation of a findNonZero function equivalent to cv2.""" """This is a reimplementation of a findNonZero function equivalent to cv2."""
...@@ -375,6 +356,7 @@ class NougatImageProcessor(BaseImageProcessor): ...@@ -375,6 +356,7 @@ class NougatImageProcessor(BaseImageProcessor):
) )
return resized_image return resized_image
@filter_out_non_signature_kwargs()
def preprocess( def preprocess(
self, self,
images: ImageInput, images: ImageInput,
...@@ -393,7 +375,6 @@ class NougatImageProcessor(BaseImageProcessor): ...@@ -393,7 +375,6 @@ class NougatImageProcessor(BaseImageProcessor):
return_tensors: Optional[Union[str, TensorType]] = None, return_tensors: Optional[Union[str, TensorType]] = None,
data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, data_format: Optional[ChannelDimension] = ChannelDimension.FIRST,
input_data_format: Optional[Union[str, ChannelDimension]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None,
**kwargs,
) -> PIL.Image.Image: ) -> PIL.Image.Image:
""" """
Preprocess an image or batch of images. Preprocess an image or batch of images.
...@@ -461,8 +442,6 @@ class NougatImageProcessor(BaseImageProcessor): ...@@ -461,8 +442,6 @@ class NougatImageProcessor(BaseImageProcessor):
images = make_list_of_images(images) images = make_list_of_images(images)
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
if not valid_images(images): if not valid_images(images):
raise ValueError( raise ValueError(
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
......
...@@ -37,11 +37,11 @@ from ...image_utils import ( ...@@ -37,11 +37,11 @@ from ...image_utils import (
make_list_of_images, make_list_of_images,
to_numpy_array, to_numpy_array,
valid_images, valid_images,
validate_kwargs,
validate_preprocess_arguments, validate_preprocess_arguments,
) )
from ...utils import ( from ...utils import (
TensorType, TensorType,
filter_out_non_signature_kwargs,
is_scipy_available, is_scipy_available,
is_torch_available, is_torch_available,
is_vision_available, is_vision_available,
...@@ -233,20 +233,6 @@ class Owlv2ImageProcessor(BaseImageProcessor): ...@@ -233,20 +233,6 @@ class Owlv2ImageProcessor(BaseImageProcessor):
self.do_normalize = do_normalize self.do_normalize = do_normalize
self.image_mean = image_mean if image_mean is not None else OPENAI_CLIP_MEAN self.image_mean = image_mean if image_mean is not None else OPENAI_CLIP_MEAN
self.image_std = image_std if image_std is not None else OPENAI_CLIP_STD self.image_std = image_std if image_std is not None else OPENAI_CLIP_STD
self._valid_processor_keys = [
"images",
"do_pad",
"do_resize",
"size",
"do_rescale",
"rescale_factor",
"do_normalize",
"image_mean",
"image_std",
"return_tensors",
"data_format",
"input_data_format",
]
def pad( def pad(
self, self,
...@@ -346,6 +332,7 @@ class Owlv2ImageProcessor(BaseImageProcessor): ...@@ -346,6 +332,7 @@ class Owlv2ImageProcessor(BaseImageProcessor):
) )
return image return image
@filter_out_non_signature_kwargs()
def preprocess( def preprocess(
self, self,
images: ImageInput, images: ImageInput,
...@@ -360,7 +347,6 @@ class Owlv2ImageProcessor(BaseImageProcessor): ...@@ -360,7 +347,6 @@ class Owlv2ImageProcessor(BaseImageProcessor):
return_tensors: Optional[Union[str, TensorType]] = None, return_tensors: Optional[Union[str, TensorType]] = None,
data_format: ChannelDimension = ChannelDimension.FIRST, data_format: ChannelDimension = ChannelDimension.FIRST,
input_data_format: Optional[Union[str, ChannelDimension]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None,
**kwargs,
) -> PIL.Image.Image: ) -> PIL.Image.Image:
""" """
Preprocess an image or batch of images. Preprocess an image or batch of images.
...@@ -416,8 +402,6 @@ class Owlv2ImageProcessor(BaseImageProcessor): ...@@ -416,8 +402,6 @@ class Owlv2ImageProcessor(BaseImageProcessor):
images = make_list_of_images(images) images = make_list_of_images(images)
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
if not valid_images(images): if not valid_images(images):
raise ValueError( raise ValueError(
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
......
...@@ -38,10 +38,9 @@ from ...image_utils import ( ...@@ -38,10 +38,9 @@ from ...image_utils import (
make_list_of_images, make_list_of_images,
to_numpy_array, to_numpy_array,
valid_images, valid_images,
validate_kwargs,
validate_preprocess_arguments, validate_preprocess_arguments,
) )
from ...utils import TensorType, is_torch_available, logging from ...utils import TensorType, filter_out_non_signature_kwargs, is_torch_available, logging
if is_torch_available(): if is_torch_available():
...@@ -167,22 +166,6 @@ class OwlViTImageProcessor(BaseImageProcessor): ...@@ -167,22 +166,6 @@ class OwlViTImageProcessor(BaseImageProcessor):
self.do_normalize = do_normalize self.do_normalize = do_normalize
self.image_mean = image_mean if image_mean is not None else OPENAI_CLIP_MEAN self.image_mean = image_mean if image_mean is not None else OPENAI_CLIP_MEAN
self.image_std = image_std if image_std is not None else OPENAI_CLIP_STD self.image_std = image_std if image_std is not None else OPENAI_CLIP_STD
self._valid_processor_keys = [
"images",
"do_resize",
"size",
"resample",
"do_center_crop",
"crop_size",
"do_rescale",
"rescale_factor",
"do_normalize",
"image_mean",
"image_std",
"return_tensors",
"data_format",
"input_data_format",
]
def resize( def resize(
self, self,
...@@ -285,6 +268,7 @@ class OwlViTImageProcessor(BaseImageProcessor): ...@@ -285,6 +268,7 @@ class OwlViTImageProcessor(BaseImageProcessor):
""" """
return rescale(image, rescale_factor, data_format=data_format, input_data_format=input_data_format) return rescale(image, rescale_factor, data_format=data_format, input_data_format=input_data_format)
@filter_out_non_signature_kwargs()
def preprocess( def preprocess(
self, self,
images: ImageInput, images: ImageInput,
...@@ -301,7 +285,6 @@ class OwlViTImageProcessor(BaseImageProcessor): ...@@ -301,7 +285,6 @@ class OwlViTImageProcessor(BaseImageProcessor):
return_tensors: Optional[Union[TensorType, str]] = None, return_tensors: Optional[Union[TensorType, str]] = None,
data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST, data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST,
input_data_format: Optional[Union[str, ChannelDimension]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None,
**kwargs,
) -> BatchFeature: ) -> BatchFeature:
""" """
Prepares an image or batch of images for the model. Prepares an image or batch of images for the model.
...@@ -373,7 +356,6 @@ class OwlViTImageProcessor(BaseImageProcessor): ...@@ -373,7 +356,6 @@ class OwlViTImageProcessor(BaseImageProcessor):
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
"torch.Tensor, tf.Tensor or jax.ndarray." "torch.Tensor, tf.Tensor or jax.ndarray."
) )
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
validate_preprocess_arguments( validate_preprocess_arguments(
do_rescale=do_rescale, do_rescale=do_rescale,
......
...@@ -32,10 +32,9 @@ from ...image_utils import ( ...@@ -32,10 +32,9 @@ from ...image_utils import (
make_list_of_images, make_list_of_images,
to_numpy_array, to_numpy_array,
valid_images, valid_images,
validate_kwargs,
validate_preprocess_arguments, validate_preprocess_arguments,
) )
from ...utils import TensorType, is_vision_available, logging from ...utils import TensorType, filter_out_non_signature_kwargs, is_vision_available, logging
if is_vision_available(): if is_vision_available():
...@@ -114,22 +113,6 @@ class PerceiverImageProcessor(BaseImageProcessor): ...@@ -114,22 +113,6 @@ class PerceiverImageProcessor(BaseImageProcessor):
self.do_normalize = do_normalize self.do_normalize = do_normalize
self.image_mean = image_mean if image_mean is not None else IMAGENET_DEFAULT_MEAN self.image_mean = image_mean if image_mean is not None else IMAGENET_DEFAULT_MEAN
self.image_std = image_std if image_std is not None else IMAGENET_DEFAULT_STD self.image_std = image_std if image_std is not None else IMAGENET_DEFAULT_STD
self._valid_processor_keys = [
"images",
"do_center_crop",
"crop_size",
"do_resize",
"size",
"resample",
"do_rescale",
"rescale_factor",
"do_normalize",
"image_mean",
"image_std",
"return_tensors",
"data_format",
"input_data_format",
]
def center_crop( def center_crop(
self, self,
...@@ -224,6 +207,7 @@ class PerceiverImageProcessor(BaseImageProcessor): ...@@ -224,6 +207,7 @@ class PerceiverImageProcessor(BaseImageProcessor):
**kwargs, **kwargs,
) )
@filter_out_non_signature_kwargs()
def preprocess( def preprocess(
self, self,
images: ImageInput, images: ImageInput,
...@@ -240,7 +224,6 @@ class PerceiverImageProcessor(BaseImageProcessor): ...@@ -240,7 +224,6 @@ class PerceiverImageProcessor(BaseImageProcessor):
return_tensors: Optional[Union[str, TensorType]] = None, return_tensors: Optional[Union[str, TensorType]] = None,
data_format: ChannelDimension = ChannelDimension.FIRST, data_format: ChannelDimension = ChannelDimension.FIRST,
input_data_format: Optional[Union[str, ChannelDimension]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None,
**kwargs,
) -> PIL.Image.Image: ) -> PIL.Image.Image:
""" """
Preprocess an image or batch of images. Preprocess an image or batch of images.
...@@ -303,8 +286,6 @@ class PerceiverImageProcessor(BaseImageProcessor): ...@@ -303,8 +286,6 @@ class PerceiverImageProcessor(BaseImageProcessor):
images = make_list_of_images(images) images = make_list_of_images(images)
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
if not valid_images(images): if not valid_images(images):
raise ValueError( raise ValueError(
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
......
...@@ -35,10 +35,9 @@ from ...image_utils import ( ...@@ -35,10 +35,9 @@ from ...image_utils import (
make_list_of_images, make_list_of_images,
to_numpy_array, to_numpy_array,
valid_images, valid_images,
validate_kwargs,
validate_preprocess_arguments, validate_preprocess_arguments,
) )
from ...utils import TensorType, is_vision_available, logging from ...utils import TensorType, filter_out_non_signature_kwargs, is_vision_available, logging
if is_vision_available(): if is_vision_available():
...@@ -133,23 +132,6 @@ class PoolFormerImageProcessor(BaseImageProcessor): ...@@ -133,23 +132,6 @@ class PoolFormerImageProcessor(BaseImageProcessor):
self.do_normalize = do_normalize self.do_normalize = do_normalize
self.image_mean = image_mean if image_mean is not None else IMAGENET_DEFAULT_MEAN self.image_mean = image_mean if image_mean is not None else IMAGENET_DEFAULT_MEAN
self.image_std = image_std if image_std is not None else IMAGENET_DEFAULT_STD self.image_std = image_std if image_std is not None else IMAGENET_DEFAULT_STD
self._valid_processor_keys = [
"images",
"do_resize",
"size",
"crop_pct",
"resample",
"do_center_crop",
"crop_size",
"do_rescale",
"rescale_factor",
"do_normalize",
"image_mean",
"image_std",
"return_tensors",
"data_format",
"input_data_format",
]
def resize( def resize(
self, self,
...@@ -227,6 +209,7 @@ class PoolFormerImageProcessor(BaseImageProcessor): ...@@ -227,6 +209,7 @@ class PoolFormerImageProcessor(BaseImageProcessor):
**kwargs, **kwargs,
) )
@filter_out_non_signature_kwargs()
def preprocess( def preprocess(
self, self,
images: ImageInput, images: ImageInput,
...@@ -244,7 +227,6 @@ class PoolFormerImageProcessor(BaseImageProcessor): ...@@ -244,7 +227,6 @@ class PoolFormerImageProcessor(BaseImageProcessor):
return_tensors: Optional[Union[str, TensorType]] = None, return_tensors: Optional[Union[str, TensorType]] = None,
data_format: ChannelDimension = ChannelDimension.FIRST, data_format: ChannelDimension = ChannelDimension.FIRST,
input_data_format: Optional[Union[str, ChannelDimension]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None,
**kwargs,
) -> PIL.Image.Image: ) -> PIL.Image.Image:
""" """
Preprocess an image or batch of images. Preprocess an image or batch of images.
...@@ -311,8 +293,6 @@ class PoolFormerImageProcessor(BaseImageProcessor): ...@@ -311,8 +293,6 @@ class PoolFormerImageProcessor(BaseImageProcessor):
images = make_list_of_images(images) images = make_list_of_images(images)
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
if not valid_images(images): if not valid_images(images):
raise ValueError( raise ValueError(
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
......
...@@ -31,10 +31,9 @@ from ...image_utils import ( ...@@ -31,10 +31,9 @@ from ...image_utils import (
make_list_of_images, make_list_of_images,
to_numpy_array, to_numpy_array,
valid_images, valid_images,
validate_kwargs,
validate_preprocess_arguments, validate_preprocess_arguments,
) )
from ...utils import TensorType, logging from ...utils import TensorType, filter_out_non_signature_kwargs, logging
logger = logging.get_logger(__name__) logger = logging.get_logger(__name__)
...@@ -96,20 +95,6 @@ class PvtImageProcessor(BaseImageProcessor): ...@@ -96,20 +95,6 @@ class PvtImageProcessor(BaseImageProcessor):
self.rescale_factor = rescale_factor self.rescale_factor = rescale_factor
self.image_mean = image_mean if image_mean is not None else IMAGENET_DEFAULT_MEAN self.image_mean = image_mean if image_mean is not None else IMAGENET_DEFAULT_MEAN
self.image_std = image_std if image_std is not None else IMAGENET_DEFAULT_STD self.image_std = image_std if image_std is not None else IMAGENET_DEFAULT_STD
self._valid_processor_keys = [
"images",
"do_resize",
"size",
"resample",
"do_rescale",
"rescale_factor",
"do_normalize",
"image_mean",
"image_std",
"return_tensors",
"data_format",
"input_data_format",
]
# Copied from transformers.models.vit.image_processing_vit.ViTImageProcessor.resize # Copied from transformers.models.vit.image_processing_vit.ViTImageProcessor.resize
def resize( def resize(
...@@ -160,6 +145,7 @@ class PvtImageProcessor(BaseImageProcessor): ...@@ -160,6 +145,7 @@ class PvtImageProcessor(BaseImageProcessor):
**kwargs, **kwargs,
) )
@filter_out_non_signature_kwargs()
def preprocess( def preprocess(
self, self,
images: ImageInput, images: ImageInput,
...@@ -174,7 +160,6 @@ class PvtImageProcessor(BaseImageProcessor): ...@@ -174,7 +160,6 @@ class PvtImageProcessor(BaseImageProcessor):
return_tensors: Optional[Union[str, TensorType]] = None, return_tensors: Optional[Union[str, TensorType]] = None,
data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST, data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST,
input_data_format: Optional[Union[str, ChannelDimension]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None,
**kwargs,
): ):
""" """
Preprocess an image or batch of images. Preprocess an image or batch of images.
...@@ -233,8 +218,6 @@ class PvtImageProcessor(BaseImageProcessor): ...@@ -233,8 +218,6 @@ class PvtImageProcessor(BaseImageProcessor):
images = make_list_of_images(images) images = make_list_of_images(images)
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
if not valid_images(images): if not valid_images(images):
raise ValueError( raise ValueError(
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
......
...@@ -45,10 +45,10 @@ from ...image_utils import ( ...@@ -45,10 +45,10 @@ from ...image_utils import (
to_numpy_array, to_numpy_array,
valid_images, valid_images,
validate_annotations, validate_annotations,
validate_kwargs,
validate_preprocess_arguments, validate_preprocess_arguments,
) )
from ...utils import ( from ...utils import (
filter_out_non_signature_kwargs,
is_flax_available, is_flax_available,
is_jax_tensor, is_jax_tensor,
is_tf_available, is_tf_available,
...@@ -471,27 +471,6 @@ class RTDetrImageProcessor(BaseImageProcessor): ...@@ -471,27 +471,6 @@ class RTDetrImageProcessor(BaseImageProcessor):
self.image_std = image_std if image_std is not None else IMAGENET_DEFAULT_STD self.image_std = image_std if image_std is not None else IMAGENET_DEFAULT_STD
self.do_pad = do_pad self.do_pad = do_pad
self.pad_size = pad_size self.pad_size = pad_size
self._valid_processor_keys = [
"images",
"annotations",
"return_segmentation_masks",
"masks_path",
"do_resize",
"size",
"resample",
"do_rescale",
"rescale_factor",
"do_normalize",
"do_convert_annotations",
"image_mean",
"image_std",
"do_pad",
"pad_size",
"format",
"return_tensors",
"data_format",
"input_data_format",
]
def prepare_annotation( def prepare_annotation(
self, self,
...@@ -800,6 +779,7 @@ class RTDetrImageProcessor(BaseImageProcessor): ...@@ -800,6 +779,7 @@ class RTDetrImageProcessor(BaseImageProcessor):
return encoded_inputs return encoded_inputs
@filter_out_non_signature_kwargs()
def preprocess( def preprocess(
self, self,
images: ImageInput, images: ImageInput,
...@@ -821,7 +801,6 @@ class RTDetrImageProcessor(BaseImageProcessor): ...@@ -821,7 +801,6 @@ class RTDetrImageProcessor(BaseImageProcessor):
data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST, data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST,
input_data_format: Optional[Union[str, ChannelDimension]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None,
pad_size: Optional[Dict[str, int]] = None, pad_size: Optional[Dict[str, int]] = None,
**kwargs,
) -> BatchFeature: ) -> BatchFeature:
""" """
Preprocess an image or a batch of images so that it can be used by the model. Preprocess an image or a batch of images so that it can be used by the model.
...@@ -920,7 +899,6 @@ class RTDetrImageProcessor(BaseImageProcessor): ...@@ -920,7 +899,6 @@ class RTDetrImageProcessor(BaseImageProcessor):
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
"torch.Tensor, tf.Tensor or jax.ndarray." "torch.Tensor, tf.Tensor or jax.ndarray."
) )
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
# Here, the pad() method pads to the maximum of (width, height). It does not need to be validated. # Here, the pad() method pads to the maximum of (width, height). It does not need to be validated.
......
...@@ -35,11 +35,11 @@ from ...image_utils import ( ...@@ -35,11 +35,11 @@ from ...image_utils import (
make_list_of_images, make_list_of_images,
to_numpy_array, to_numpy_array,
valid_images, valid_images,
validate_kwargs,
validate_preprocess_arguments, validate_preprocess_arguments,
) )
from ...utils import ( from ...utils import (
TensorType, TensorType,
filter_out_non_signature_kwargs,
is_tf_available, is_tf_available,
is_torch_available, is_torch_available,
is_torchvision_available, is_torchvision_available,
...@@ -162,26 +162,6 @@ class SamImageProcessor(BaseImageProcessor): ...@@ -162,26 +162,6 @@ class SamImageProcessor(BaseImageProcessor):
self.pad_size = pad_size self.pad_size = pad_size
self.mask_pad_size = mask_pad_size self.mask_pad_size = mask_pad_size
self.do_convert_rgb = do_convert_rgb self.do_convert_rgb = do_convert_rgb
self._valid_processor_keys = [
"images",
"segmentation_maps",
"do_resize",
"size",
"mask_size",
"resample",
"do_rescale",
"rescale_factor",
"do_normalize",
"image_mean",
"image_std",
"do_pad",
"pad_size",
"mask_pad_size",
"do_convert_rgb",
"return_tensors",
"data_format",
"input_data_format",
]
def pad_image( def pad_image(
self, self,
...@@ -409,6 +389,7 @@ class SamImageProcessor(BaseImageProcessor): ...@@ -409,6 +389,7 @@ class SamImageProcessor(BaseImageProcessor):
return segmentation_map, original_size return segmentation_map, original_size
@filter_out_non_signature_kwargs()
def preprocess( def preprocess(
self, self,
images: ImageInput, images: ImageInput,
...@@ -429,7 +410,6 @@ class SamImageProcessor(BaseImageProcessor): ...@@ -429,7 +410,6 @@ class SamImageProcessor(BaseImageProcessor):
return_tensors: Optional[Union[str, TensorType]] = None, return_tensors: Optional[Union[str, TensorType]] = None,
data_format: ChannelDimension = ChannelDimension.FIRST, data_format: ChannelDimension = ChannelDimension.FIRST,
input_data_format: Optional[Union[str, ChannelDimension]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None,
**kwargs,
): ):
""" """
Preprocess an image or batch of images. Preprocess an image or batch of images.
...@@ -513,8 +493,6 @@ class SamImageProcessor(BaseImageProcessor): ...@@ -513,8 +493,6 @@ class SamImageProcessor(BaseImageProcessor):
images = make_list_of_images(images) images = make_list_of_images(images)
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
if not valid_images(images): if not valid_images(images):
raise ValueError( raise ValueError(
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
......
...@@ -33,10 +33,9 @@ from ...image_utils import ( ...@@ -33,10 +33,9 @@ from ...image_utils import (
make_list_of_images, make_list_of_images,
to_numpy_array, to_numpy_array,
valid_images, valid_images,
validate_kwargs,
validate_preprocess_arguments, validate_preprocess_arguments,
) )
from ...utils import TensorType, is_vision_available, logging from ...utils import TensorType, filter_out_non_signature_kwargs, is_vision_available, logging
logger = logging.get_logger(__name__) logger = logging.get_logger(__name__)
...@@ -107,22 +106,8 @@ class SiglipImageProcessor(BaseImageProcessor): ...@@ -107,22 +106,8 @@ class SiglipImageProcessor(BaseImageProcessor):
self.image_mean = image_mean self.image_mean = image_mean
self.image_std = image_std self.image_std = image_std
self.do_convert_rgb = do_convert_rgb self.do_convert_rgb = do_convert_rgb
self._valid_processor_keys = [
"images",
"do_resize",
"size",
"resample",
"do_rescale",
"rescale_factor",
"do_normalize",
"image_mean",
"image_std",
"return_tensors",
"data_format",
"input_data_format",
"do_convert_rgb",
]
@filter_out_non_signature_kwargs()
def preprocess( def preprocess(
self, self,
images: ImageInput, images: ImageInput,
...@@ -138,7 +123,6 @@ class SiglipImageProcessor(BaseImageProcessor): ...@@ -138,7 +123,6 @@ class SiglipImageProcessor(BaseImageProcessor):
data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, data_format: Optional[ChannelDimension] = ChannelDimension.FIRST,
input_data_format: Optional[Union[str, ChannelDimension]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None,
do_convert_rgb: bool = None, do_convert_rgb: bool = None,
**kwargs,
) -> PIL.Image.Image: ) -> PIL.Image.Image:
""" """
Preprocess an image or batch of images. Preprocess an image or batch of images.
...@@ -199,8 +183,6 @@ class SiglipImageProcessor(BaseImageProcessor): ...@@ -199,8 +183,6 @@ class SiglipImageProcessor(BaseImageProcessor):
images = make_list_of_images(images) images = make_list_of_images(images)
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
if not valid_images(images): if not valid_images(images):
raise ValueError( raise ValueError(
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
......
...@@ -28,10 +28,9 @@ from ...image_utils import ( ...@@ -28,10 +28,9 @@ from ...image_utils import (
make_list_of_images, make_list_of_images,
to_numpy_array, to_numpy_array,
valid_images, valid_images,
validate_kwargs,
validate_preprocess_arguments, validate_preprocess_arguments,
) )
from ...utils import TensorType, logging from ...utils import TensorType, filter_out_non_signature_kwargs, logging
logger = logging.get_logger(__name__) logger = logging.get_logger(__name__)
...@@ -66,16 +65,6 @@ class Swin2SRImageProcessor(BaseImageProcessor): ...@@ -66,16 +65,6 @@ class Swin2SRImageProcessor(BaseImageProcessor):
self.rescale_factor = rescale_factor self.rescale_factor = rescale_factor
self.do_pad = do_pad self.do_pad = do_pad
self.pad_size = pad_size self.pad_size = pad_size
self._valid_processor_keys = [
"images",
"do_rescale",
"rescale_factor",
"do_pad",
"pad_size",
"return_tensors",
"data_format",
"input_data_format",
]
def pad( def pad(
self, self,
...@@ -118,6 +107,7 @@ class Swin2SRImageProcessor(BaseImageProcessor): ...@@ -118,6 +107,7 @@ class Swin2SRImageProcessor(BaseImageProcessor):
input_data_format=input_data_format, input_data_format=input_data_format,
) )
@filter_out_non_signature_kwargs()
def preprocess( def preprocess(
self, self,
images: ImageInput, images: ImageInput,
...@@ -128,7 +118,6 @@ class Swin2SRImageProcessor(BaseImageProcessor): ...@@ -128,7 +118,6 @@ class Swin2SRImageProcessor(BaseImageProcessor):
return_tensors: Optional[Union[str, TensorType]] = None, return_tensors: Optional[Union[str, TensorType]] = None,
data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST, data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST,
input_data_format: Optional[Union[str, ChannelDimension]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None,
**kwargs,
): ):
""" """
Preprocess an image or batch of images. Preprocess an image or batch of images.
...@@ -172,8 +161,6 @@ class Swin2SRImageProcessor(BaseImageProcessor): ...@@ -172,8 +161,6 @@ class Swin2SRImageProcessor(BaseImageProcessor):
images = make_list_of_images(images) images = make_list_of_images(images)
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
if not valid_images(images): if not valid_images(images):
raise ValueError( raise ValueError(
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
......
...@@ -36,10 +36,9 @@ from ...image_utils import ( ...@@ -36,10 +36,9 @@ from ...image_utils import (
is_valid_image, is_valid_image,
to_numpy_array, to_numpy_array,
valid_images, valid_images,
validate_kwargs,
validate_preprocess_arguments, validate_preprocess_arguments,
) )
from ...utils import TensorType, is_vision_available, logging from ...utils import TensorType, filter_out_non_signature_kwargs, is_vision_available, logging
if is_vision_available(): if is_vision_available():
...@@ -173,27 +172,6 @@ class TvpImageProcessor(BaseImageProcessor): ...@@ -173,27 +172,6 @@ class TvpImageProcessor(BaseImageProcessor):
self.do_flip_channel_order = do_flip_channel_order self.do_flip_channel_order = do_flip_channel_order
self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN
self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD
self._valid_processor_keys = [
"videos",
"do_resize",
"size",
"resample",
"do_center_crop",
"crop_size",
"do_rescale",
"rescale_factor",
"do_pad",
"pad_size",
"constant_values",
"pad_mode",
"do_normalize",
"do_flip_channel_order",
"image_mean",
"image_std",
"return_tensors",
"data_format",
"input_data_format",
]
def resize( def resize(
self, self,
...@@ -358,6 +336,7 @@ class TvpImageProcessor(BaseImageProcessor): ...@@ -358,6 +336,7 @@ class TvpImageProcessor(BaseImageProcessor):
return image return image
@filter_out_non_signature_kwargs()
def preprocess( def preprocess(
self, self,
videos: Union[ImageInput, List[ImageInput], List[List[ImageInput]]], videos: Union[ImageInput, List[ImageInput], List[List[ImageInput]]],
...@@ -379,7 +358,6 @@ class TvpImageProcessor(BaseImageProcessor): ...@@ -379,7 +358,6 @@ class TvpImageProcessor(BaseImageProcessor):
return_tensors: Optional[Union[str, TensorType]] = None, return_tensors: Optional[Union[str, TensorType]] = None,
data_format: ChannelDimension = ChannelDimension.FIRST, data_format: ChannelDimension = ChannelDimension.FIRST,
input_data_format: Optional[Union[str, ChannelDimension]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None,
**kwargs,
) -> PIL.Image.Image: ) -> PIL.Image.Image:
""" """
Preprocess an image or batch of images. Preprocess an image or batch of images.
...@@ -459,8 +437,6 @@ class TvpImageProcessor(BaseImageProcessor): ...@@ -459,8 +437,6 @@ class TvpImageProcessor(BaseImageProcessor):
crop_size = crop_size if crop_size is not None else self.crop_size crop_size = crop_size if crop_size is not None else self.crop_size
crop_size = get_size_dict(crop_size, param_name="crop_size") crop_size = get_size_dict(crop_size, param_name="crop_size")
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
if not valid_images(videos): if not valid_images(videos):
raise ValueError( raise ValueError(
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
......
...@@ -38,10 +38,9 @@ from ...image_utils import ( ...@@ -38,10 +38,9 @@ from ...image_utils import (
make_list_of_images, make_list_of_images,
to_numpy_array, to_numpy_array,
valid_images, valid_images,
validate_kwargs,
validate_preprocess_arguments, validate_preprocess_arguments,
) )
from ...utils import TensorType, is_vision_available, logging from ...utils import TensorType, filter_out_non_signature_kwargs, is_vision_available, logging
logger = logging.get_logger(__name__) logger = logging.get_logger(__name__)
...@@ -140,24 +139,6 @@ class VideoLlavaImageProcessor(BaseImageProcessor): ...@@ -140,24 +139,6 @@ class VideoLlavaImageProcessor(BaseImageProcessor):
self.image_mean = image_mean if image_mean is not None else OPENAI_CLIP_MEAN self.image_mean = image_mean if image_mean is not None else OPENAI_CLIP_MEAN
self.image_std = image_std if image_std is not None else OPENAI_CLIP_STD self.image_std = image_std if image_std is not None else OPENAI_CLIP_STD
self.do_convert_rgb = do_convert_rgb self.do_convert_rgb = do_convert_rgb
self._valid_processor_keys = [
"images",
"videos",
"do_resize",
"size",
"resample",
"do_center_crop",
"crop_size",
"do_rescale",
"rescale_factor",
"do_normalize",
"image_mean",
"image_std",
"do_convert_rgb",
"return_tensors",
"data_format",
"input_data_format",
]
def resize( def resize(
self, self,
...@@ -208,6 +189,7 @@ class VideoLlavaImageProcessor(BaseImageProcessor): ...@@ -208,6 +189,7 @@ class VideoLlavaImageProcessor(BaseImageProcessor):
**kwargs, **kwargs,
) )
@filter_out_non_signature_kwargs()
def preprocess( def preprocess(
self, self,
images: List[ImageInput] = None, images: List[ImageInput] = None,
...@@ -226,7 +208,6 @@ class VideoLlavaImageProcessor(BaseImageProcessor): ...@@ -226,7 +208,6 @@ class VideoLlavaImageProcessor(BaseImageProcessor):
return_tensors: Optional[Union[str, TensorType]] = None, return_tensors: Optional[Union[str, TensorType]] = None,
data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, data_format: Optional[ChannelDimension] = ChannelDimension.FIRST,
input_data_format: Optional[Union[str, ChannelDimension]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None,
**kwargs,
) -> PIL.Image.Image: ) -> PIL.Image.Image:
""" """
Preprocess an image or batch of images. Preprocess an image or batch of images.
...@@ -301,8 +282,6 @@ class VideoLlavaImageProcessor(BaseImageProcessor): ...@@ -301,8 +282,6 @@ class VideoLlavaImageProcessor(BaseImageProcessor):
if videos is not None: if videos is not None:
videos = make_batched_videos(videos) videos = make_batched_videos(videos)
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
if (videos is not None and not valid_images(videos)) or (images is not None and not valid_images(images)): if (videos is not None and not valid_images(videos)) or (images is not None and not valid_images(images)):
raise ValueError( raise ValueError(
"Invalid input type. Must be of type PIL.Image.Image, numpy.ndarray, " "Invalid input type. Must be of type PIL.Image.Image, numpy.ndarray, "
......
...@@ -35,10 +35,9 @@ from ...image_utils import ( ...@@ -35,10 +35,9 @@ from ...image_utils import (
is_valid_image, is_valid_image,
to_numpy_array, to_numpy_array,
valid_images, valid_images,
validate_kwargs,
validate_preprocess_arguments, validate_preprocess_arguments,
) )
from ...utils import TensorType, is_vision_available, logging from ...utils import TensorType, filter_out_non_signature_kwargs, is_vision_available, logging
if is_vision_available(): if is_vision_available():
...@@ -131,22 +130,6 @@ class VideoMAEImageProcessor(BaseImageProcessor): ...@@ -131,22 +130,6 @@ class VideoMAEImageProcessor(BaseImageProcessor):
self.do_normalize = do_normalize self.do_normalize = do_normalize
self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN
self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD
self._valid_processor_keys = [
"videos",
"do_resize",
"size",
"resample",
"do_center_crop",
"crop_size",
"do_rescale",
"rescale_factor",
"do_normalize",
"image_mean",
"image_std",
"return_tensors",
"data_format",
"input_data_format",
]
def resize( def resize(
self, self,
...@@ -249,6 +232,7 @@ class VideoMAEImageProcessor(BaseImageProcessor): ...@@ -249,6 +232,7 @@ class VideoMAEImageProcessor(BaseImageProcessor):
image = to_channel_dimension_format(image, data_format, input_channel_dim=input_data_format) image = to_channel_dimension_format(image, data_format, input_channel_dim=input_data_format)
return image return image
@filter_out_non_signature_kwargs()
def preprocess( def preprocess(
self, self,
videos: ImageInput, videos: ImageInput,
...@@ -265,7 +249,6 @@ class VideoMAEImageProcessor(BaseImageProcessor): ...@@ -265,7 +249,6 @@ class VideoMAEImageProcessor(BaseImageProcessor):
return_tensors: Optional[Union[str, TensorType]] = None, return_tensors: Optional[Union[str, TensorType]] = None,
data_format: ChannelDimension = ChannelDimension.FIRST, data_format: ChannelDimension = ChannelDimension.FIRST,
input_data_format: Optional[Union[str, ChannelDimension]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None,
**kwargs,
) -> PIL.Image.Image: ) -> PIL.Image.Image:
""" """
Preprocess an image or batch of images. Preprocess an image or batch of images.
...@@ -328,8 +311,6 @@ class VideoMAEImageProcessor(BaseImageProcessor): ...@@ -328,8 +311,6 @@ class VideoMAEImageProcessor(BaseImageProcessor):
crop_size = crop_size if crop_size is not None else self.crop_size crop_size = crop_size if crop_size is not None else self.crop_size
crop_size = get_size_dict(crop_size, param_name="crop_size") crop_size = get_size_dict(crop_size, param_name="crop_size")
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
if not valid_images(videos): if not valid_images(videos):
raise ValueError( raise ValueError(
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
......
...@@ -32,10 +32,9 @@ from ...image_utils import ( ...@@ -32,10 +32,9 @@ from ...image_utils import (
make_list_of_images, make_list_of_images,
to_numpy_array, to_numpy_array,
valid_images, valid_images,
validate_kwargs,
validate_preprocess_arguments, validate_preprocess_arguments,
) )
from ...utils import TensorType, is_vision_available, logging from ...utils import TensorType, filter_out_non_signature_kwargs, is_vision_available, logging
if is_vision_available(): if is_vision_available():
...@@ -192,22 +191,6 @@ class ViltImageProcessor(BaseImageProcessor): ...@@ -192,22 +191,6 @@ class ViltImageProcessor(BaseImageProcessor):
self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN
self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD
self.do_pad = do_pad self.do_pad = do_pad
self._valid_processor_keys = [
"images",
"do_resize",
"size",
"size_divisor",
"resample",
"do_rescale",
"rescale_factor",
"do_normalize",
"image_mean",
"image_std",
"do_pad",
"return_tensors",
"data_format",
"input_data_format",
]
@classmethod @classmethod
def from_dict(cls, image_processor_dict: Dict[str, Any], **kwargs): def from_dict(cls, image_processor_dict: Dict[str, Any], **kwargs):
...@@ -351,6 +334,7 @@ class ViltImageProcessor(BaseImageProcessor): ...@@ -351,6 +334,7 @@ class ViltImageProcessor(BaseImageProcessor):
return BatchFeature(data=data, tensor_type=return_tensors) return BatchFeature(data=data, tensor_type=return_tensors)
@filter_out_non_signature_kwargs()
def preprocess( def preprocess(
self, self,
images: ImageInput, images: ImageInput,
...@@ -367,7 +351,6 @@ class ViltImageProcessor(BaseImageProcessor): ...@@ -367,7 +351,6 @@ class ViltImageProcessor(BaseImageProcessor):
return_tensors: Optional[Union[str, TensorType]] = None, return_tensors: Optional[Union[str, TensorType]] = None,
data_format: ChannelDimension = ChannelDimension.FIRST, data_format: ChannelDimension = ChannelDimension.FIRST,
input_data_format: Optional[Union[str, ChannelDimension]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None,
**kwargs,
) -> PIL.Image.Image: ) -> PIL.Image.Image:
""" """
Preprocess an image or batch of images. Preprocess an image or batch of images.
...@@ -433,8 +416,6 @@ class ViltImageProcessor(BaseImageProcessor): ...@@ -433,8 +416,6 @@ class ViltImageProcessor(BaseImageProcessor):
images = make_list_of_images(images) images = make_list_of_images(images)
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
if not valid_images(images): if not valid_images(images):
raise ValueError( raise ValueError(
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
......
...@@ -31,10 +31,9 @@ from ...image_utils import ( ...@@ -31,10 +31,9 @@ from ...image_utils import (
make_list_of_images, make_list_of_images,
to_numpy_array, to_numpy_array,
valid_images, valid_images,
validate_kwargs,
validate_preprocess_arguments, validate_preprocess_arguments,
) )
from ...utils import TensorType, logging from ...utils import TensorType, filter_out_non_signature_kwargs, logging
logger = logging.get_logger(__name__) logger = logging.get_logger(__name__)
...@@ -96,20 +95,6 @@ class ViTImageProcessor(BaseImageProcessor): ...@@ -96,20 +95,6 @@ class ViTImageProcessor(BaseImageProcessor):
self.rescale_factor = rescale_factor self.rescale_factor = rescale_factor
self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN
self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD
self._valid_processor_keys = [
"images",
"do_resize",
"size",
"resample",
"do_rescale",
"rescale_factor",
"do_normalize",
"image_mean",
"image_std",
"return_tensors",
"data_format",
"input_data_format",
]
def resize( def resize(
self, self,
...@@ -159,6 +144,7 @@ class ViTImageProcessor(BaseImageProcessor): ...@@ -159,6 +144,7 @@ class ViTImageProcessor(BaseImageProcessor):
**kwargs, **kwargs,
) )
@filter_out_non_signature_kwargs()
def preprocess( def preprocess(
self, self,
images: ImageInput, images: ImageInput,
...@@ -173,7 +159,6 @@ class ViTImageProcessor(BaseImageProcessor): ...@@ -173,7 +159,6 @@ class ViTImageProcessor(BaseImageProcessor):
return_tensors: Optional[Union[str, TensorType]] = None, return_tensors: Optional[Union[str, TensorType]] = None,
data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST, data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST,
input_data_format: Optional[Union[str, ChannelDimension]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None,
**kwargs,
): ):
""" """
Preprocess an image or batch of images. Preprocess an image or batch of images.
...@@ -232,8 +217,6 @@ class ViTImageProcessor(BaseImageProcessor): ...@@ -232,8 +217,6 @@ class ViTImageProcessor(BaseImageProcessor):
images = make_list_of_images(images) images = make_list_of_images(images)
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
if not valid_images(images): if not valid_images(images):
raise ValueError( raise ValueError(
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
......
...@@ -31,10 +31,9 @@ from ...image_utils import ( ...@@ -31,10 +31,9 @@ from ...image_utils import (
make_list_of_images, make_list_of_images,
to_numpy_array, to_numpy_array,
valid_images, valid_images,
validate_kwargs,
validate_preprocess_arguments, validate_preprocess_arguments,
) )
from ...utils import TensorType, logging from ...utils import TensorType, filter_out_non_signature_kwargs, logging
logger = logging.get_logger(__name__) logger = logging.get_logger(__name__)
...@@ -88,20 +87,6 @@ class VitMatteImageProcessor(BaseImageProcessor): ...@@ -88,20 +87,6 @@ class VitMatteImageProcessor(BaseImageProcessor):
self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN
self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD
self.size_divisibility = size_divisibility self.size_divisibility = size_divisibility
self._valid_processor_keys = [
"images",
"trimaps",
"do_rescale",
"rescale_factor",
"do_normalize",
"image_mean",
"image_std",
"do_pad",
"size_divisibility",
"return_tensors",
"data_format",
"input_data_format",
]
def pad_image( def pad_image(
self, self,
...@@ -144,6 +129,7 @@ class VitMatteImageProcessor(BaseImageProcessor): ...@@ -144,6 +129,7 @@ class VitMatteImageProcessor(BaseImageProcessor):
return image return image
@filter_out_non_signature_kwargs()
def preprocess( def preprocess(
self, self,
images: ImageInput, images: ImageInput,
...@@ -158,7 +144,6 @@ class VitMatteImageProcessor(BaseImageProcessor): ...@@ -158,7 +144,6 @@ class VitMatteImageProcessor(BaseImageProcessor):
return_tensors: Optional[Union[str, TensorType]] = None, return_tensors: Optional[Union[str, TensorType]] = None,
data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST, data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST,
input_data_format: Optional[Union[str, ChannelDimension]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None,
**kwargs,
): ):
""" """
Preprocess an image or batch of images. Preprocess an image or batch of images.
...@@ -213,8 +198,6 @@ class VitMatteImageProcessor(BaseImageProcessor): ...@@ -213,8 +198,6 @@ class VitMatteImageProcessor(BaseImageProcessor):
images = make_list_of_images(images) images = make_list_of_images(images)
trimaps = make_list_of_images(trimaps, expected_ndims=2) trimaps = make_list_of_images(trimaps, expected_ndims=2)
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
if not valid_images(trimaps): if not valid_images(trimaps):
raise ValueError( raise ValueError(
"Invalid trimap type. Must be of type PIL.Image.Image, numpy.ndarray, " "Invalid trimap type. Must be of type PIL.Image.Image, numpy.ndarray, "
......
...@@ -39,10 +39,9 @@ from ...image_utils import ( ...@@ -39,10 +39,9 @@ from ...image_utils import (
is_valid_image, is_valid_image,
to_numpy_array, to_numpy_array,
valid_images, valid_images,
validate_kwargs,
validate_preprocess_arguments, validate_preprocess_arguments,
) )
from ...utils import logging from ...utils import filter_out_non_signature_kwargs, logging
if is_vision_available(): if is_vision_available():
...@@ -139,23 +138,6 @@ class VivitImageProcessor(BaseImageProcessor): ...@@ -139,23 +138,6 @@ class VivitImageProcessor(BaseImageProcessor):
self.do_normalize = do_normalize self.do_normalize = do_normalize
self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN
self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD
self._valid_processor_keys = [
"videos",
"do_resize",
"size",
"resample",
"do_center_crop",
"crop_size",
"do_rescale",
"rescale_factor",
"offset",
"do_normalize",
"image_mean",
"image_std",
"return_tensors",
"data_format",
"input_data_format",
]
def resize( def resize(
self, self,
...@@ -304,6 +286,7 @@ class VivitImageProcessor(BaseImageProcessor): ...@@ -304,6 +286,7 @@ class VivitImageProcessor(BaseImageProcessor):
image = to_channel_dimension_format(image, data_format, input_channel_dim=input_data_format) image = to_channel_dimension_format(image, data_format, input_channel_dim=input_data_format)
return image return image
@filter_out_non_signature_kwargs()
def preprocess( def preprocess(
self, self,
videos: ImageInput, videos: ImageInput,
...@@ -321,7 +304,6 @@ class VivitImageProcessor(BaseImageProcessor): ...@@ -321,7 +304,6 @@ class VivitImageProcessor(BaseImageProcessor):
return_tensors: Optional[Union[str, TensorType]] = None, return_tensors: Optional[Union[str, TensorType]] = None,
data_format: ChannelDimension = ChannelDimension.FIRST, data_format: ChannelDimension = ChannelDimension.FIRST,
input_data_format: Optional[Union[str, ChannelDimension]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None,
**kwargs,
) -> PIL.Image.Image: ) -> PIL.Image.Image:
""" """
Preprocess an image or batch of images. Preprocess an image or batch of images.
...@@ -387,8 +369,6 @@ class VivitImageProcessor(BaseImageProcessor): ...@@ -387,8 +369,6 @@ class VivitImageProcessor(BaseImageProcessor):
crop_size = crop_size if crop_size is not None else self.crop_size crop_size = crop_size if crop_size is not None else self.crop_size
crop_size = get_size_dict(crop_size, param_name="crop_size") crop_size = get_size_dict(crop_size, param_name="crop_size")
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
if not valid_images(videos): if not valid_images(videos):
raise ValueError( raise ValueError(
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
......
...@@ -35,7 +35,14 @@ from ...image_utils import ( ...@@ -35,7 +35,14 @@ from ...image_utils import (
valid_images, valid_images,
validate_preprocess_arguments, validate_preprocess_arguments,
) )
from ...utils import TensorType, is_torch_available, is_vision_available, logging, requires_backends from ...utils import (
TensorType,
filter_out_non_signature_kwargs,
is_torch_available,
is_vision_available,
logging,
requires_backends,
)
if is_vision_available(): if is_vision_available():
...@@ -164,24 +171,6 @@ class ZoeDepthImageProcessor(BaseImageProcessor): ...@@ -164,24 +171,6 @@ class ZoeDepthImageProcessor(BaseImageProcessor):
self.ensure_multiple_of = ensure_multiple_of self.ensure_multiple_of = ensure_multiple_of
self.resample = resample self.resample = resample
self._valid_processor_keys = [
"images",
"do_resize",
"size",
"keep_aspect_ratio",
"ensure_multiple_of",
"resample",
"do_rescale",
"rescale_factor",
"do_normalize",
"image_mean",
"image_std",
"do_pad",
"return_tensors",
"data_format",
"input_data_format",
]
def resize( def resize(
self, self,
image: np.ndarray, image: np.ndarray,
...@@ -301,6 +290,7 @@ class ZoeDepthImageProcessor(BaseImageProcessor): ...@@ -301,6 +290,7 @@ class ZoeDepthImageProcessor(BaseImageProcessor):
input_data_format=input_data_format, input_data_format=input_data_format,
) )
@filter_out_non_signature_kwargs()
def preprocess( def preprocess(
self, self,
images: ImageInput, images: ImageInput,
......
...@@ -816,6 +816,9 @@ def filter_out_non_signature_kwargs(extra: Optional[list] = None): ...@@ -816,6 +816,9 @@ def filter_out_non_signature_kwargs(extra: Optional[list] = None):
is_instance_method = "self" in function_named_args is_instance_method = "self" in function_named_args
is_class_method = "cls" in function_named_args is_class_method = "cls" in function_named_args
# Mark function as decorated
func._filter_out_non_signature_kwargs = True
@wraps(func) @wraps(func)
def wrapper(*args, **kwargs): def wrapper(*args, **kwargs):
valid_kwargs = {} valid_kwargs = {}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment