"src/vscode:/vscode.git/clone" did not exist on "4d806dba8ca6ba714fd7b95c112dd8514136c9af"
Unverified Commit 57c965a8 authored by amyeroberts, committed by GitHub

Remove deprecated logic and warnings (#30743)

* Remove deprecated logic and warnings

* Add back some code that seems to be important...

* Let's just add all the nllb stuff back; removing it is a bit more involved

* Remove kwargs

* Remove more kwargs
parent 3d7d3a87
@@ -21,7 +21,6 @@ import json
import logging
import os
import random
import warnings
from dataclasses import dataclass, field
from typing import Optional
@@ -85,12 +84,6 @@ class ModelArguments:
)
},
)
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token` instead."
},
)
trust_remote_code: bool = field(
default=False,
metadata={
@@ -213,15 +206,6 @@ def main():
parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TFTrainingArguments))
model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn(
"The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token` instead.",
FutureWarning,
)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_ner", model_args, data_args, framework="tensorflow")
...
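The `use_auth_token` dataclass field and its shim are dropped from both TensorFlow example scripts in this commit, so authentication goes through the `token` field only. A minimal sketch of the equivalent `from_pretrained` call, with a placeholder access token:

    from transformers import AutoTokenizer, TFAutoModelForTokenClassification

    hub_token = "hf_..."  # placeholder: your Hugging Face Hub access token

    # `token` replaces the removed `use_auth_token` argument.
    tokenizer = AutoTokenizer.from_pretrained("bert-base-cased", token=hub_token)
    model = TFAutoModelForTokenClassification.from_pretrained("bert-base-cased", token=hub_token)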
@@ -22,7 +22,6 @@ import json
import logging
import os
import sys
import warnings
from dataclasses import dataclass, field
from typing import Optional
@@ -103,12 +102,6 @@ class ModelArguments:
)
},
)
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token` instead."
},
)
trust_remote_code: bool = field(
default=False,
metadata={
@@ -285,15 +278,6 @@ def main():
else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn(
"The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token` instead.",
FutureWarning,
)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_translation", model_args, data_args, framework="tensorflow")
...
@@ -727,23 +727,11 @@ class ImageFeatureExtractionMixin:
)
def promote_annotation_format(annotation_format: Union[AnnotionFormat, AnnotationFormat]) -> AnnotationFormat:
# can be removed when `AnnotionFormat` is fully deprecated
return AnnotationFormat(annotation_format.value)
def validate_annotations(
annotation_format: AnnotationFormat,
supported_annotation_formats: Tuple[AnnotationFormat, ...],
annotations: List[Dict],
) -> None:
if isinstance(annotation_format, AnnotionFormat):
logger.warning_once(
f"`{annotation_format.__class__.__name__}` is deprecated and will be removed in v4.38. "
f"Please use `{AnnotationFormat.__name__}` instead."
)
annotation_format = promote_annotation_format(annotation_format)
if annotation_format not in supported_annotation_formats:
raise ValueError(f"Unsupported annotation format: {format} must be one of {supported_annotation_formats}")
...
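With the `AnnotionFormat` promotion shim removed, `validate_annotations` only accepts the correctly spelled `AnnotationFormat` enum. A minimal sketch, with a deliberately tiny COCO-style annotation stub standing in for real data:

    from transformers.image_utils import AnnotationFormat, validate_annotations

    annotations = [{"image_id": 0, "annotations": []}]  # minimal COCO-detection style stub

    # AnnotationFormat replaces the deprecated, misspelled AnnotionFormat enum.
    validate_annotations(
        AnnotationFormat.COCO_DETECTION,
        (AnnotationFormat.COCO_DETECTION, AnnotationFormat.COCO_PANOPTIC),
        annotations,
    )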
@@ -371,22 +371,10 @@ class AutoImageProcessor:
if image_processor_class is None and image_processor_auto_map is None:
feature_extractor_class = config_dict.pop("feature_extractor_type", None)
if feature_extractor_class is not None:
logger.warning(
"Could not find image processor class in the image processor config or the model config. Loading "
"based on pattern matching with the model's feature extractor configuration. Please open a "
"PR/issue to update `preprocessor_config.json` to use `image_processor_type` instead of "
"`feature_extractor_type`. This warning will be removed in v4.40."
)
image_processor_class = feature_extractor_class.replace("FeatureExtractor", "ImageProcessor")
if "AutoFeatureExtractor" in config_dict.get("auto_map", {}):
feature_extractor_auto_map = config_dict["auto_map"]["AutoFeatureExtractor"]
image_processor_auto_map = feature_extractor_auto_map.replace("FeatureExtractor", "ImageProcessor")
logger.warning(
"Could not find image processor auto map in the image processor config or the model config. "
"Loading based on pattern matching with the model's feature extractor configuration. Please open a "
"PR/issue to update `preprocessor_config.json` to use `AutoImageProcessor` instead of "
"`AutoFeatureExtractor`. This warning will be removed in v4.40."
)
# If we don't find the image processor class in the image processor config, let's try the model config.
if image_processor_class is None and image_processor_auto_map is None:
...
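Only the warnings are removed here; the pattern-matching fallback from feature-extractor names to image-processor names stays. A small sketch of what that fallback does with a legacy `preprocessor_config.json` dict (the `ViTFeatureExtractor` value is just an illustrative example):

    # Legacy configs that still carry `feature_extractor_type` are silently mapped
    # onto the corresponding image processor by string replacement.
    config_dict = {"feature_extractor_type": "ViTFeatureExtractor"}
    image_processor_class = config_dict["feature_extractor_type"].replace("FeatureExtractor", "ImageProcessor")
    print(image_processor_class)  # ViTImageProcessor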
@@ -23,7 +23,6 @@
"""PyTorch Cohere model."""
import math
import warnings
from typing import List, Optional, Tuple, Union
import torch
@@ -635,7 +634,6 @@ class CohereDecoderLayer(nn.Module):
output_attentions: Optional[bool] = False,
use_cache: Optional[bool] = False,
cache_position: Optional[torch.LongTensor] = None,
**kwargs,
) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
"""
Args:
@@ -651,11 +649,6 @@ class CohereDecoderLayer(nn.Module):
(see `past_key_values`).
past_key_value (`Tuple(torch.FloatTensor)`, *optional*): cached past key and value projection states
"""
if "padding_mask" in kwargs:
warnings.warn(
"Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
)
residual = hidden_states
hidden_states = self.input_layernorm(hidden_states)
@@ -669,7 +662,6 @@ class CohereDecoderLayer(nn.Module):
output_attentions=output_attentions,
use_cache=use_cache,
cache_position=cache_position,
**kwargs,
)
# Fully Connected
...
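The same `padding_mask` escape hatch is deleted from the attention and decoder layers of Cohere here and of Falcon, Gemma, Llama, Mistral and Mixtral below; `attention_mask` is the only supported keyword now. A hedged sketch of the call site, with the checkpoint name as a placeholder:

    from transformers import AutoModelForCausalLM, AutoTokenizer

    checkpoint = "your/causal-lm-checkpoint"  # placeholder for any of the affected models
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForCausalLM.from_pretrained(checkpoint)

    inputs = tokenizer("Hello world", return_tensors="pt")
    # Pass attention_mask; the padding_mask keyword no longer exists.
    outputs = model(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"])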
@@ -915,31 +915,6 @@ class ConditionalDetrImageProcessor(BaseImageProcessor):
raise ValueError(f"Format {format} is not supported.")
return target
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.prepare
def prepare(self, image, target, return_segmentation_masks=None, masks_path=None):
logger.warning_once(
"The `prepare` method is deprecated and will be removed in a v4.33. "
"Please use `prepare_annotation` instead. Note: the `prepare_annotation` method "
"does not return the image anymore.",
)
target = self.prepare_annotation(image, target, return_segmentation_masks, masks_path, self.format)
return image, target
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.convert_coco_poly_to_mask
def convert_coco_poly_to_mask(self, *args, **kwargs):
logger.warning_once("The `convert_coco_poly_to_mask` method is deprecated and will be removed in v4.33. ")
return convert_coco_poly_to_mask(*args, **kwargs)
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.prepare_coco_detection with DETR->ConditionalDetr
def prepare_coco_detection(self, *args, **kwargs):
logger.warning_once("The `prepare_coco_detection` method is deprecated and will be removed in v4.33. ")
return prepare_coco_detection_annotation(*args, **kwargs)
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.prepare_coco_panoptic
def prepare_coco_panoptic(self, *args, **kwargs):
logger.warning_once("The `prepare_coco_panoptic` method is deprecated and will be removed in v4.33. ")
return prepare_coco_panoptic_annotation(*args, **kwargs)
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.resize
def resize(
self,
...
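The deprecated `prepare`, `convert_coco_poly_to_mask`, `prepare_coco_detection` and `prepare_coco_panoptic` wrappers disappear from all DETR-family image processors in this commit; the replacement named in the old warnings is `prepare_annotation`, which returns only the prepared target. A sketch under the assumption that the keyword signature matches the call the removed wrapper forwarded to:

    import numpy as np
    from transformers import ConditionalDetrImageProcessor

    image_processor = ConditionalDetrImageProcessor()
    image = np.zeros((480, 640, 3), dtype=np.uint8)  # dummy image
    target = {"image_id": 0, "annotations": []}      # minimal COCO-detection style target

    # Old: image, target = image_processor.prepare(image, target)
    # New: prepare_annotation returns the prepared annotation dict only, not the image.
    target = image_processor.prepare_annotation(
        image,
        target,
        format=image_processor.format,
        return_segmentation_masks=None,
        masks_path=None,
    )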
@@ -556,23 +556,7 @@ class DetrAttention(nn.Module):
def _shape(self, tensor: torch.Tensor, seq_len: int, batch_size: int):
return tensor.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2).contiguous()
def with_pos_embed(self, tensor: torch.Tensor, object_queries: Optional[Tensor], **kwargs):
def with_pos_embed(self, tensor: torch.Tensor, object_queries: Optional[Tensor]):
position_embeddings = kwargs.pop("position_embeddings", None)
if kwargs:
raise ValueError(f"Unexpected arguments {kwargs.keys()}")
if position_embeddings is not None and object_queries is not None:
raise ValueError(
"Cannot specify both position_embeddings and object_queries. Please use just object_queries"
)
if position_embeddings is not None:
logger.warning_once(
"position_embeddings has been deprecated and will be removed in v4.34. Please use object_queries instead"
)
object_queries = position_embeddings
return tensor if object_queries is None else tensor + object_queries
def forward(
@@ -583,38 +567,8 @@ class DetrAttention(nn.Module):
key_value_states: Optional[torch.Tensor] = None,
spatial_position_embeddings: Optional[torch.Tensor] = None,
output_attentions: bool = False,
**kwargs,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
"""Input shape: Batch x Time x Channel"""
position_embeddings = kwargs.pop("position_ebmeddings", None)
key_value_position_embeddings = kwargs.pop("key_value_position_embeddings", None)
if kwargs:
raise ValueError(f"Unexpected arguments {kwargs.keys()}")
if position_embeddings is not None and object_queries is not None:
raise ValueError(
"Cannot specify both position_embeddings and object_queries. Please use just object_queries"
)
if key_value_position_embeddings is not None and spatial_position_embeddings is not None:
raise ValueError(
"Cannot specify both key_value_position_embeddings and spatial_position_embeddings. Please use just spatial_position_embeddings"
)
if position_embeddings is not None:
logger.warning_once(
"position_embeddings has been deprecated and will be removed in v4.34. Please use object_queries instead"
)
object_queries = position_embeddings
if key_value_position_embeddings is not None:
logger.warning_once(
"key_value_position_embeddings has been deprecated and will be removed in v4.34. Please use spatial_position_embeddings instead"
)
spatial_position_embeddings = key_value_position_embeddings
# if key_value_states are provided this layer is used as a cross-attention layer
# for the decoder
is_cross_attention = key_value_states is not None
@@ -838,7 +792,6 @@ class ConditionalDetrEncoderLayer(nn.Module):
attention_mask: torch.Tensor,
object_queries: torch.Tensor = None,
output_attentions: bool = False,
**kwargs,
):
"""
Args:
@@ -852,22 +805,6 @@ class ConditionalDetrEncoderLayer(nn.Module):
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
returned tensors for more detail.
"""
position_embeddings = kwargs.pop("position_embeddings", None)
if kwargs:
raise ValueError(f"Unexpected arguments {kwargs.keys()}")
if position_embeddings is not None and object_queries is not None:
raise ValueError(
"Cannot specify both position_embeddings and object_queries. Please use just object_queries"
)
if position_embeddings is not None:
logger.warning_once(
"position_embeddings has been deprecated and will be removed in v4.34. Please use object_queries instead"
)
object_queries = position_embeddings
residual = hidden_states
hidden_states, attn_weights = self.self_attn(
hidden_states=hidden_states,
@@ -956,7 +893,6 @@ class ConditionalDetrDecoderLayer(nn.Module):
encoder_attention_mask: Optional[torch.Tensor] = None,
output_attentions: Optional[bool] = False,
is_first: Optional[bool] = False,
**kwargs,
):
"""
Args:
@@ -979,22 +915,6 @@ class ConditionalDetrDecoderLayer(nn.Module):
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
returned tensors for more detail.
"""
position_embeddings = kwargs.pop("position_embeddings", None)
if kwargs:
raise ValueError(f"Unexpected arguments {kwargs.keys()}")
if position_embeddings is not None and object_queries is not None:
raise ValueError(
"Cannot specify both position_embeddings and object_queries. Please use just object_queries"
)
if position_embeddings is not None:
logger.warning_once(
"position_embeddings has been deprecated and will be removed in v4.34. Please use object_queries instead"
)
object_queries = position_embeddings
residual = hidden_states
# ========== Begin of Self-Attention =============
@@ -1236,7 +1156,6 @@ class ConditionalDetrEncoder(ConditionalDetrPreTrainedModel):
output_attentions=None,
output_hidden_states=None,
return_dict=None,
**kwargs,
):
r"""
Args:
@@ -1263,22 +1182,6 @@ class ConditionalDetrEncoder(ConditionalDetrPreTrainedModel):
return_dict (`bool`, *optional*):
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""
position_embeddings = kwargs.pop("position_embeddings", None)
if kwargs:
raise ValueError(f"Unexpected arguments {kwargs.keys()}")
if position_embeddings is not None and object_queries is not None:
raise ValueError(
"Cannot specify both position_embeddings and object_queries. Please use just object_queries"
)
if position_embeddings is not None:
logger.warning_once(
"position_embeddings has been deprecated and will be removed in v4.34. Please use object_queries instead"
)
object_queries = position_embeddings
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@@ -1377,7 +1280,6 @@ class ConditionalDetrDecoder(ConditionalDetrPreTrainedModel):
output_attentions=None,
output_hidden_states=None,
return_dict=None,
**kwargs,
):
r"""
Args:
@@ -1414,22 +1316,6 @@ class ConditionalDetrDecoder(ConditionalDetrPreTrainedModel):
return_dict (`bool`, *optional*):
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""
position_embeddings = kwargs.pop("position_embeddings", None)
if kwargs:
raise ValueError(f"Unexpected arguments {kwargs.keys()}")
if position_embeddings is not None and object_queries is not None:
raise ValueError(
"Cannot specify both position_embeddings and object_queries. Please use just object_queries"
)
if position_embeddings is not None:
logger.warning_once(
"position_embeddings has been deprecated and will be removed in v4.34. Please use object_queries instead"
)
object_queries = position_embeddings
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
...
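Across the DETR-family modeling files, the `position_embeddings` and `key_value_position_embeddings` keywords are gone; the public model API is unchanged, but code that drives the internal layers directly must pass `object_queries` and `spatial_position_embeddings`. A minimal sketch against the attention module, with illustrative shapes:

    import torch
    from transformers.models.detr.modeling_detr import DetrAttention

    attn = DetrAttention(embed_dim=256, num_heads=8)
    hidden_states = torch.randn(1, 100, 256)
    object_queries = torch.randn(1, 100, 256)

    # object_queries replaces the removed position_embeddings keyword.
    attn_output, attn_weights = attn(hidden_states=hidden_states, object_queries=object_queries)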
@@ -913,31 +913,6 @@ class DeformableDetrImageProcessor(BaseImageProcessor):
raise ValueError(f"Format {format} is not supported.")
return target
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.prepare
def prepare(self, image, target, return_segmentation_masks=None, masks_path=None):
logger.warning_once(
"The `prepare` method is deprecated and will be removed in a v4.33. "
"Please use `prepare_annotation` instead. Note: the `prepare_annotation` method "
"does not return the image anymore.",
)
target = self.prepare_annotation(image, target, return_segmentation_masks, masks_path, self.format)
return image, target
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.convert_coco_poly_to_mask
def convert_coco_poly_to_mask(self, *args, **kwargs):
logger.warning_once("The `convert_coco_poly_to_mask` method is deprecated and will be removed in v4.33. ")
return convert_coco_poly_to_mask(*args, **kwargs)
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.prepare_coco_detection
def prepare_coco_detection(self, *args, **kwargs):
logger.warning_once("The `prepare_coco_detection` method is deprecated and will be removed in v4.33. ")
return prepare_coco_detection_annotation(*args, **kwargs)
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.prepare_coco_panoptic
def prepare_coco_panoptic(self, *args, **kwargs):
logger.warning_once("The `prepare_coco_panoptic` method is deprecated and will be removed in v4.33. ")
return prepare_coco_panoptic_annotation(*args, **kwargs)
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.resize
def resize(
self,
...
@@ -576,31 +576,6 @@ class DetaImageProcessor(BaseImageProcessor):
raise ValueError(f"Format {format} is not supported.")
return target
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.prepare
def prepare(self, image, target, return_segmentation_masks=None, masks_path=None):
logger.warning_once(
"The `prepare` method is deprecated and will be removed in a v4.33. "
"Please use `prepare_annotation` instead. Note: the `prepare_annotation` method "
"does not return the image anymore.",
)
target = self.prepare_annotation(image, target, return_segmentation_masks, masks_path, self.format)
return image, target
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.convert_coco_poly_to_mask
def convert_coco_poly_to_mask(self, *args, **kwargs):
logger.warning_once("The `convert_coco_poly_to_mask` method is deprecated and will be removed in v4.33. ")
return convert_coco_poly_to_mask(*args, **kwargs)
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.prepare_coco_detection
def prepare_coco_detection(self, *args, **kwargs):
logger.warning_once("The `prepare_coco_detection` method is deprecated and will be removed in v4.33. ")
return prepare_coco_detection_annotation(*args, **kwargs)
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.prepare_coco_panoptic
def prepare_coco_panoptic(self, *args, **kwargs):
logger.warning_once("The `prepare_coco_panoptic` method is deprecated and will be removed in v4.33. ")
return prepare_coco_panoptic_annotation(*args, **kwargs)
def resize(
self,
image: np.ndarray,
...
@@ -896,27 +896,6 @@ class DetrImageProcessor(BaseImageProcessor):
raise ValueError(f"Format {format} is not supported.")
return target
def prepare(self, image, target, return_segmentation_masks=None, masks_path=None):
logger.warning_once(
"The `prepare` method is deprecated and will be removed in a v4.33. "
"Please use `prepare_annotation` instead. Note: the `prepare_annotation` method "
"does not return the image anymore.",
)
target = self.prepare_annotation(image, target, return_segmentation_masks, masks_path, self.format)
return image, target
def convert_coco_poly_to_mask(self, *args, **kwargs):
logger.warning_once("The `convert_coco_poly_to_mask` method is deprecated and will be removed in v4.33. ")
return convert_coco_poly_to_mask(*args, **kwargs)
def prepare_coco_detection(self, *args, **kwargs):
logger.warning_once("The `prepare_coco_detection` method is deprecated and will be removed in v4.33. ")
return prepare_coco_detection_annotation(*args, **kwargs)
def prepare_coco_panoptic(self, *args, **kwargs):
logger.warning_once("The `prepare_coco_panoptic` method is deprecated and will be removed in v4.33. ")
return prepare_coco_panoptic_annotation(*args, **kwargs)
def resize(
self,
image: np.ndarray,
...
@@ -524,23 +524,7 @@ class DetrAttention(nn.Module):
def _shape(self, tensor: torch.Tensor, seq_len: int, batch_size: int):
return tensor.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2).contiguous()
def with_pos_embed(self, tensor: torch.Tensor, object_queries: Optional[Tensor], **kwargs):
def with_pos_embed(self, tensor: torch.Tensor, object_queries: Optional[Tensor]):
position_embeddings = kwargs.pop("position_embeddings", None)
if kwargs:
raise ValueError(f"Unexpected arguments {kwargs.keys()}")
if position_embeddings is not None and object_queries is not None:
raise ValueError(
"Cannot specify both position_embeddings and object_queries. Please use just object_queries"
)
if position_embeddings is not None:
logger.warning_once(
"position_embeddings has been deprecated and will be removed in v4.34. Please use object_queries instead"
)
object_queries = position_embeddings
return tensor if object_queries is None else tensor + object_queries
def forward(
@@ -551,38 +535,8 @@ class DetrAttention(nn.Module):
key_value_states: Optional[torch.Tensor] = None,
spatial_position_embeddings: Optional[torch.Tensor] = None,
output_attentions: bool = False,
**kwargs,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
"""Input shape: Batch x Time x Channel"""
position_embeddings = kwargs.pop("position_ebmeddings", None)
key_value_position_embeddings = kwargs.pop("key_value_position_embeddings", None)
if kwargs:
raise ValueError(f"Unexpected arguments {kwargs.keys()}")
if position_embeddings is not None and object_queries is not None:
raise ValueError(
"Cannot specify both position_embeddings and object_queries. Please use just object_queries"
)
if key_value_position_embeddings is not None and spatial_position_embeddings is not None:
raise ValueError(
"Cannot specify both key_value_position_embeddings and spatial_position_embeddings. Please use just spatial_position_embeddings"
)
if position_embeddings is not None:
logger.warning_once(
"position_embeddings has been deprecated and will be removed in v4.34. Please use object_queries instead"
)
object_queries = position_embeddings
if key_value_position_embeddings is not None:
logger.warning_once(
"key_value_position_embeddings has been deprecated and will be removed in v4.34. Please use spatial_position_embeddings instead"
)
spatial_position_embeddings = key_value_position_embeddings
# if key_value_states are provided this layer is used as a cross-attention layer
# for the decoder
is_cross_attention = key_value_states is not None
@@ -688,7 +642,6 @@ class DetrEncoderLayer(nn.Module):
attention_mask: torch.Tensor,
object_queries: torch.Tensor = None,
output_attentions: bool = False,
**kwargs,
):
"""
Args:
@@ -702,22 +655,6 @@ class DetrEncoderLayer(nn.Module):
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
returned tensors for more detail.
"""
position_embeddings = kwargs.pop("position_embeddings", None)
if kwargs:
raise ValueError(f"Unexpected arguments {kwargs.keys()}")
if position_embeddings is not None and object_queries is not None:
raise ValueError(
"Cannot specify both position_embeddings and object_queries. Please use just object_queries"
)
if position_embeddings is not None:
logger.warning_once(
"position_embeddings has been deprecated and will be removed in v4.34. Please use object_queries instead"
)
object_queries = position_embeddings
residual = hidden_states
hidden_states, attn_weights = self.self_attn(
hidden_states=hidden_states,
@@ -787,7 +724,6 @@ class DetrDecoderLayer(nn.Module):
encoder_hidden_states: Optional[torch.Tensor] = None,
encoder_attention_mask: Optional[torch.Tensor] = None,
output_attentions: Optional[bool] = False,
**kwargs,
):
"""
Args:
@@ -810,22 +746,6 @@ class DetrDecoderLayer(nn.Module):
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
returned tensors for more detail.
"""
position_embeddings = kwargs.pop("position_embeddings", None)
if kwargs:
raise ValueError(f"Unexpected arguments {kwargs.keys()}")
if position_embeddings is not None and object_queries is not None:
raise ValueError(
"Cannot specify both position_embeddings and object_queries. Please use just object_queries"
)
if position_embeddings is not None:
logger.warning_once(
"position_embeddings has been deprecated and will be removed in v4.34. Please use object_queries instead"
)
object_queries = position_embeddings
residual = hidden_states
# Self Attention
@@ -995,7 +915,6 @@ class DetrEncoder(DetrPreTrainedModel):
output_attentions=None,
output_hidden_states=None,
return_dict=None,
**kwargs,
):
r"""
Args:
@@ -1022,22 +941,6 @@ class DetrEncoder(DetrPreTrainedModel):
return_dict (`bool`, *optional*):
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""
position_embeddings = kwargs.pop("position_embeddings", None)
if kwargs:
raise ValueError(f"Unexpected arguments {kwargs.keys()}")
if position_embeddings is not None and object_queries is not None:
raise ValueError(
"Cannot specify both position_embeddings and object_queries. Please use just object_queries"
)
if position_embeddings is not None:
logger.warning_once(
"position_embeddings has been deprecated and will be removed in v4.34. Please use object_queries instead"
)
object_queries = position_embeddings
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@@ -1129,7 +1032,6 @@ class DetrDecoder(DetrPreTrainedModel):
output_attentions=None,
output_hidden_states=None,
return_dict=None,
**kwargs,
):
r"""
Args:
@@ -1167,22 +1069,6 @@ class DetrDecoder(DetrPreTrainedModel):
return_dict (`bool`, *optional*):
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""
position_embeddings = kwargs.pop("position_embeddings", None)
if kwargs:
raise ValueError(f"Unexpected arguments {kwargs.keys()}")
if position_embeddings is not None and object_queries is not None:
raise ValueError(
"Cannot specify both position_embeddings and object_queries. Please use just object_queries"
)
if position_embeddings is not None:
logger.warning_once(
"position_embeddings has been deprecated and will be removed in v4.34. Please use object_queries instead"
)
object_queries = position_embeddings
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
...
@@ -15,7 +15,6 @@
"""PyTorch Falcon model."""
import math
import warnings
from typing import TYPE_CHECKING, Optional, Tuple, Union
import torch
@@ -393,13 +392,7 @@ class FalconAttention(nn.Module):
head_mask: Optional[torch.Tensor] = None,
use_cache: bool = False,
output_attentions: bool = False,
**kwargs,
):
if "padding_mask" in kwargs:
warnings.warn(
"Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
)
fused_qkv = self.query_key_value(hidden_states) # [batch_size, seq_length, 3 x hidden_size]
num_kv_heads = self.num_heads if self.new_decoder_architecture else self.num_kv_heads
# 3 x [batch_size, seq_length, num_heads, head_dim]
@@ -549,16 +542,7 @@ class FalconFlashAttention2(FalconAttention):
head_mask: Optional[torch.Tensor] = None,
use_cache: bool = False,
output_attentions: bool = False,
**kwargs,
):
if "padding_mask" in kwargs:
warnings.warn(
"Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
)
# overwrite attention_mask with padding_mask
attention_mask = kwargs.pop("padding_mask")
fused_qkv = self.query_key_value(hidden_states) # [batch_size, seq_length, 3 x hidden_size]
num_kv_heads = self.num_heads if self.new_decoder_architecture else self.num_kv_heads
# 3 x [batch_size, seq_length, num_heads, head_dim]
@@ -792,13 +776,7 @@ class FalconDecoderLayer(nn.Module):
head_mask: Optional[torch.Tensor] = None,
use_cache: bool = False,
output_attentions: bool = False,
**kwargs,
):
if "padding_mask" in kwargs:
warnings.warn(
"Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
)
residual = hidden_states
if self.config.new_decoder_architecture and self.config.num_ln_in_parallel_attn == 2:
@@ -817,7 +795,6 @@ class FalconDecoderLayer(nn.Module):
head_mask=head_mask,
use_cache=use_cache,
output_attentions=output_attentions,
**kwargs,
)
attention_output = attn_outputs[0]
...
@@ -16,7 +16,6 @@
""" PyTorch Gemma model."""
import math
import warnings
from typing import List, Optional, Tuple, Union
import torch
@@ -616,7 +615,6 @@ class GemmaDecoderLayer(nn.Module):
output_attentions: Optional[bool] = False,
use_cache: Optional[bool] = False,
cache_position: Optional[torch.LongTensor] = None,
**kwargs,
) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
"""
Args:
@@ -632,11 +630,6 @@ class GemmaDecoderLayer(nn.Module):
(see `past_key_values`).
past_key_value (`Tuple(torch.FloatTensor)`, *optional*): cached past key and value projection states
"""
if "padding_mask" in kwargs:
warnings.warn(
"Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
)
residual = hidden_states
hidden_states = self.input_layernorm(hidden_states)
@@ -650,7 +643,6 @@ class GemmaDecoderLayer(nn.Module):
output_attentions=output_attentions,
use_cache=use_cache,
cache_position=cache_position,
**kwargs,
)
hidden_states = residual + hidden_states
...
@@ -920,31 +920,6 @@ class GroundingDinoImageProcessor(BaseImageProcessor):
raise ValueError(f"Format {format} is not supported.")
return target
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.prepare
def prepare(self, image, target, return_segmentation_masks=None, masks_path=None):
logger.warning_once(
"The `prepare` method is deprecated and will be removed in a v4.33. "
"Please use `prepare_annotation` instead. Note: the `prepare_annotation` method "
"does not return the image anymore.",
)
target = self.prepare_annotation(image, target, return_segmentation_masks, masks_path, self.format)
return image, target
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.convert_coco_poly_to_mask
def convert_coco_poly_to_mask(self, *args, **kwargs):
logger.warning_once("The `convert_coco_poly_to_mask` method is deprecated and will be removed in v4.33. ")
return convert_coco_poly_to_mask(*args, **kwargs)
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.prepare_coco_detection
def prepare_coco_detection(self, *args, **kwargs):
logger.warning_once("The `prepare_coco_detection` method is deprecated and will be removed in v4.33. ")
return prepare_coco_detection_annotation(*args, **kwargs)
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.prepare_coco_panoptic
def prepare_coco_panoptic(self, *args, **kwargs):
logger.warning_once("The `prepare_coco_panoptic` method is deprecated and will be removed in v4.33. ")
return prepare_coco_panoptic_annotation(*args, **kwargs)
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.resize
def resize(
self,
...
@@ -20,7 +20,6 @@
"""PyTorch LLaMA model."""
import math
import warnings
from typing import List, Optional, Tuple, Union
import torch
@@ -104,29 +103,6 @@ class LlamaRotaryEmbedding(nn.Module):
self.register_buffer("inv_freq", inv_freq, persistent=False)
# For BC we register cos and sin cached
self.max_seq_len_cached = max_position_embeddings
t = torch.arange(self.max_seq_len_cached, device=device, dtype=torch.int64).type_as(self.inv_freq)
t = t / self.scaling_factor
freqs = torch.outer(t, self.inv_freq)
# Different from paper, but it uses a different permutation in order to obtain the same calculation
emb = torch.cat((freqs, freqs), dim=-1)
self.register_buffer("_cos_cached", emb.cos().to(torch.get_default_dtype()), persistent=False)
self.register_buffer("_sin_cached", emb.sin().to(torch.get_default_dtype()), persistent=False)
@property
def sin_cached(self):
logger.warning_once(
"The sin_cached attribute will be removed in 4.39. Bear in mind that its contents changed in v4.38. Use "
"the forward method of RoPE from now on instead. It is not used in the `LlamaAttention` class"
)
return self._sin_cached
@property
def cos_cached(self):
logger.warning_once(
"The cos_cached attribute will be removed in 4.39. Bear in mind that its contents changed in v4.38. Use "
"the forward method of RoPE from now on instead. It is not used in the `LlamaAttention` class"
)
return self._cos_cached
@torch.no_grad()
def forward(self, x, position_ids):
@@ -714,7 +690,6 @@ class LlamaDecoderLayer(nn.Module):
output_attentions: Optional[bool] = False,
use_cache: Optional[bool] = False,
cache_position: Optional[torch.LongTensor] = None,
**kwargs,
) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
"""
Args:
@@ -730,11 +705,6 @@ class LlamaDecoderLayer(nn.Module):
(see `past_key_values`).
past_key_value (`Tuple(torch.FloatTensor)`, *optional*): cached past key and value projection states
"""
if "padding_mask" in kwargs:
warnings.warn(
"Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
)
residual = hidden_states
hidden_states = self.input_layernorm(hidden_states)
@@ -748,7 +718,6 @@ class LlamaDecoderLayer(nn.Module):
output_attentions=output_attentions,
use_cache=use_cache,
cache_position=cache_position,
**kwargs,
)
hidden_states = residual + hidden_states
...
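With the cached `cos_cached`/`sin_cached` buffers and their properties dropped from the Llama (and, below, OLMo) rotary embeddings, the replacement the old warnings pointed to is the module's forward pass. A hedged sketch against `LlamaRotaryEmbedding` with made-up dimensions, assuming the dim-based constructor of this release:

    import torch
    from transformers.models.llama.modeling_llama import LlamaRotaryEmbedding

    rotary_emb = LlamaRotaryEmbedding(dim=128)
    x = torch.randn(1, 8, 16, 128)                # only dtype/device of x are used
    position_ids = torch.arange(16).unsqueeze(0)

    # Call forward instead of reading the removed cos_cached / sin_cached buffers.
    cos, sin = rotary_emb(x, position_ids)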
@@ -440,23 +440,7 @@ class DetrAttention(nn.Module):
def _shape(self, tensor: torch.Tensor, seq_len: int, batch_size: int):
return tensor.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2).contiguous()
def with_pos_embed(self, tensor: torch.Tensor, object_queries: Optional[Tensor], **kwargs):
def with_pos_embed(self, tensor: torch.Tensor, object_queries: Optional[Tensor]):
position_embeddings = kwargs.pop("position_embeddings", None)
if kwargs:
raise ValueError(f"Unexpected arguments {kwargs.keys()}")
if position_embeddings is not None and object_queries is not None:
raise ValueError(
"Cannot specify both position_embeddings and object_queries. Please use just object_queries"
)
if position_embeddings is not None:
logger.warning_once(
"position_embeddings has been deprecated and will be removed in v4.34. Please use object_queries instead"
)
object_queries = position_embeddings
return tensor if object_queries is None else tensor + object_queries
def forward(
@@ -467,38 +451,8 @@ class DetrAttention(nn.Module):
key_value_states: Optional[torch.Tensor] = None,
spatial_position_embeddings: Optional[torch.Tensor] = None,
output_attentions: bool = False,
**kwargs,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
"""Input shape: Batch x Time x Channel"""
position_embeddings = kwargs.pop("position_ebmeddings", None)
key_value_position_embeddings = kwargs.pop("key_value_position_embeddings", None)
if kwargs:
raise ValueError(f"Unexpected arguments {kwargs.keys()}")
if position_embeddings is not None and object_queries is not None:
raise ValueError(
"Cannot specify both position_embeddings and object_queries. Please use just object_queries"
)
if key_value_position_embeddings is not None and spatial_position_embeddings is not None:
raise ValueError(
"Cannot specify both key_value_position_embeddings and spatial_position_embeddings. Please use just spatial_position_embeddings"
)
if position_embeddings is not None:
logger.warning_once(
"position_embeddings has been deprecated and will be removed in v4.34. Please use object_queries instead"
)
object_queries = position_embeddings
if key_value_position_embeddings is not None:
logger.warning_once(
"key_value_position_embeddings has been deprecated and will be removed in v4.34. Please use spatial_position_embeddings instead"
)
spatial_position_embeddings = key_value_position_embeddings
# if key_value_states are provided this layer is used as a cross-attention layer
# for the decoder
is_cross_attention = key_value_states is not None
@@ -616,7 +570,6 @@ class DetrDecoderLayer(nn.Module):
encoder_hidden_states: Optional[torch.Tensor] = None,
encoder_attention_mask: Optional[torch.Tensor] = None,
output_attentions: Optional[bool] = False,
**kwargs,
):
"""
Args:
@@ -639,22 +592,6 @@ class DetrDecoderLayer(nn.Module):
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
returned tensors for more detail.
"""
position_embeddings = kwargs.pop("position_embeddings", None)
if kwargs:
raise ValueError(f"Unexpected arguments {kwargs.keys()}")
if position_embeddings is not None and object_queries is not None:
raise ValueError(
"Cannot specify both position_embeddings and object_queries. Please use just object_queries"
)
if position_embeddings is not None:
logger.warning_once(
"position_embeddings has been deprecated and will be removed in v4.34. Please use object_queries instead"
)
object_queries = position_embeddings
residual = hidden_states
# Self Attention
@@ -742,7 +679,6 @@ class DetrDecoder(nn.Module):
output_attentions=None,
output_hidden_states=None,
return_dict=None,
**kwargs,
):
r"""
Args:
@@ -779,21 +715,6 @@ class DetrDecoder(nn.Module):
return_dict (`bool`, *optional*):
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""
position_embeddings = kwargs.pop("position_embeddings", None)
if kwargs:
raise ValueError(f"Unexpected arguments {kwargs.keys()}")
if position_embeddings is not None and object_queries is not None:
raise ValueError(
"Cannot specify both position_embeddings and object_queries. Please use just object_queries"
)
if position_embeddings is not None:
logger.warning_once(
"position_embeddings has been deprecated and will be removed in v4.34. Please use object_queries instead"
)
object_queries = position_embeddings
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
...
@@ -20,7 +20,6 @@
""" PyTorch Mistral model."""
import inspect
import math
import warnings
from typing import List, Optional, Tuple, Union
import torch
@@ -246,12 +245,7 @@ class MistralAttention(nn.Module):
past_key_value: Optional[Cache] = None,
output_attentions: bool = False,
use_cache: bool = False,
**kwargs,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
if "padding_mask" in kwargs:
warnings.warn(
"Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
)
bsz, q_len, _ = hidden_states.size()
query_states = self.q_proj(hidden_states)
@@ -344,15 +338,7 @@ class MistralFlashAttention2(MistralAttention):
past_key_value: Optional[Cache] = None,
output_attentions: bool = False,
use_cache: bool = False,
**kwargs,
):
if "padding_mask" in kwargs:
warnings.warn(
"Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
)
# overwrite attention_mask with padding_mask
attention_mask = kwargs.pop("padding_mask")
bsz, q_len, _ = hidden_states.size()
query_states = self.q_proj(hidden_states)
@@ -729,12 +715,7 @@ class MistralDecoderLayer(nn.Module):
past_key_value: Optional[Tuple[torch.Tensor]] = None,
output_attentions: Optional[bool] = False,
use_cache: Optional[bool] = False,
**kwargs,
) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
if "padding_mask" in kwargs:
warnings.warn(
"Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
)
""" """
Args: Args:
hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)` hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
......
@@ -20,7 +20,6 @@
""" PyTorch Mixtral model."""
import inspect
import math
import warnings
from typing import List, Optional, Tuple, Union
import torch
@@ -323,12 +322,7 @@ class MixtralAttention(nn.Module):
past_key_value: Optional[Cache] = None,
output_attentions: bool = False,
use_cache: bool = False,
**kwargs,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
if "padding_mask" in kwargs:
warnings.warn(
"Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
)
bsz, q_len, _ = hidden_states.size() bsz, q_len, _ = hidden_states.size()
query_states = self.q_proj(hidden_states) query_states = self.q_proj(hidden_states)
...@@ -422,15 +416,7 @@ class MixtralFlashAttention2(MixtralAttention): ...@@ -422,15 +416,7 @@ class MixtralFlashAttention2(MixtralAttention):
past_key_value: Optional[Cache] = None, past_key_value: Optional[Cache] = None,
output_attentions: bool = False, output_attentions: bool = False,
use_cache: bool = False, use_cache: bool = False,
**kwargs,
): ):
if "padding_mask" in kwargs:
warnings.warn(
"Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
)
# overwrite attention_mask with padding_mask
attention_mask = kwargs.pop("padding_mask")
bsz, q_len, _ = hidden_states.size() bsz, q_len, _ = hidden_states.size()
query_states = self.q_proj(hidden_states) query_states = self.q_proj(hidden_states)
...@@ -805,14 +791,6 @@ class MixtralBlockSparseTop2MLP(nn.Module): ...@@ -805,14 +791,6 @@ class MixtralBlockSparseTop2MLP(nn.Module):
return current_hidden_states return current_hidden_states
class MixtralBLockSparseTop2MLP(MixtralBlockSparseTop2MLP):
def __init__(self, *args, **kwargs):
logger.warning_once(
"MixtralBLockSparseTop2MLP is deprecated by MixtralBlockSparseTop2MLP and will be removed in v4.40."
)
super().__init__(*args, **kwargs)
class MixtralSparseMoeBlock(nn.Module): class MixtralSparseMoeBlock(nn.Module):
""" """
This implementation is This implementation is
...@@ -901,12 +879,7 @@ class MixtralDecoderLayer(nn.Module): ...@@ -901,12 +879,7 @@ class MixtralDecoderLayer(nn.Module):
output_attentions: Optional[bool] = False, output_attentions: Optional[bool] = False,
output_router_logits: Optional[bool] = False, output_router_logits: Optional[bool] = False,
use_cache: Optional[bool] = False, use_cache: Optional[bool] = False,
**kwargs,
) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]: ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
if "padding_mask" in kwargs:
warnings.warn(
"Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
)
""" """
Args: Args:
hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)` hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
......
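Alongside the same `padding_mask` cleanup, the Mixtral hunks delete the misspelled `MixtralBLockSparseTop2MLP` alias, so any code still importing that name must switch to the correctly spelled class. A minimal sketch, assuming a tiny illustrative config and the internal `modeling_mixtral` module path:

import torch
from transformers import MixtralConfig
from transformers.models.mixtral.modeling_mixtral import MixtralBlockSparseTop2MLP

# Tiny, illustrative config; only hidden_size/intermediate_size matter for a single expert MLP.
config = MixtralConfig(hidden_size=32, intermediate_size=64)
expert = MixtralBlockSparseTop2MLP(config)  # the old `MixtralBLockSparseTop2MLP` alias is gone

tokens = torch.randn(4, config.hidden_size)   # (num_tokens, hidden_size)
out = expert(tokens)                          # gated (SwiGLU-style) MLP, same output shape
print(out.shape)                              # torch.Size([4, 32])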
@@ -20,7 +20,6 @@
"""PyTorch OLMo model."""
import math
-import warnings
from typing import List, Optional, Tuple, Union
import torch
@@ -101,29 +100,6 @@ class OlmoRotaryEmbedding(nn.Module):
self.register_buffer("inv_freq", inv_freq, persistent=False)
# For BC we register cos and sin cached
self.max_seq_len_cached = max_position_embeddings
-t = torch.arange(self.max_seq_len_cached, device=device, dtype=torch.int64).type_as(self.inv_freq)
-t = t / self.scaling_factor
-freqs = torch.outer(t, self.inv_freq)
-# Different from paper, but it uses a different permutation in order to obtain the same calculation
-emb = torch.cat((freqs, freqs), dim=-1)
-self.register_buffer("_cos_cached", emb.cos().to(torch.get_default_dtype()), persistent=False)
-self.register_buffer("_sin_cached", emb.sin().to(torch.get_default_dtype()), persistent=False)
-@property
-def sin_cached(self):
-logger.warning_once(
-"The sin_cached attribute will be removed in 4.39. Bear in mind that its contents changed in v4.38. Use "
-"the forward method of RoPE from now on instead. It is not used in the `OlmoAttention` class"
-)
-return self._sin_cached
-@property
-def cos_cached(self):
-logger.warning_once(
-"The cos_cached attribute will be removed in 4.39. Bear in mind that its contents changed in v4.38. Use "
-"the forward method of RoPE from now on instead. It is not used in the `OlmoAttention` class"
-)
-return self._cos_cached
@torch.no_grad()
def forward(self, x, position_ids):
@@ -690,7 +666,6 @@ class OlmoDecoderLayer(nn.Module):
output_attentions: Optional[bool] = False,
use_cache: Optional[bool] = False,
cache_position: Optional[torch.LongTensor] = None,
-**kwargs,
) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
"""
Args:
@@ -706,11 +681,6 @@
(see `past_key_values`).
past_key_value (`Tuple(torch.FloatTensor)`, *optional*): cached past key and value projection states
"""
-if "padding_mask" in kwargs:
-warnings.warn(
-"Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
-)
residual = hidden_states
hidden_states = self.input_layernorm(hidden_states)
@@ -724,7 +694,6 @@
output_attentions=output_attentions,
use_cache=use_cache,
cache_position=cache_position,
-**kwargs,
)
hidden_states = residual + hidden_states
...
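For OLMo, the removed block is the rotary embedding's cached-buffer back-compat path plus the deprecated `cos_cached`/`sin_cached` properties; as the deleted warnings said, cos/sin now come only from the module's forward. A minimal sketch with assumed sizes and the internal `modeling_olmo` module path (the constructor signature shown matches the release this diff targets and is an assumption for other versions):

import torch
from transformers.models.olmo.modeling_olmo import OlmoRotaryEmbedding

# head_dim and sequence length are illustrative values.
head_dim, seq_len = 16, 8
rope = OlmoRotaryEmbedding(head_dim, max_position_embeddings=2048)

x = torch.randn(1, seq_len, head_dim)          # only device/dtype are read from x
position_ids = torch.arange(seq_len).unsqueeze(0)

# Instead of reading rope.cos_cached / rope.sin_cached (removed), call forward:
cos, sin = rope(x, position_ids)
print(cos.shape, sin.shape)                    # each is (1, seq_len, head_dim)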
@@ -14,7 +14,6 @@
# limitations under the License.
""" PyTorch OWLv2 model."""
-import warnings
from dataclasses import dataclass
from functools import lru_cache
from typing import Any, Dict, Optional, Tuple, Union
@@ -1197,16 +1196,7 @@ class Owlv2Model(Owlv2PreTrainedModel):
if return_loss:
loss = owlv2_loss(logits_per_text)
-if return_base_image_embeds:
-warnings.warn(
-"`return_base_image_embeds` is deprecated and will be removed in v4.27 of Transformers, one can"
-" obtain the base (unprojected) image embeddings from outputs.vision_model_output.",
-FutureWarning,
-)
-last_hidden_state = vision_outputs[0]
-image_embeds = self.vision_model.post_layernorm(last_hidden_state)
-else:
-text_embeds = text_embeds_norm
+text_embeds = text_embeds_norm
if not return_dict:
output = (logits_per_image, logits_per_text, text_embeds, image_embeds, text_outputs, vision_outputs)
...
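With `return_base_image_embeds` removed from `Owlv2Model.forward`, the base (unprojected) image embeddings are recovered from `outputs.vision_model_output`, mirroring the deleted branch above. A minimal sketch; the checkpoint id is assumed and a blank PIL image stands in for real data:

import torch
from PIL import Image
from transformers import Owlv2Model, Owlv2Processor

checkpoint = "google/owlv2-base-patch16-ensemble"   # assumed checkpoint id
processor = Owlv2Processor.from_pretrained(checkpoint)
model = Owlv2Model.from_pretrained(checkpoint)

image = Image.new("RGB", (960, 960))                # placeholder image
inputs = processor(text=["a photo of a cat"], images=image, return_tensors="pt")

with torch.no_grad():
    outputs = model(**inputs)

# Equivalent of the removed `return_base_image_embeds=True` path:
last_hidden_state = outputs.vision_model_output.last_hidden_state
base_image_embeds = model.vision_model.post_layernorm(last_hidden_state)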