Unverified Commit 57c965a8 authored by amyeroberts, committed by GitHub

Remove deprecated logic and warnings (#30743)

* Remove deprecated logic and warnings

* Add back some code that seems to be important...

* Let's just add all the NLLB stuff back; removing it is a bit more involved

* Remove kwargs

* Remove more kwargs
parent 3d7d3a87
@@ -14,7 +14,6 @@
 # limitations under the License.
 """ PyTorch OWL-ViT model."""
-import warnings
 from dataclasses import dataclass
 from functools import lru_cache
 from typing import Any, Dict, Optional, Tuple, Union
@@ -1180,15 +1179,6 @@ class OwlViTModel(OwlViTPreTrainedModel):
         if return_loss:
             loss = owlvit_loss(logits_per_text)

-        if return_base_image_embeds:
-            warnings.warn(
-                "`return_base_image_embeds` is deprecated and will be removed in v4.27 of Transformers, one can"
-                " obtain the base (unprojected) image embeddings from outputs.vision_model_output.",
-                FutureWarning,
-            )
-            last_hidden_state = vision_outputs[0]
-            image_embeds = self.vision_model.post_layernorm(last_hidden_state)
-        else:
-            text_embeds = text_embeds_norm
+        text_embeds = text_embeds_norm

         if not return_dict:
...
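Note: with the `return_base_image_embeds` flag gone, the base (unprojected) image embeddings are read from `outputs.vision_model_output`, exactly as the removed warning suggested. A minimal sketch of that pattern (the checkpoint and test image are the usual documentation examples and purely illustrative; downloading them requires network access):

```python
import requests
import torch
from PIL import Image
from transformers import OwlViTModel, OwlViTProcessor

processor = OwlViTProcessor.from_pretrained("google/owlvit-base-patch32")
model = OwlViTModel.from_pretrained("google/owlvit-base-patch32")

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)
inputs = processor(text=["a photo of a cat"], images=image, return_tensors="pt")

with torch.no_grad():
    outputs = model(**inputs)

# Same tensor the deleted branch used to compute: post_layernorm applied to the
# vision tower's last hidden state.
last_hidden_state = outputs.vision_model_output.last_hidden_state
base_image_embeds = model.vision_model.post_layernorm(last_hidden_state)
```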
@@ -17,7 +17,6 @@
 import inspect
 import math
-import warnings
 from typing import List, Optional, Tuple, Union

 import torch
@@ -430,7 +429,6 @@ class Phi3FlashAttention2(Phi3Attention):
         past_key_value: Optional[Cache] = None,
         output_attentions: bool = False,
         use_cache: bool = False,
-        **kwargs,
     ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
         # Phi3FlashAttention2 attention does not support output_attentions
@@ -442,14 +440,6 @@ class Phi3FlashAttention2(Phi3Attention):
         output_attentions = False

-        if "padding_mask" in kwargs:
-            warnings.warn(
-                "Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
-            )
-
-            # overwrite attention_mask with padding_mask
-            attention_mask = kwargs.pop("padding_mask")
-
         bsz, q_len, _ = hidden_states.size()

         qkv = self.qkv_proj(hidden_states)
@@ -835,12 +825,7 @@ class Phi3DecoderLayer(nn.Module):
         past_key_value: Optional[Tuple[torch.Tensor]] = None,
         output_attentions: Optional[bool] = False,
         use_cache: Optional[bool] = False,
-        **kwargs,
     ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
-        if "padding_mask" in kwargs:
-            warnings.warn(
-                "Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
-            )
         """
         Args:
             hidden_states (`torch.FloatTensor`):
...
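Note: in Phi-3 (and likewise in Qwen2, Qwen2-MoE and Starcoder2 below) the `padding_mask` escape hatch and the `**kwargs` that carried it are gone; padding information now flows only through `attention_mask`. A minimal sketch of the supported call pattern, with the checkpoint name used purely as an illustration:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "microsoft/Phi-3-mini-4k-instruct"  # illustrative checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# Some checkpoints define no pad token; reuse EOS for padding in that case.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

inputs = tokenizer(["Hello", "A much longer prompt"], padding=True, return_tensors="pt")
with torch.no_grad():
    outputs = model(input_ids=inputs.input_ids, attention_mask=inputs.attention_mask)
print(outputs.logits.shape)  # (batch, padded_seq_len, vocab_size)
```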
@@ -20,7 +20,6 @@
 """ PyTorch Qwen2 model."""
 import inspect
 import math
-import warnings
 from typing import List, Optional, Tuple, Union

 import torch
@@ -244,12 +243,7 @@ class Qwen2Attention(nn.Module):
         past_key_value: Optional[Cache] = None,
         output_attentions: bool = False,
         use_cache: bool = False,
-        **kwargs,
     ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
-        if "padding_mask" in kwargs:
-            warnings.warn(
-                "Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
-            )
         bsz, q_len, _ = hidden_states.size()

         query_states = self.q_proj(hidden_states)
@@ -344,15 +338,7 @@ class Qwen2FlashAttention2(Qwen2Attention):
         past_key_value: Optional[Cache] = None,
         output_attentions: bool = False,
         use_cache: bool = False,
-        **kwargs,
     ):
-        if "padding_mask" in kwargs:
-            warnings.warn(
-                "Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
-            )
-
-            # overwrite attention_mask with padding_mask
-            attention_mask = kwargs.pop("padding_mask")
-
         bsz, q_len, _ = hidden_states.size()

         query_states = self.q_proj(hidden_states)
@@ -739,13 +725,7 @@ class Qwen2DecoderLayer(nn.Module):
         past_key_value: Optional[Tuple[torch.Tensor]] = None,
         output_attentions: Optional[bool] = False,
         use_cache: Optional[bool] = False,
-        **kwargs,
     ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
-        if "padding_mask" in kwargs:
-            warnings.warn(
-                "Passing `padding_mask` is deprecated and will be removed in v4.37. "
-                "Please make sure use `attention_mask` instead.`"
-            )
         """
         Args:
             hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
...
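Note: because `**kwargs` is removed from the per-layer `forward` signatures, an old-style `padding_mask=` call now fails fast with a `TypeError` instead of a deprecation warning. An illustrative sketch against a deliberately tiny, randomly initialized Qwen2 layer (the config values are arbitrary and chosen only to keep it small):

```python
import torch
from transformers.models.qwen2.configuration_qwen2 import Qwen2Config
from transformers.models.qwen2.modeling_qwen2 import Qwen2DecoderLayer

config = Qwen2Config(
    vocab_size=128,
    hidden_size=64,
    intermediate_size=128,
    num_hidden_layers=1,
    num_attention_heads=4,
    num_key_value_heads=4,
)
layer = Qwen2DecoderLayer(config, layer_idx=0)

hidden_states = torch.randn(1, 8, config.hidden_size)
try:
    # The deprecated keyword no longer binds to anything.
    layer(hidden_states, padding_mask=torch.ones(1, 8, dtype=torch.long))
except TypeError as err:
    print(f"padding_mask is gone: {err}")
```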
@@ -21,7 +21,6 @@
 import inspect
 import math
-import warnings
 from typing import List, Optional, Tuple, Union

 import torch
@@ -321,12 +320,7 @@ class Qwen2MoeAttention(nn.Module):
         past_key_value: Optional[Cache] = None,
         output_attentions: bool = False,
         use_cache: bool = False,
-        **kwargs,
     ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
-        if "padding_mask" in kwargs:
-            warnings.warn(
-                "Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
-            )
         bsz, q_len, _ = hidden_states.size()

         query_states = self.q_proj(hidden_states)
@@ -422,15 +416,7 @@ class Qwen2MoeFlashAttention2(Qwen2MoeAttention):
         past_key_value: Optional[Cache] = None,
         output_attentions: bool = False,
         use_cache: bool = False,
-        **kwargs,
     ):
-        if "padding_mask" in kwargs:
-            warnings.warn(
-                "Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
-            )
-
-            # overwrite attention_mask with padding_mask
-            attention_mask = kwargs.pop("padding_mask")
-
         bsz, q_len, _ = hidden_states.size()

         query_states = self.q_proj(hidden_states)
@@ -881,13 +867,7 @@ class Qwen2MoeDecoderLayer(nn.Module):
         output_attentions: Optional[bool] = False,
         output_router_logits: Optional[bool] = False,
         use_cache: Optional[bool] = False,
-        **kwargs,
     ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
-        if "padding_mask" in kwargs:
-            warnings.warn(
-                "Passing `padding_mask` is deprecated and will be removed in v4.37. "
-                "Please make sure use `attention_mask` instead.`"
-            )
         """
         Args:
             hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
...
@@ -279,11 +279,6 @@ class SEWDConfig(PretrainedConfig):
     def inputs_to_logits_ratio(self):
         return functools.reduce(operator.mul, self.conv_stride, 1)

-    @property
-    def hidden_dropout(self):
-        logger.warning_once("hidden_dropout is not used by the model and will be removed as config attribute in v4.35")
-        return self._hidden_dropout
-
     def to_dict(self):
         """
         Serializes this instance to a Python dictionary.
...
@@ -20,7 +20,6 @@
 """ PyTorch Starcoder2 model."""
 import inspect
 import math
-import warnings
 from typing import List, Optional, Tuple, Union

 import torch
@@ -227,12 +226,7 @@ class Starcoder2Attention(nn.Module):
         past_key_value: Optional[Cache] = None,
         output_attentions: bool = False,
         use_cache: bool = False,
-        **kwargs,
     ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
-        if "padding_mask" in kwargs:
-            warnings.warn(
-                "Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
-            )
         bsz, q_len, _ = hidden_states.size()

         query_states = self.q_proj(hidden_states)
@@ -328,15 +322,7 @@ class Starcoder2FlashAttention2(Starcoder2Attention):
         past_key_value: Optional[Cache] = None,
         output_attentions: bool = False,
         use_cache: bool = False,
-        **kwargs,
     ):
-        if "padding_mask" in kwargs:
-            warnings.warn(
-                "Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
-            )
-
-            # overwrite attention_mask with padding_mask
-            attention_mask = kwargs.pop("padding_mask")
-
         bsz, q_len, _ = hidden_states.size()

         query_states = self.q_proj(hidden_states)
@@ -717,12 +703,7 @@ class Starcoder2DecoderLayer(nn.Module):
         past_key_value: Optional[Tuple[torch.Tensor]] = None,
         output_attentions: Optional[bool] = False,
         use_cache: Optional[bool] = False,
-        **kwargs,
     ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
-        if "padding_mask" in kwargs:
-            warnings.warn(
-                "Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
-            )
         """
         Args:
             hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
...
@@ -461,23 +461,7 @@ class TableTransformerAttention(nn.Module):
     def _shape(self, tensor: torch.Tensor, seq_len: int, batch_size: int):
         return tensor.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2).contiguous()

-    def with_pos_embed(self, tensor: torch.Tensor, object_queries: Optional[Tensor], **kwargs):
-        position_embeddings = kwargs.pop("position_embeddings", None)
-
-        if kwargs:
-            raise ValueError(f"Unexpected arguments {kwargs.keys()}")
-
-        if position_embeddings is not None and object_queries is not None:
-            raise ValueError(
-                "Cannot specify both position_embeddings and object_queries. Please use just object_queries"
-            )
-
-        if position_embeddings is not None:
-            logger.warning_once(
-                "position_embeddings has been deprecated and will be removed in v4.34. Please use object_queries instead"
-            )
-            object_queries = position_embeddings
-
+    def with_pos_embed(self, tensor: torch.Tensor, object_queries: Optional[Tensor]):
         return tensor if object_queries is None else tensor + object_queries

     def forward(
@@ -488,38 +472,8 @@ class TableTransformerAttention(nn.Module):
         key_value_states: Optional[torch.Tensor] = None,
         spatial_position_embeddings: Optional[torch.Tensor] = None,
         output_attentions: bool = False,
-        **kwargs,
     ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
         """Input shape: Batch x Time x Channel"""
-        position_embeddings = kwargs.pop("position_ebmeddings", None)
-        key_value_position_embeddings = kwargs.pop("key_value_position_embeddings", None)
-
-        if kwargs:
-            raise ValueError(f"Unexpected arguments {kwargs.keys()}")
-
-        if position_embeddings is not None and object_queries is not None:
-            raise ValueError(
-                "Cannot specify both position_embeddings and object_queries. Please use just object_queries"
-            )
-
-        if key_value_position_embeddings is not None and spatial_position_embeddings is not None:
-            raise ValueError(
-                "Cannot specify both key_value_position_embeddings and spatial_position_embeddings. Please use just spatial_position_embeddings"
-            )
-
-        if position_embeddings is not None:
-            logger.warning_once(
-                "position_embeddings has been deprecated and will be removed in v4.34. Please use object_queries instead"
-            )
-            object_queries = position_embeddings
-
-        if key_value_position_embeddings is not None:
-            logger.warning_once(
-                "key_value_position_embeddings has been deprecated and will be removed in v4.34. Please use spatial_position_embeddings instead"
-            )
-            spatial_position_embeddings = key_value_position_embeddings
-
         # if key_value_states are provided this layer is used as a cross-attention layer
         # for the decoder
         is_cross_attention = key_value_states is not None
@@ -1020,7 +974,6 @@ class TableTransformerDecoder(TableTransformerPreTrainedModel):
         output_attentions=None,
         output_hidden_states=None,
         return_dict=None,
-        **kwargs,
     ):
         r"""
         Args:
@@ -1058,22 +1011,6 @@ class TableTransformerDecoder(TableTransformerPreTrainedModel):
             return_dict (`bool`, *optional*):
                 Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
         """
-        position_embeddings = kwargs.pop("position_embeddings", None)
-
-        if kwargs:
-            raise ValueError(f"Unexpected arguments {kwargs.keys()}")
-
-        if position_embeddings is not None and object_queries is not None:
-            raise ValueError(
-                "Cannot specify both position_embeddings and object_queries. Please use just object_queries"
-            )
-
-        if position_embeddings is not None:
-            logger.warning_once(
-                "position_embeddings has been deprecated and will be removed in v4.34. Please use object_queries instead"
-            )
-            object_queries = position_embeddings
-
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
...
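Note: the attention block now understands only `object_queries` and `spatial_position_embeddings`; the `position_embeddings` / `key_value_position_embeddings` aliases are removed along with `**kwargs`. A hedged sketch of the remaining interface, assuming the DETR-style constructor `TableTransformerAttention(embed_dim, num_heads)` and random tensors:

```python
import torch
from transformers.models.table_transformer.modeling_table_transformer import (
    TableTransformerAttention,
)

# Assumption: the module is constructed the same way as DetrAttention it is copied from.
attention = TableTransformerAttention(embed_dim=64, num_heads=4)

hidden_states = torch.randn(1, 10, 64)   # (batch, sequence, embed_dim)
object_queries = torch.randn(1, 10, 64)  # positional features added to queries/keys

outputs = attention(
    hidden_states=hidden_states,
    object_queries=object_queries,
    output_attentions=True,
)
attn_output = outputs[0]  # (batch, sequence, embed_dim)
```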
@@ -820,31 +820,6 @@ class YolosImageProcessor(BaseImageProcessor):
             raise ValueError(f"Format {format} is not supported.")
         return target

-    # Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.prepare
-    def prepare(self, image, target, return_segmentation_masks=None, masks_path=None):
-        logger.warning_once(
-            "The `prepare` method is deprecated and will be removed in a v4.33. "
-            "Please use `prepare_annotation` instead. Note: the `prepare_annotation` method "
-            "does not return the image anymore.",
-        )
-        target = self.prepare_annotation(image, target, return_segmentation_masks, masks_path, self.format)
-        return image, target
-
-    # Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.convert_coco_poly_to_mask
-    def convert_coco_poly_to_mask(self, *args, **kwargs):
-        logger.warning_once("The `convert_coco_poly_to_mask` method is deprecated and will be removed in v4.33. ")
-        return convert_coco_poly_to_mask(*args, **kwargs)
-
-    # Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.prepare_coco_detection with DETR->Yolos
-    def prepare_coco_detection(self, *args, **kwargs):
-        logger.warning_once("The `prepare_coco_detection` method is deprecated and will be removed in v4.33. ")
-        return prepare_coco_detection_annotation(*args, **kwargs)
-
-    # Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.prepare_coco_panoptic
-    def prepare_coco_panoptic(self, *args, **kwargs):
-        logger.warning_once("The `prepare_coco_panoptic` method is deprecated and will be removed in v4.33. ")
-        return prepare_coco_panoptic_annotation(*args, **kwargs)
-
     # Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.resize
     def resize(
         self,
...
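Note: the removed `prepare`, `prepare_coco_detection`, `prepare_coco_panoptic` and `convert_coco_poly_to_mask` helpers are superseded by `prepare_annotation`, which the image processor already calls internally when annotations are passed to `__call__`. A minimal sketch with made-up COCO-style data (the all-zeros image and the annotation values are placeholders):

```python
import numpy as np
from transformers import YolosImageProcessor

image_processor = YolosImageProcessor()  # defaults to the COCO detection format
image = np.zeros((480, 640, 3), dtype=np.uint8)  # placeholder image
annotations = {
    "image_id": 0,
    "annotations": [
        {"bbox": [10.0, 20.0, 100.0, 50.0], "category_id": 1, "area": 5000.0, "iscrowd": 0},
    ],
}

# Annotation preparation (formerly the standalone `prepare*` helpers) happens here.
encoding = image_processor(images=image, annotations=annotations, return_tensors="pt")
print(encoding.keys())  # typically pixel_values (plus pixel_mask) and labels
```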