Unverified Commit 57c965a8 authored by amyeroberts's avatar amyeroberts Committed by GitHub
Browse files

Remove deprecated logic and warnings (#30743)

* Remove deprecated logic and warnings

* Add back some code that seems to be important...

* Let's just add all he nllb stuff back; removing it is a bit more involved

* Remove kwargs

* Remove more kwargs
parent 3d7d3a87
......@@ -14,7 +14,6 @@
# limitations under the License.
""" PyTorch OWL-ViT model."""
import warnings
from dataclasses import dataclass
from functools import lru_cache
from typing import Any, Dict, Optional, Tuple, Union
......@@ -1180,15 +1179,6 @@ class OwlViTModel(OwlViTPreTrainedModel):
if return_loss:
loss = owlvit_loss(logits_per_text)
if return_base_image_embeds:
warnings.warn(
"`return_base_image_embeds` is deprecated and will be removed in v4.27 of Transformers, one can"
" obtain the base (unprojected) image embeddings from outputs.vision_model_output.",
FutureWarning,
)
last_hidden_state = vision_outputs[0]
image_embeds = self.vision_model.post_layernorm(last_hidden_state)
else:
text_embeds = text_embeds_norm
if not return_dict:
......
......@@ -17,7 +17,6 @@
import inspect
import math
import warnings
from typing import List, Optional, Tuple, Union
import torch
......@@ -430,7 +429,6 @@ class Phi3FlashAttention2(Phi3Attention):
past_key_value: Optional[Cache] = None,
output_attentions: bool = False,
use_cache: bool = False,
**kwargs,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
# Phi3FlashAttention2 attention does not support output_attentions
......@@ -442,14 +440,6 @@ class Phi3FlashAttention2(Phi3Attention):
output_attentions = False
if "padding_mask" in kwargs:
warnings.warn(
"Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
)
# overwrite attention_mask with padding_mask
attention_mask = kwargs.pop("padding_mask")
bsz, q_len, _ = hidden_states.size()
qkv = self.qkv_proj(hidden_states)
......@@ -835,12 +825,7 @@ class Phi3DecoderLayer(nn.Module):
past_key_value: Optional[Tuple[torch.Tensor]] = None,
output_attentions: Optional[bool] = False,
use_cache: Optional[bool] = False,
**kwargs,
) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
if "padding_mask" in kwargs:
warnings.warn(
"Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
)
"""
Args:
hidden_states (`torch.FloatTensor`):
......
......@@ -20,7 +20,6 @@
""" PyTorch Qwen2 model."""
import inspect
import math
import warnings
from typing import List, Optional, Tuple, Union
import torch
......@@ -244,12 +243,7 @@ class Qwen2Attention(nn.Module):
past_key_value: Optional[Cache] = None,
output_attentions: bool = False,
use_cache: bool = False,
**kwargs,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
if "padding_mask" in kwargs:
warnings.warn(
"Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
)
bsz, q_len, _ = hidden_states.size()
query_states = self.q_proj(hidden_states)
......@@ -344,15 +338,7 @@ class Qwen2FlashAttention2(Qwen2Attention):
past_key_value: Optional[Cache] = None,
output_attentions: bool = False,
use_cache: bool = False,
**kwargs,
):
if "padding_mask" in kwargs:
warnings.warn(
"Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
)
# overwrite attention_mask with padding_mask
attention_mask = kwargs.pop("padding_mask")
bsz, q_len, _ = hidden_states.size()
query_states = self.q_proj(hidden_states)
......@@ -739,13 +725,7 @@ class Qwen2DecoderLayer(nn.Module):
past_key_value: Optional[Tuple[torch.Tensor]] = None,
output_attentions: Optional[bool] = False,
use_cache: Optional[bool] = False,
**kwargs,
) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
if "padding_mask" in kwargs:
warnings.warn(
"Passing `padding_mask` is deprecated and will be removed in v4.37. "
"Please make sure use `attention_mask` instead.`"
)
"""
Args:
hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
......
......@@ -21,7 +21,6 @@
import inspect
import math
import warnings
from typing import List, Optional, Tuple, Union
import torch
......@@ -321,12 +320,7 @@ class Qwen2MoeAttention(nn.Module):
past_key_value: Optional[Cache] = None,
output_attentions: bool = False,
use_cache: bool = False,
**kwargs,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
if "padding_mask" in kwargs:
warnings.warn(
"Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
)
bsz, q_len, _ = hidden_states.size()
query_states = self.q_proj(hidden_states)
......@@ -422,15 +416,7 @@ class Qwen2MoeFlashAttention2(Qwen2MoeAttention):
past_key_value: Optional[Cache] = None,
output_attentions: bool = False,
use_cache: bool = False,
**kwargs,
):
if "padding_mask" in kwargs:
warnings.warn(
"Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
)
# overwrite attention_mask with padding_mask
attention_mask = kwargs.pop("padding_mask")
bsz, q_len, _ = hidden_states.size()
query_states = self.q_proj(hidden_states)
......@@ -881,13 +867,7 @@ class Qwen2MoeDecoderLayer(nn.Module):
output_attentions: Optional[bool] = False,
output_router_logits: Optional[bool] = False,
use_cache: Optional[bool] = False,
**kwargs,
) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
if "padding_mask" in kwargs:
warnings.warn(
"Passing `padding_mask` is deprecated and will be removed in v4.37. "
"Please make sure use `attention_mask` instead.`"
)
"""
Args:
hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
......
......@@ -279,11 +279,6 @@ class SEWDConfig(PretrainedConfig):
def inputs_to_logits_ratio(self):
return functools.reduce(operator.mul, self.conv_stride, 1)
@property
def hidden_dropout(self):
logger.warning_once("hidden_dropout is not used by the model and will be removed as config attribute in v4.35")
return self._hidden_dropout
def to_dict(self):
"""
Serializes this instance to a Python dictionary.
......
......@@ -20,7 +20,6 @@
""" PyTorch Starcoder2 model."""
import inspect
import math
import warnings
from typing import List, Optional, Tuple, Union
import torch
......@@ -227,12 +226,7 @@ class Starcoder2Attention(nn.Module):
past_key_value: Optional[Cache] = None,
output_attentions: bool = False,
use_cache: bool = False,
**kwargs,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
if "padding_mask" in kwargs:
warnings.warn(
"Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
)
bsz, q_len, _ = hidden_states.size()
query_states = self.q_proj(hidden_states)
......@@ -328,15 +322,7 @@ class Starcoder2FlashAttention2(Starcoder2Attention):
past_key_value: Optional[Cache] = None,
output_attentions: bool = False,
use_cache: bool = False,
**kwargs,
):
if "padding_mask" in kwargs:
warnings.warn(
"Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
)
# overwrite attention_mask with padding_mask
attention_mask = kwargs.pop("padding_mask")
bsz, q_len, _ = hidden_states.size()
query_states = self.q_proj(hidden_states)
......@@ -717,12 +703,7 @@ class Starcoder2DecoderLayer(nn.Module):
past_key_value: Optional[Tuple[torch.Tensor]] = None,
output_attentions: Optional[bool] = False,
use_cache: Optional[bool] = False,
**kwargs,
) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
if "padding_mask" in kwargs:
warnings.warn(
"Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
)
"""
Args:
hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
......
......@@ -461,23 +461,7 @@ class TableTransformerAttention(nn.Module):
def _shape(self, tensor: torch.Tensor, seq_len: int, batch_size: int):
return tensor.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2).contiguous()
def with_pos_embed(self, tensor: torch.Tensor, object_queries: Optional[Tensor], **kwargs):
position_embeddings = kwargs.pop("position_embeddings", None)
if kwargs:
raise ValueError(f"Unexpected arguments {kwargs.keys()}")
if position_embeddings is not None and object_queries is not None:
raise ValueError(
"Cannot specify both position_embeddings and object_queries. Please use just object_queries"
)
if position_embeddings is not None:
logger.warning_once(
"position_embeddings has been deprecated and will be removed in v4.34. Please use object_queries instead"
)
object_queries = position_embeddings
def with_pos_embed(self, tensor: torch.Tensor, object_queries: Optional[Tensor]):
return tensor if object_queries is None else tensor + object_queries
def forward(
......@@ -488,38 +472,8 @@ class TableTransformerAttention(nn.Module):
key_value_states: Optional[torch.Tensor] = None,
spatial_position_embeddings: Optional[torch.Tensor] = None,
output_attentions: bool = False,
**kwargs,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
"""Input shape: Batch x Time x Channel"""
position_embeddings = kwargs.pop("position_ebmeddings", None)
key_value_position_embeddings = kwargs.pop("key_value_position_embeddings", None)
if kwargs:
raise ValueError(f"Unexpected arguments {kwargs.keys()}")
if position_embeddings is not None and object_queries is not None:
raise ValueError(
"Cannot specify both position_embeddings and object_queries. Please use just object_queries"
)
if key_value_position_embeddings is not None and spatial_position_embeddings is not None:
raise ValueError(
"Cannot specify both key_value_position_embeddings and spatial_position_embeddings. Please use just spatial_position_embeddings"
)
if position_embeddings is not None:
logger.warning_once(
"position_embeddings has been deprecated and will be removed in v4.34. Please use object_queries instead"
)
object_queries = position_embeddings
if key_value_position_embeddings is not None:
logger.warning_once(
"key_value_position_embeddings has been deprecated and will be removed in v4.34. Please use spatial_position_embeddings instead"
)
spatial_position_embeddings = key_value_position_embeddings
# if key_value_states are provided this layer is used as a cross-attention layer
# for the decoder
is_cross_attention = key_value_states is not None
......@@ -1020,7 +974,6 @@ class TableTransformerDecoder(TableTransformerPreTrainedModel):
output_attentions=None,
output_hidden_states=None,
return_dict=None,
**kwargs,
):
r"""
Args:
......@@ -1058,22 +1011,6 @@ class TableTransformerDecoder(TableTransformerPreTrainedModel):
return_dict (`bool`, *optional*):
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""
position_embeddings = kwargs.pop("position_embeddings", None)
if kwargs:
raise ValueError(f"Unexpected arguments {kwargs.keys()}")
if position_embeddings is not None and object_queries is not None:
raise ValueError(
"Cannot specify both position_embeddings and object_queries. Please use just object_queries"
)
if position_embeddings is not None:
logger.warning_once(
"position_embeddings has been deprecated and will be removed in v4.34. Please use object_queries instead"
)
object_queries = position_embeddings
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
......
......@@ -820,31 +820,6 @@ class YolosImageProcessor(BaseImageProcessor):
raise ValueError(f"Format {format} is not supported.")
return target
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.prepare
def prepare(self, image, target, return_segmentation_masks=None, masks_path=None):
logger.warning_once(
"The `prepare` method is deprecated and will be removed in a v4.33. "
"Please use `prepare_annotation` instead. Note: the `prepare_annotation` method "
"does not return the image anymore.",
)
target = self.prepare_annotation(image, target, return_segmentation_masks, masks_path, self.format)
return image, target
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.convert_coco_poly_to_mask
def convert_coco_poly_to_mask(self, *args, **kwargs):
logger.warning_once("The `convert_coco_poly_to_mask` method is deprecated and will be removed in v4.33. ")
return convert_coco_poly_to_mask(*args, **kwargs)
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.prepare_coco_detection with DETR->Yolos
def prepare_coco_detection(self, *args, **kwargs):
logger.warning_once("The `prepare_coco_detection` method is deprecated and will be removed in v4.33. ")
return prepare_coco_detection_annotation(*args, **kwargs)
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.prepare_coco_panoptic
def prepare_coco_panoptic(self, *args, **kwargs):
logger.warning_once("The `prepare_coco_panoptic` method is deprecated and will be removed in v4.33. ")
return prepare_coco_panoptic_annotation(*args, **kwargs)
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.resize
def resize(
self,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment