"src/vscode:/vscode.git/clone" did not exist on "4d806dba8ca6ba714fd7b95c112dd8514136c9af"
Unverified Commit 57c965a8 authored by amyeroberts, committed by GitHub

Remove deprecated logic and warnings (#30743)

* Remove deprecated logic and warnings

* Add back some code that seems to be important...

* Let's just add all the nllb stuff back; removing it is a bit more involved

* Remove kwargs

* Remove more kwargs
parent 3d7d3a87
@@ -21,7 +21,6 @@ import json
import logging
import os
import random
import warnings
from dataclasses import dataclass, field
from typing import Optional
@@ -85,12 +84,6 @@ class ModelArguments:
)
},
)
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token` instead."
},
)
trust_remote_code: bool = field(
default=False,
metadata={
@@ -213,15 +206,6 @@ def main():
parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TFTrainingArguments))
model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn(
"The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token` instead.",
FutureWarning,
)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_ner", model_args, data_args, framework="tensorflow")
...
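The `use_auth_token` dataclass field and its shim are dropped from both TensorFlow example scripts in this commit, so authentication goes through the `token` field only. A minimal sketch of the equivalent `from_pretrained` call, with a placeholder access token:

    from transformers import AutoTokenizer, TFAutoModelForTokenClassification

    hub_token = "hf_..."  # placeholder: your Hugging Face Hub access token

    # `token` replaces the removed `use_auth_token` argument.
    tokenizer = AutoTokenizer.from_pretrained("bert-base-cased", token=hub_token)
    model = TFAutoModelForTokenClassification.from_pretrained("bert-base-cased", token=hub_token)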
@@ -22,7 +22,6 @@ import json
import logging
import os
import sys
import warnings
from dataclasses import dataclass, field
from typing import Optional
@@ -103,12 +102,6 @@ class ModelArguments:
)
},
)
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token` instead."
},
)
trust_remote_code: bool = field(
default=False,
metadata={
@@ -285,15 +278,6 @@ def main():
else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn(
"The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token` instead.",
FutureWarning,
)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_translation", model_args, data_args, framework="tensorflow")
...
@@ -727,23 +727,11 @@ class ImageFeatureExtractionMixin:
)
def promote_annotation_format(annotation_format: Union[AnnotionFormat, AnnotationFormat]) -> AnnotationFormat:
# can be removed when `AnnotionFormat` is fully deprecated
return AnnotationFormat(annotation_format.value)
def validate_annotations(
annotation_format: AnnotationFormat,
supported_annotation_formats: Tuple[AnnotationFormat, ...],
annotations: List[Dict],
) -> None:
if isinstance(annotation_format, AnnotionFormat):
logger.warning_once(
f"`{annotation_format.__class__.__name__}` is deprecated and will be removed in v4.38. "
f"Please use `{AnnotationFormat.__name__}` instead."
)
annotation_format = promote_annotation_format(annotation_format)
if annotation_format not in supported_annotation_formats:
raise ValueError(f"Unsupported annotation format: {format} must be one of {supported_annotation_formats}")
...
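With the `AnnotionFormat` promotion shim removed, `validate_annotations` only accepts the correctly spelled `AnnotationFormat` enum. A minimal sketch, with a deliberately tiny COCO-style annotation stub standing in for real data:

    from transformers.image_utils import AnnotationFormat, validate_annotations

    annotations = [{"image_id": 0, "annotations": []}]  # minimal COCO-detection style stub

    # AnnotationFormat replaces the deprecated, misspelled AnnotionFormat enum.
    validate_annotations(
        AnnotationFormat.COCO_DETECTION,
        (AnnotationFormat.COCO_DETECTION, AnnotationFormat.COCO_PANOPTIC),
        annotations,
    )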
@@ -371,22 +371,10 @@ class AutoImageProcessor:
if image_processor_class is None and image_processor_auto_map is None:
feature_extractor_class = config_dict.pop("feature_extractor_type", None)
if feature_extractor_class is not None:
logger.warning(
"Could not find image processor class in the image processor config or the model config. Loading "
"based on pattern matching with the model's feature extractor configuration. Please open a "
"PR/issue to update `preprocessor_config.json` to use `image_processor_type` instead of "
"`feature_extractor_type`. This warning will be removed in v4.40."
)
image_processor_class = feature_extractor_class.replace("FeatureExtractor", "ImageProcessor")
if "AutoFeatureExtractor" in config_dict.get("auto_map", {}):
feature_extractor_auto_map = config_dict["auto_map"]["AutoFeatureExtractor"]
image_processor_auto_map = feature_extractor_auto_map.replace("FeatureExtractor", "ImageProcessor")
logger.warning(
"Could not find image processor auto map in the image processor config or the model config. "
"Loading based on pattern matching with the model's feature extractor configuration. Please open a "
"PR/issue to update `preprocessor_config.json` to use `AutoImageProcessor` instead of "
"`AutoFeatureExtractor`. This warning will be removed in v4.40."
)
# If we don't find the image processor class in the image processor config, let's try the model config.
if image_processor_class is None and image_processor_auto_map is None:
...
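Only the warnings are removed here; the pattern-matching fallback from feature-extractor names to image-processor names stays. A small sketch of what that fallback does with a legacy `preprocessor_config.json` dict (the `ViTFeatureExtractor` value is just an illustrative example):

    # Legacy configs that still carry `feature_extractor_type` are silently mapped
    # onto the corresponding image processor by string replacement.
    config_dict = {"feature_extractor_type": "ViTFeatureExtractor"}
    image_processor_class = config_dict["feature_extractor_type"].replace("FeatureExtractor", "ImageProcessor")
    print(image_processor_class)  # ViTImageProcessor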
@@ -23,7 +23,6 @@
"""PyTorch Cohere model."""
import math
import warnings
from typing import List, Optional, Tuple, Union
import torch
@@ -635,7 +634,6 @@ class CohereDecoderLayer(nn.Module):
output_attentions: Optional[bool] = False,
use_cache: Optional[bool] = False,
cache_position: Optional[torch.LongTensor] = None,
**kwargs,
) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
"""
Args:
@@ -651,11 +649,6 @@ class CohereDecoderLayer(nn.Module):
(see `past_key_values`).
past_key_value (`Tuple(torch.FloatTensor)`, *optional*): cached past key and value projection states
"""
if "padding_mask" in kwargs:
warnings.warn(
"Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
)
residual = hidden_states
hidden_states = self.input_layernorm(hidden_states)
@@ -669,7 +662,6 @@ class CohereDecoderLayer(nn.Module):
output_attentions=output_attentions,
use_cache=use_cache,
cache_position=cache_position,
**kwargs,
)
# Fully Connected
...
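The same `padding_mask` escape hatch is deleted from the attention and decoder layers of Cohere here and of Falcon, Gemma, Llama, Mistral and Mixtral below; `attention_mask` is the only supported keyword now. A hedged sketch of the call site, with the checkpoint name as a placeholder:

    from transformers import AutoModelForCausalLM, AutoTokenizer

    checkpoint = "your/causal-lm-checkpoint"  # placeholder for any of the affected models
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForCausalLM.from_pretrained(checkpoint)

    inputs = tokenizer("Hello world", return_tensors="pt")
    # Pass attention_mask; the padding_mask keyword no longer exists.
    outputs = model(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"])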
@@ -915,31 +915,6 @@ class ConditionalDetrImageProcessor(BaseImageProcessor):
raise ValueError(f"Format {format} is not supported.")
return target
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.prepare
def prepare(self, image, target, return_segmentation_masks=None, masks_path=None):
logger.warning_once(
"The `prepare` method is deprecated and will be removed in a v4.33. "
"Please use `prepare_annotation` instead. Note: the `prepare_annotation` method "
"does not return the image anymore.",
)
target = self.prepare_annotation(image, target, return_segmentation_masks, masks_path, self.format)
return image, target
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.convert_coco_poly_to_mask
def convert_coco_poly_to_mask(self, *args, **kwargs):
logger.warning_once("The `convert_coco_poly_to_mask` method is deprecated and will be removed in v4.33. ")
return convert_coco_poly_to_mask(*args, **kwargs)
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.prepare_coco_detection with DETR->ConditionalDetr
def prepare_coco_detection(self, *args, **kwargs):
logger.warning_once("The `prepare_coco_detection` method is deprecated and will be removed in v4.33. ")
return prepare_coco_detection_annotation(*args, **kwargs)
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.prepare_coco_panoptic
def prepare_coco_panoptic(self, *args, **kwargs):
logger.warning_once("The `prepare_coco_panoptic` method is deprecated and will be removed in v4.33. ")
return prepare_coco_panoptic_annotation(*args, **kwargs)
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.resize
def resize(
self,
...
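The deprecated `prepare`, `convert_coco_poly_to_mask`, `prepare_coco_detection` and `prepare_coco_panoptic` wrappers disappear from all DETR-family image processors in this commit; the replacement named in the old warnings is `prepare_annotation`, which returns only the prepared target. A sketch under the assumption that the keyword signature matches the call the removed wrapper forwarded to:

    import numpy as np
    from transformers import ConditionalDetrImageProcessor

    image_processor = ConditionalDetrImageProcessor()
    image = np.zeros((480, 640, 3), dtype=np.uint8)  # dummy image
    target = {"image_id": 0, "annotations": []}      # minimal COCO-detection style target

    # Old: image, target = image_processor.prepare(image, target)
    # New: prepare_annotation returns the prepared annotation dict only, not the image.
    target = image_processor.prepare_annotation(
        image,
        target,
        format=image_processor.format,
        return_segmentation_masks=None,
        masks_path=None,
    )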
@@ -556,23 +556,7 @@ class DetrAttention(nn.Module):
def _shape(self, tensor: torch.Tensor, seq_len: int, batch_size: int):
return tensor.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2).contiguous()
def with_pos_embed(self, tensor: torch.Tensor, object_queries: Optional[Tensor], **kwargs):
def with_pos_embed(self, tensor: torch.Tensor, object_queries: Optional[Tensor]):
position_embeddings = kwargs.pop("position_embeddings", None)
if kwargs:
raise ValueError(f"Unexpected arguments {kwargs.keys()}")
if position_embeddings is not None and object_queries is not None:
raise ValueError(
"Cannot specify both position_embeddings and object_queries. Please use just object_queries"
)
if position_embeddings is not None:
logger.warning_once(
"position_embeddings has been deprecated and will be removed in v4.34. Please use object_queries instead"
)
object_queries = position_embeddings
return tensor if object_queries is None else tensor + object_queries
def forward(
@@ -583,38 +567,8 @@ class DetrAttention(nn.Module):
key_value_states: Optional[torch.Tensor] = None,
spatial_position_embeddings: Optional[torch.Tensor] = None,
output_attentions: bool = False,
**kwargs,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
"""Input shape: Batch x Time x Channel"""
position_embeddings = kwargs.pop("position_ebmeddings", None)
key_value_position_embeddings = kwargs.pop("key_value_position_embeddings", None)
if kwargs:
raise ValueError(f"Unexpected arguments {kwargs.keys()}")
if position_embeddings is not None and object_queries is not None:
raise ValueError(
"Cannot specify both position_embeddings and object_queries. Please use just object_queries"
)
if key_value_position_embeddings is not None and spatial_position_embeddings is not None:
raise ValueError(
"Cannot specify both key_value_position_embeddings and spatial_position_embeddings. Please use just spatial_position_embeddings"
)
if position_embeddings is not None:
logger.warning_once(
"position_embeddings has been deprecated and will be removed in v4.34. Please use object_queries instead"
)
object_queries = position_embeddings
if key_value_position_embeddings is not None:
logger.warning_once(
"key_value_position_embeddings has been deprecated and will be removed in v4.34. Please use spatial_position_embeddings instead"
)
spatial_position_embeddings = key_value_position_embeddings
# if key_value_states are provided this layer is used as a cross-attention layer
# for the decoder
is_cross_attention = key_value_states is not None
@@ -838,7 +792,6 @@ class ConditionalDetrEncoderLayer(nn.Module):
attention_mask: torch.Tensor,
object_queries: torch.Tensor = None,
output_attentions: bool = False,
**kwargs,
):
"""
Args:
@@ -852,22 +805,6 @@ class ConditionalDetrEncoderLayer(nn.Module):
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
returned tensors for more detail.
"""
position_embeddings = kwargs.pop("position_embeddings", None)
if kwargs:
raise ValueError(f"Unexpected arguments {kwargs.keys()}")
if position_embeddings is not None and object_queries is not None:
raise ValueError(
"Cannot specify both position_embeddings and object_queries. Please use just object_queries"
)
if position_embeddings is not None:
logger.warning_once(
"position_embeddings has been deprecated and will be removed in v4.34. Please use object_queries instead"
)
object_queries = position_embeddings
residual = hidden_states
hidden_states, attn_weights = self.self_attn(
hidden_states=hidden_states,
@@ -956,7 +893,6 @@ class ConditionalDetrDecoderLayer(nn.Module):
encoder_attention_mask: Optional[torch.Tensor] = None,
output_attentions: Optional[bool] = False,
is_first: Optional[bool] = False,
**kwargs,
):
"""
Args:
@@ -979,22 +915,6 @@ class ConditionalDetrDecoderLayer(nn.Module):
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
returned tensors for more detail.
"""
position_embeddings = kwargs.pop("position_embeddings", None)
if kwargs:
raise ValueError(f"Unexpected arguments {kwargs.keys()}")
if position_embeddings is not None and object_queries is not None:
raise ValueError(
"Cannot specify both position_embeddings and object_queries. Please use just object_queries"
)
if position_embeddings is not None:
logger.warning_once(
"position_embeddings has been deprecated and will be removed in v4.34. Please use object_queries instead"
)
object_queries = position_embeddings
residual = hidden_states
# ========== Begin of Self-Attention =============
@@ -1236,7 +1156,6 @@ class ConditionalDetrEncoder(ConditionalDetrPreTrainedModel):
output_attentions=None,
output_hidden_states=None,
return_dict=None,
**kwargs,
):
r"""
Args:
@@ -1263,22 +1182,6 @@ class ConditionalDetrEncoder(ConditionalDetrPreTrainedModel):
return_dict (`bool`, *optional*):
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""
position_embeddings = kwargs.pop("position_embeddings", None)
if kwargs:
raise ValueError(f"Unexpected arguments {kwargs.keys()}")
if position_embeddings is not None and object_queries is not None:
raise ValueError(
"Cannot specify both position_embeddings and object_queries. Please use just object_queries"
)
if position_embeddings is not None:
logger.warning_once(
"position_embeddings has been deprecated and will be removed in v4.34. Please use object_queries instead"
)
object_queries = position_embeddings
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@@ -1377,7 +1280,6 @@ class ConditionalDetrDecoder(ConditionalDetrPreTrainedModel):
output_attentions=None,
output_hidden_states=None,
return_dict=None,
**kwargs,
):
r"""
Args:
@@ -1414,22 +1316,6 @@ class ConditionalDetrDecoder(ConditionalDetrPreTrainedModel):
return_dict (`bool`, *optional*):
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""
position_embeddings = kwargs.pop("position_embeddings", None)
if kwargs:
raise ValueError(f"Unexpected arguments {kwargs.keys()}")
if position_embeddings is not None and object_queries is not None:
raise ValueError(
"Cannot specify both position_embeddings and object_queries. Please use just object_queries"
)
if position_embeddings is not None:
logger.warning_once(
"position_embeddings has been deprecated and will be removed in v4.34. Please use object_queries instead"
)
object_queries = position_embeddings
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
...
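Across the DETR-family modeling files, the `position_embeddings` and `key_value_position_embeddings` keywords are gone; the public model API is unchanged, but code that drives the internal layers directly must pass `object_queries` and `spatial_position_embeddings`. A minimal sketch against the attention module, with illustrative shapes:

    import torch
    from transformers.models.detr.modeling_detr import DetrAttention

    attn = DetrAttention(embed_dim=256, num_heads=8)
    hidden_states = torch.randn(1, 100, 256)
    object_queries = torch.randn(1, 100, 256)

    # object_queries replaces the removed position_embeddings keyword.
    attn_output, attn_weights = attn(hidden_states=hidden_states, object_queries=object_queries)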
@@ -913,31 +913,6 @@ class DeformableDetrImageProcessor(BaseImageProcessor):
raise ValueError(f"Format {format} is not supported.")
return target
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.prepare
def prepare(self, image, target, return_segmentation_masks=None, masks_path=None):
logger.warning_once(
"The `prepare` method is deprecated and will be removed in a v4.33. "
"Please use `prepare_annotation` instead. Note: the `prepare_annotation` method "
"does not return the image anymore.",
)
target = self.prepare_annotation(image, target, return_segmentation_masks, masks_path, self.format)
return image, target
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.convert_coco_poly_to_mask
def convert_coco_poly_to_mask(self, *args, **kwargs):
logger.warning_once("The `convert_coco_poly_to_mask` method is deprecated and will be removed in v4.33. ")
return convert_coco_poly_to_mask(*args, **kwargs)
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.prepare_coco_detection
def prepare_coco_detection(self, *args, **kwargs):
logger.warning_once("The `prepare_coco_detection` method is deprecated and will be removed in v4.33. ")
return prepare_coco_detection_annotation(*args, **kwargs)
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.prepare_coco_panoptic
def prepare_coco_panoptic(self, *args, **kwargs):
logger.warning_once("The `prepare_coco_panoptic` method is deprecated and will be removed in v4.33. ")
return prepare_coco_panoptic_annotation(*args, **kwargs)
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.resize
def resize(
self,
...
@@ -576,31 +576,6 @@ class DetaImageProcessor(BaseImageProcessor):
raise ValueError(f"Format {format} is not supported.")
return target
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.prepare
def prepare(self, image, target, return_segmentation_masks=None, masks_path=None):
logger.warning_once(
"The `prepare` method is deprecated and will be removed in a v4.33. "
"Please use `prepare_annotation` instead. Note: the `prepare_annotation` method "
"does not return the image anymore.",
)
target = self.prepare_annotation(image, target, return_segmentation_masks, masks_path, self.format)
return image, target
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.convert_coco_poly_to_mask
def convert_coco_poly_to_mask(self, *args, **kwargs):
logger.warning_once("The `convert_coco_poly_to_mask` method is deprecated and will be removed in v4.33. ")
return convert_coco_poly_to_mask(*args, **kwargs)
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.prepare_coco_detection
def prepare_coco_detection(self, *args, **kwargs):
logger.warning_once("The `prepare_coco_detection` method is deprecated and will be removed in v4.33. ")
return prepare_coco_detection_annotation(*args, **kwargs)
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.prepare_coco_panoptic
def prepare_coco_panoptic(self, *args, **kwargs):
logger.warning_once("The `prepare_coco_panoptic` method is deprecated and will be removed in v4.33. ")
return prepare_coco_panoptic_annotation(*args, **kwargs)
def resize(
self,
image: np.ndarray,
...
@@ -896,27 +896,6 @@ class DetrImageProcessor(BaseImageProcessor):
raise ValueError(f"Format {format} is not supported.")
return target
def prepare(self, image, target, return_segmentation_masks=None, masks_path=None):
logger.warning_once(
"The `prepare` method is deprecated and will be removed in a v4.33. "
"Please use `prepare_annotation` instead. Note: the `prepare_annotation` method "
"does not return the image anymore.",
)
target = self.prepare_annotation(image, target, return_segmentation_masks, masks_path, self.format)
return image, target
def convert_coco_poly_to_mask(self, *args, **kwargs):
logger.warning_once("The `convert_coco_poly_to_mask` method is deprecated and will be removed in v4.33. ")
return convert_coco_poly_to_mask(*args, **kwargs)
def prepare_coco_detection(self, *args, **kwargs):
logger.warning_once("The `prepare_coco_detection` method is deprecated and will be removed in v4.33. ")
return prepare_coco_detection_annotation(*args, **kwargs)
def prepare_coco_panoptic(self, *args, **kwargs):
logger.warning_once("The `prepare_coco_panoptic` method is deprecated and will be removed in v4.33. ")
return prepare_coco_panoptic_annotation(*args, **kwargs)
def resize(
self,
image: np.ndarray,
...
@@ -524,23 +524,7 @@ class DetrAttention(nn.Module):
def _shape(self, tensor: torch.Tensor, seq_len: int, batch_size: int):
return tensor.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2).contiguous()
def with_pos_embed(self, tensor: torch.Tensor, object_queries: Optional[Tensor], **kwargs):
def with_pos_embed(self, tensor: torch.Tensor, object_queries: Optional[Tensor]):
position_embeddings = kwargs.pop("position_embeddings", None)
if kwargs:
raise ValueError(f"Unexpected arguments {kwargs.keys()}")
if position_embeddings is not None and object_queries is not None:
raise ValueError(
"Cannot specify both position_embeddings and object_queries. Please use just object_queries"
)
if position_embeddings is not None:
logger.warning_once(
"position_embeddings has been deprecated and will be removed in v4.34. Please use object_queries instead"
)
object_queries = position_embeddings
return tensor if object_queries is None else tensor + object_queries
def forward(
@@ -551,38 +535,8 @@ class DetrAttention(nn.Module):
key_value_states: Optional[torch.Tensor] = None,
spatial_position_embeddings: Optional[torch.Tensor] = None,
output_attentions: bool = False,
**kwargs,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
"""Input shape: Batch x Time x Channel"""
position_embeddings = kwargs.pop("position_ebmeddings", None)
key_value_position_embeddings = kwargs.pop("key_value_position_embeddings", None)
if kwargs:
raise ValueError(f"Unexpected arguments {kwargs.keys()}")
if position_embeddings is not None and object_queries is not None:
raise ValueError(
"Cannot specify both position_embeddings and object_queries. Please use just object_queries"
)
if key_value_position_embeddings is not None and spatial_position_embeddings is not None:
raise ValueError(
"Cannot specify both key_value_position_embeddings and spatial_position_embeddings. Please use just spatial_position_embeddings"
)
if position_embeddings is not None:
logger.warning_once(
"position_embeddings has been deprecated and will be removed in v4.34. Please use object_queries instead"
)
object_queries = position_embeddings
if key_value_position_embeddings is not None:
logger.warning_once(
"key_value_position_embeddings has been deprecated and will be removed in v4.34. Please use spatial_position_embeddings instead"
)
spatial_position_embeddings = key_value_position_embeddings
# if key_value_states are provided this layer is used as a cross-attention layer
# for the decoder
is_cross_attention = key_value_states is not None
@@ -688,7 +642,6 @@ class DetrEncoderLayer(nn.Module):
attention_mask: torch.Tensor,
object_queries: torch.Tensor = None,
output_attentions: bool = False,
**kwargs,
):
"""
Args:
@@ -702,22 +655,6 @@ class DetrEncoderLayer(nn.Module):
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
returned tensors for more detail.
"""
position_embeddings = kwargs.pop("position_embeddings", None)
if kwargs:
raise ValueError(f"Unexpected arguments {kwargs.keys()}")
if position_embeddings is not None and object_queries is not None:
raise ValueError(
"Cannot specify both position_embeddings and object_queries. Please use just object_queries"
)
if position_embeddings is not None:
logger.warning_once(
"position_embeddings has been deprecated and will be removed in v4.34. Please use object_queries instead"
)
object_queries = position_embeddings
residual = hidden_states
hidden_states, attn_weights = self.self_attn(
hidden_states=hidden_states,
@@ -787,7 +724,6 @@ class DetrDecoderLayer(nn.Module):
encoder_hidden_states: Optional[torch.Tensor] = None,
encoder_attention_mask: Optional[torch.Tensor] = None,
output_attentions: Optional[bool] = False,
**kwargs,
):
"""
Args:
@@ -810,22 +746,6 @@ class DetrDecoderLayer(nn.Module):
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
returned tensors for more detail.
"""
position_embeddings = kwargs.pop("position_embeddings", None)
if kwargs:
raise ValueError(f"Unexpected arguments {kwargs.keys()}")
if position_embeddings is not None and object_queries is not None:
raise ValueError(
"Cannot specify both position_embeddings and object_queries. Please use just object_queries"
)
if position_embeddings is not None:
logger.warning_once(
"position_embeddings has been deprecated and will be removed in v4.34. Please use object_queries instead"
)
object_queries = position_embeddings
residual = hidden_states
# Self Attention
@@ -995,7 +915,6 @@ class DetrEncoder(DetrPreTrainedModel):
output_attentions=None,
output_hidden_states=None,
return_dict=None,
**kwargs,
):
r"""
Args:
@@ -1022,22 +941,6 @@ class DetrEncoder(DetrPreTrainedModel):
return_dict (`bool`, *optional*):
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""
position_embeddings = kwargs.pop("position_embeddings", None)
if kwargs:
raise ValueError(f"Unexpected arguments {kwargs.keys()}")
if position_embeddings is not None and object_queries is not None:
raise ValueError(
"Cannot specify both position_embeddings and object_queries. Please use just object_queries"
)
if position_embeddings is not None:
logger.warning_once(
"position_embeddings has been deprecated and will be removed in v4.34. Please use object_queries instead"
)
object_queries = position_embeddings
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@@ -1129,7 +1032,6 @@ class DetrDecoder(DetrPreTrainedModel):
output_attentions=None,
output_hidden_states=None,
return_dict=None,
**kwargs,
):
r"""
Args:
@@ -1167,22 +1069,6 @@ class DetrDecoder(DetrPreTrainedModel):
return_dict (`bool`, *optional*):
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""
position_embeddings = kwargs.pop("position_embeddings", None)
if kwargs:
raise ValueError(f"Unexpected arguments {kwargs.keys()}")
if position_embeddings is not None and object_queries is not None:
raise ValueError(
"Cannot specify both position_embeddings and object_queries. Please use just object_queries"
)
if position_embeddings is not None:
logger.warning_once(
"position_embeddings has been deprecated and will be removed in v4.34. Please use object_queries instead"
)
object_queries = position_embeddings
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
...
@@ -15,7 +15,6 @@
"""PyTorch Falcon model."""
import math
import warnings
from typing import TYPE_CHECKING, Optional, Tuple, Union
import torch
@@ -393,13 +392,7 @@ class FalconAttention(nn.Module):
head_mask: Optional[torch.Tensor] = None,
use_cache: bool = False,
output_attentions: bool = False,
**kwargs,
):
if "padding_mask" in kwargs:
warnings.warn(
"Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
)
fused_qkv = self.query_key_value(hidden_states) # [batch_size, seq_length, 3 x hidden_size]
num_kv_heads = self.num_heads if self.new_decoder_architecture else self.num_kv_heads
# 3 x [batch_size, seq_length, num_heads, head_dim]
@@ -549,16 +542,7 @@ class FalconFlashAttention2(FalconAttention):
head_mask: Optional[torch.Tensor] = None,
use_cache: bool = False,
output_attentions: bool = False,
**kwargs,
):
if "padding_mask" in kwargs:
warnings.warn(
"Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
)
# overwrite attention_mask with padding_mask
attention_mask = kwargs.pop("padding_mask")
fused_qkv = self.query_key_value(hidden_states) # [batch_size, seq_length, 3 x hidden_size]
num_kv_heads = self.num_heads if self.new_decoder_architecture else self.num_kv_heads
# 3 x [batch_size, seq_length, num_heads, head_dim]
@@ -792,13 +776,7 @@ class FalconDecoderLayer(nn.Module):
head_mask: Optional[torch.Tensor] = None,
use_cache: bool = False,
output_attentions: bool = False,
**kwargs,
):
if "padding_mask" in kwargs:
warnings.warn(
"Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
)
residual = hidden_states
if self.config.new_decoder_architecture and self.config.num_ln_in_parallel_attn == 2:
@@ -817,7 +795,6 @@ class FalconDecoderLayer(nn.Module):
head_mask=head_mask,
use_cache=use_cache,
output_attentions=output_attentions,
**kwargs,
)
attention_output = attn_outputs[0]
...
@@ -16,7 +16,6 @@
""" PyTorch Gemma model."""
import math
import warnings
from typing import List, Optional, Tuple, Union
import torch
@@ -616,7 +615,6 @@ class GemmaDecoderLayer(nn.Module):
output_attentions: Optional[bool] = False,
use_cache: Optional[bool] = False,
cache_position: Optional[torch.LongTensor] = None,
**kwargs,
) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
"""
Args:
@@ -632,11 +630,6 @@ class GemmaDecoderLayer(nn.Module):
(see `past_key_values`).
past_key_value (`Tuple(torch.FloatTensor)`, *optional*): cached past key and value projection states
"""
if "padding_mask" in kwargs:
warnings.warn(
"Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
)
residual = hidden_states
hidden_states = self.input_layernorm(hidden_states)
@@ -650,7 +643,6 @@ class GemmaDecoderLayer(nn.Module):
output_attentions=output_attentions,
use_cache=use_cache,
cache_position=cache_position,
**kwargs,
)
hidden_states = residual + hidden_states
...
@@ -920,31 +920,6 @@ class GroundingDinoImageProcessor(BaseImageProcessor):
raise ValueError(f"Format {format} is not supported.")
return target
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.prepare
def prepare(self, image, target, return_segmentation_masks=None, masks_path=None):
logger.warning_once(
"The `prepare` method is deprecated and will be removed in a v4.33. "
"Please use `prepare_annotation` instead. Note: the `prepare_annotation` method "
"does not return the image anymore.",
)
target = self.prepare_annotation(image, target, return_segmentation_masks, masks_path, self.format)
return image, target
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.convert_coco_poly_to_mask
def convert_coco_poly_to_mask(self, *args, **kwargs):
logger.warning_once("The `convert_coco_poly_to_mask` method is deprecated and will be removed in v4.33. ")
return convert_coco_poly_to_mask(*args, **kwargs)
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.prepare_coco_detection
def prepare_coco_detection(self, *args, **kwargs):
logger.warning_once("The `prepare_coco_detection` method is deprecated and will be removed in v4.33. ")
return prepare_coco_detection_annotation(*args, **kwargs)
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.prepare_coco_panoptic
def prepare_coco_panoptic(self, *args, **kwargs):
logger.warning_once("The `prepare_coco_panoptic` method is deprecated and will be removed in v4.33. ")
return prepare_coco_panoptic_annotation(*args, **kwargs)
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.resize
def resize(
self,
...
@@ -20,7 +20,6 @@
"""PyTorch LLaMA model."""
import math
import warnings
from typing import List, Optional, Tuple, Union
import torch
@@ -104,29 +103,6 @@ class LlamaRotaryEmbedding(nn.Module):
self.register_buffer("inv_freq", inv_freq, persistent=False)
# For BC we register cos and sin cached
self.max_seq_len_cached = max_position_embeddings
t = torch.arange(self.max_seq_len_cached, device=device, dtype=torch.int64).type_as(self.inv_freq)
t = t / self.scaling_factor
freqs = torch.outer(t, self.inv_freq)
# Different from paper, but it uses a different permutation in order to obtain the same calculation
emb = torch.cat((freqs, freqs), dim=-1)
self.register_buffer("_cos_cached", emb.cos().to(torch.get_default_dtype()), persistent=False)
self.register_buffer("_sin_cached", emb.sin().to(torch.get_default_dtype()), persistent=False)
@property
def sin_cached(self):
logger.warning_once(
"The sin_cached attribute will be removed in 4.39. Bear in mind that its contents changed in v4.38. Use "
"the forward method of RoPE from now on instead. It is not used in the `LlamaAttention` class"
)
return self._sin_cached
@property
def cos_cached(self):
logger.warning_once(
"The cos_cached attribute will be removed in 4.39. Bear in mind that its contents changed in v4.38. Use "
"the forward method of RoPE from now on instead. It is not used in the `LlamaAttention` class"
)
return self._cos_cached
@torch.no_grad()
def forward(self, x, position_ids):
@@ -714,7 +690,6 @@ class LlamaDecoderLayer(nn.Module):
output_attentions: Optional[bool] = False,
use_cache: Optional[bool] = False,
cache_position: Optional[torch.LongTensor] = None,
**kwargs,
) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
"""
Args:
@@ -730,11 +705,6 @@ class LlamaDecoderLayer(nn.Module):
(see `past_key_values`).
past_key_value (`Tuple(torch.FloatTensor)`, *optional*): cached past key and value projection states
"""
if "padding_mask" in kwargs:
warnings.warn(
"Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
)
residual = hidden_states
hidden_states = self.input_layernorm(hidden_states)
@@ -748,7 +718,6 @@ class LlamaDecoderLayer(nn.Module):
output_attentions=output_attentions,
use_cache=use_cache,
cache_position=cache_position,
**kwargs,
)
hidden_states = residual + hidden_states
...
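With the cached `cos_cached`/`sin_cached` buffers and their properties dropped from the Llama (and, below, OLMo) rotary embeddings, the replacement the old warnings pointed to is the module's forward pass. A hedged sketch against `LlamaRotaryEmbedding` with made-up dimensions, assuming the dim-based constructor of this release:

    import torch
    from transformers.models.llama.modeling_llama import LlamaRotaryEmbedding

    rotary_emb = LlamaRotaryEmbedding(dim=128)
    x = torch.randn(1, 8, 16, 128)                # only dtype/device of x are used
    position_ids = torch.arange(16).unsqueeze(0)

    # Call forward instead of reading the removed cos_cached / sin_cached buffers.
    cos, sin = rotary_emb(x, position_ids)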
@@ -440,23 +440,7 @@ class DetrAttention(nn.Module):
def _shape(self, tensor: torch.Tensor, seq_len: int, batch_size: int):
return tensor.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2).contiguous()
def with_pos_embed(self, tensor: torch.Tensor, object_queries: Optional[Tensor], **kwargs):
def with_pos_embed(self, tensor: torch.Tensor, object_queries: Optional[Tensor]):
position_embeddings = kwargs.pop("position_embeddings", None)
if kwargs:
raise ValueError(f"Unexpected arguments {kwargs.keys()}")
if position_embeddings is not None and object_queries is not None:
raise ValueError(
"Cannot specify both position_embeddings and object_queries. Please use just object_queries"
)
if position_embeddings is not None:
logger.warning_once(
"position_embeddings has been deprecated and will be removed in v4.34. Please use object_queries instead"
)
object_queries = position_embeddings
return tensor if object_queries is None else tensor + object_queries
def forward(
@@ -467,38 +451,8 @@ class DetrAttention(nn.Module):
key_value_states: Optional[torch.Tensor] = None,
spatial_position_embeddings: Optional[torch.Tensor] = None,
output_attentions: bool = False,
**kwargs,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
"""Input shape: Batch x Time x Channel"""
position_embeddings = kwargs.pop("position_ebmeddings", None)
key_value_position_embeddings = kwargs.pop("key_value_position_embeddings", None)
if kwargs:
raise ValueError(f"Unexpected arguments {kwargs.keys()}")
if position_embeddings is not None and object_queries is not None:
raise ValueError(
"Cannot specify both position_embeddings and object_queries. Please use just object_queries"
)
if key_value_position_embeddings is not None and spatial_position_embeddings is not None:
raise ValueError(
"Cannot specify both key_value_position_embeddings and spatial_position_embeddings. Please use just spatial_position_embeddings"
)
if position_embeddings is not None:
logger.warning_once(
"position_embeddings has been deprecated and will be removed in v4.34. Please use object_queries instead"
)
object_queries = position_embeddings
if key_value_position_embeddings is not None:
logger.warning_once(
"key_value_position_embeddings has been deprecated and will be removed in v4.34. Please use spatial_position_embeddings instead"
)
spatial_position_embeddings = key_value_position_embeddings
# if key_value_states are provided this layer is used as a cross-attention layer
# for the decoder
is_cross_attention = key_value_states is not None
@@ -616,7 +570,6 @@ class DetrDecoderLayer(nn.Module):
encoder_hidden_states: Optional[torch.Tensor] = None,
encoder_attention_mask: Optional[torch.Tensor] = None,
output_attentions: Optional[bool] = False,
**kwargs,
):
"""
Args:
@@ -639,22 +592,6 @@ class DetrDecoderLayer(nn.Module):
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
returned tensors for more detail.
"""
position_embeddings = kwargs.pop("position_embeddings", None)
if kwargs:
raise ValueError(f"Unexpected arguments {kwargs.keys()}")
if position_embeddings is not None and object_queries is not None:
raise ValueError(
"Cannot specify both position_embeddings and object_queries. Please use just object_queries"
)
if position_embeddings is not None:
logger.warning_once(
"position_embeddings has been deprecated and will be removed in v4.34. Please use object_queries instead"
)
object_queries = position_embeddings
residual = hidden_states
# Self Attention
@@ -742,7 +679,6 @@ class DetrDecoder(nn.Module):
output_attentions=None,
output_hidden_states=None,
return_dict=None,
**kwargs,
):
r"""
Args:
@@ -779,21 +715,6 @@ class DetrDecoder(nn.Module):
return_dict (`bool`, *optional*):
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""
position_embeddings = kwargs.pop("position_embeddings", None)
if kwargs:
raise ValueError(f"Unexpected arguments {kwargs.keys()}")
if position_embeddings is not None and object_queries is not None:
raise ValueError(
"Cannot specify both position_embeddings and object_queries. Please use just object_queries"
)
if position_embeddings is not None:
logger.warning_once(
"position_embeddings has been deprecated and will be removed in v4.34. Please use object_queries instead"
)
object_queries = position_embeddings
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
...
@@ -20,7 +20,6 @@
""" PyTorch Mistral model."""
import inspect
import math
import warnings
from typing import List, Optional, Tuple, Union
import torch
@@ -246,12 +245,7 @@ class MistralAttention(nn.Module):
past_key_value: Optional[Cache] = None,
output_attentions: bool = False,
use_cache: bool = False,
**kwargs,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
if "padding_mask" in kwargs:
warnings.warn(
"Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
)
bsz, q_len, _ = hidden_states.size()
query_states = self.q_proj(hidden_states)
@@ -344,15 +338,7 @@ class MistralFlashAttention2(MistralAttention):
past_key_value: Optional[Cache] = None,
output_attentions: bool = False,
use_cache: bool = False,
**kwargs,
):
if "padding_mask" in kwargs:
warnings.warn(
"Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
)
# overwrite attention_mask with padding_mask
attention_mask = kwargs.pop("padding_mask")
bsz, q_len, _ = hidden_states.size()
query_states = self.q_proj(hidden_states)
@@ -729,12 +715,7 @@ class MistralDecoderLayer(nn.Module):
past_key_value: Optional[Tuple[torch.Tensor]] = None,
output_attentions: Optional[bool] = False,
use_cache: Optional[bool] = False,
**kwargs,
) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
if "padding_mask" in kwargs:
warnings.warn(
"Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
)
""" """
Args: Args:
hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)` hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
......
@@ -20,7 +20,6 @@
""" PyTorch Mixtral model."""
import inspect
import math
import warnings
from typing import List, Optional, Tuple, Union
import torch
@@ -323,12 +322,7 @@ class MixtralAttention(nn.Module):
past_key_value: Optional[Cache] = None,
output_attentions: bool = False,
use_cache: bool = False,
**kwargs,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
if "padding_mask" in kwargs:
warnings.warn(
"Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
)
bsz, q_len, _ = hidden_states.size() bsz, q_len, _ = hidden_states.size()
query_states = self.q_proj(hidden_states) query_states = self.q_proj(hidden_states)
...@@ -422,15 +416,7 @@ class MixtralFlashAttention2(MixtralAttention): ...@@ -422,15 +416,7 @@ class MixtralFlashAttention2(MixtralAttention):
past_key_value: Optional[Cache] = None, past_key_value: Optional[Cache] = None,
output_attentions: bool = False, output_attentions: bool = False,
use_cache: bool = False, use_cache: bool = False,
**kwargs,
): ):
if "padding_mask" in kwargs:
warnings.warn(
"Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
)
# overwrite attention_mask with padding_mask
attention_mask = kwargs.pop("padding_mask")
bsz, q_len, _ = hidden_states.size() bsz, q_len, _ = hidden_states.size()
query_states = self.q_proj(hidden_states) query_states = self.q_proj(hidden_states)
...@@ -805,14 +791,6 @@ class MixtralBlockSparseTop2MLP(nn.Module): ...@@ -805,14 +791,6 @@ class MixtralBlockSparseTop2MLP(nn.Module):
return current_hidden_states return current_hidden_states
class MixtralBLockSparseTop2MLP(MixtralBlockSparseTop2MLP):
def __init__(self, *args, **kwargs):
logger.warning_once(
"MixtralBLockSparseTop2MLP is deprecated by MixtralBlockSparseTop2MLP and will be removed in v4.40."
)
super().__init__(*args, **kwargs)
class MixtralSparseMoeBlock(nn.Module): class MixtralSparseMoeBlock(nn.Module):
""" """
This implementation is This implementation is
...@@ -901,12 +879,7 @@ class MixtralDecoderLayer(nn.Module): ...@@ -901,12 +879,7 @@ class MixtralDecoderLayer(nn.Module):
output_attentions: Optional[bool] = False, output_attentions: Optional[bool] = False,
output_router_logits: Optional[bool] = False, output_router_logits: Optional[bool] = False,
use_cache: Optional[bool] = False, use_cache: Optional[bool] = False,
**kwargs,
) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]: ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
if "padding_mask" in kwargs:
warnings.warn(
"Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
)
""" """
Args: Args:
hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)` hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
......
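Alongside the same `padding_mask` cleanup, the Mixtral hunks delete the misspelled `MixtralBLockSparseTop2MLP` alias, so any code still importing that name must switch to the correctly spelled class. A minimal sketch, assuming a tiny illustrative config and the internal `modeling_mixtral` module path:

import torch
from transformers import MixtralConfig
from transformers.models.mixtral.modeling_mixtral import MixtralBlockSparseTop2MLP

# Tiny, illustrative config; only hidden_size/intermediate_size matter for a single expert MLP.
config = MixtralConfig(hidden_size=32, intermediate_size=64)
expert = MixtralBlockSparseTop2MLP(config)  # the old `MixtralBLockSparseTop2MLP` alias is gone

tokens = torch.randn(4, config.hidden_size)   # (num_tokens, hidden_size)
out = expert(tokens)                          # gated (SwiGLU-style) MLP, same output shape
print(out.shape)                              # torch.Size([4, 32])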
@@ -20,7 +20,6 @@
"""PyTorch OLMo model."""
import math
-import warnings
from typing import List, Optional, Tuple, Union
import torch
@@ -101,29 +100,6 @@ class OlmoRotaryEmbedding(nn.Module):
self.register_buffer("inv_freq", inv_freq, persistent=False)
# For BC we register cos and sin cached
self.max_seq_len_cached = max_position_embeddings
-t = torch.arange(self.max_seq_len_cached, device=device, dtype=torch.int64).type_as(self.inv_freq)
-t = t / self.scaling_factor
-freqs = torch.outer(t, self.inv_freq)
-# Different from paper, but it uses a different permutation in order to obtain the same calculation
-emb = torch.cat((freqs, freqs), dim=-1)
-self.register_buffer("_cos_cached", emb.cos().to(torch.get_default_dtype()), persistent=False)
-self.register_buffer("_sin_cached", emb.sin().to(torch.get_default_dtype()), persistent=False)
-@property
-def sin_cached(self):
-logger.warning_once(
-"The sin_cached attribute will be removed in 4.39. Bear in mind that its contents changed in v4.38. Use "
-"the forward method of RoPE from now on instead. It is not used in the `OlmoAttention` class"
-)
-return self._sin_cached
-@property
-def cos_cached(self):
-logger.warning_once(
-"The cos_cached attribute will be removed in 4.39. Bear in mind that its contents changed in v4.38. Use "
-"the forward method of RoPE from now on instead. It is not used in the `OlmoAttention` class"
-)
-return self._cos_cached
@torch.no_grad()
def forward(self, x, position_ids):
@@ -690,7 +666,6 @@ class OlmoDecoderLayer(nn.Module):
output_attentions: Optional[bool] = False,
use_cache: Optional[bool] = False,
cache_position: Optional[torch.LongTensor] = None,
-**kwargs,
) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
"""
Args:
@@ -706,11 +681,6 @@
(see `past_key_values`).
past_key_value (`Tuple(torch.FloatTensor)`, *optional*): cached past key and value projection states
"""
-if "padding_mask" in kwargs:
-warnings.warn(
-"Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
-)
residual = hidden_states
hidden_states = self.input_layernorm(hidden_states)
@@ -724,7 +694,6 @@
output_attentions=output_attentions,
use_cache=use_cache,
cache_position=cache_position,
-**kwargs,
)
hidden_states = residual + hidden_states
...
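For OLMo, the removed block is the rotary embedding's cached-buffer back-compat path plus the deprecated `cos_cached`/`sin_cached` properties; as the deleted warnings said, cos/sin now come only from the module's forward. A minimal sketch with assumed sizes and the internal `modeling_olmo` module path (the constructor signature shown matches the release this diff targets and is an assumption for other versions):

import torch
from transformers.models.olmo.modeling_olmo import OlmoRotaryEmbedding

# head_dim and sequence length are illustrative values.
head_dim, seq_len = 16, 8
rope = OlmoRotaryEmbedding(head_dim, max_position_embeddings=2048)

x = torch.randn(1, seq_len, head_dim)          # only device/dtype are read from x
position_ids = torch.arange(seq_len).unsqueeze(0)

# Instead of reading rope.cos_cached / rope.sin_cached (removed), call forward:
cos, sin = rope(x, position_ids)
print(cos.shape, sin.shape)                    # each is (1, seq_len, head_dim)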
@@ -14,7 +14,6 @@
# limitations under the License.
""" PyTorch OWLv2 model."""
-import warnings
from dataclasses import dataclass
from functools import lru_cache
from typing import Any, Dict, Optional, Tuple, Union
@@ -1197,16 +1196,7 @@ class Owlv2Model(Owlv2PreTrainedModel):
if return_loss:
loss = owlv2_loss(logits_per_text)
-if return_base_image_embeds:
-warnings.warn(
-"`return_base_image_embeds` is deprecated and will be removed in v4.27 of Transformers, one can"
-" obtain the base (unprojected) image embeddings from outputs.vision_model_output.",
-FutureWarning,
-)
-last_hidden_state = vision_outputs[0]
-image_embeds = self.vision_model.post_layernorm(last_hidden_state)
-else:
-text_embeds = text_embeds_norm
+text_embeds = text_embeds_norm
if not return_dict:
output = (logits_per_image, logits_per_text, text_embeds, image_embeds, text_outputs, vision_outputs)
...
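With `return_base_image_embeds` removed from `Owlv2Model.forward`, the base (unprojected) image embeddings are recovered from `outputs.vision_model_output`, mirroring the deleted branch above. A minimal sketch; the checkpoint id is assumed and a blank PIL image stands in for real data:

import torch
from PIL import Image
from transformers import Owlv2Model, Owlv2Processor

checkpoint = "google/owlv2-base-patch16-ensemble"   # assumed checkpoint id
processor = Owlv2Processor.from_pretrained(checkpoint)
model = Owlv2Model.from_pretrained(checkpoint)

image = Image.new("RGB", (960, 960))                # placeholder image
inputs = processor(text=["a photo of a cat"], images=image, return_tensors="pt")

with torch.no_grad():
    outputs = model(**inputs)

# Equivalent of the removed `return_base_image_embeds=True` path:
last_hidden_state = outputs.vision_model_output.last_hidden_state
base_image_embeds = model.vision_model.post_layernorm(last_hidden_state)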