Unverified commit dad5ca83, authored by Joao Gante and committed by GitHub
Browse files

TF: Finalize `unpack_inputs`-related changes (#16499)

* Add unpack_inputs to remaining models

* Remove `**kwargs` from `call()` in TF models

* Fix TF T5 tests
parent be9474bd
...@@ -312,10 +312,12 @@ def booleans_processing(config, **kwargs): ...@@ -312,10 +312,12 @@ def booleans_processing(config, **kwargs):
final_booleans = {} final_booleans = {}
if tf.executing_eagerly(): if tf.executing_eagerly():
# Pure conv models (such as ConvNext) do not have `output_attentions` # Pure conv models (such as ConvNext) do not have `output_attentions`. If the signature has
final_booleans["output_attentions"] = kwargs.get("output_attentions", None) # `output_attentions`, it will be present here in `kwargs`, even if unset (in that case, as `None`)
if final_booleans["output_attentions"] is None: if "output_attentions" in kwargs:
final_booleans["output_attentions"] = config.output_attentions final_booleans["output_attentions"] = (
kwargs["output_attentions"] if kwargs["output_attentions"] is not None else config.output_attentions
)
final_booleans["output_hidden_states"] = ( final_booleans["output_hidden_states"] = (
kwargs["output_hidden_states"] kwargs["output_hidden_states"]
if kwargs["output_hidden_states"] is not None if kwargs["output_hidden_states"] is not None
...@@ -330,7 +332,10 @@ def booleans_processing(config, **kwargs): ...@@ -330,7 +332,10 @@ def booleans_processing(config, **kwargs):
kwargs["use_cache"] if kwargs["use_cache"] is not None else getattr(config, "use_cache", None) kwargs["use_cache"] if kwargs["use_cache"] is not None else getattr(config, "use_cache", None)
) )
else: else:
final_booleans["output_attentions"] = config.output_attentions # Pure conv models (such as ConvNext) do not have `output_attentions`. If the signature has
# `output_attentions`, it will be present here in `kwargs`, even if unset (in that case, as `None`)
if "output_attentions" in kwargs:
final_booleans["output_attentions"] = config.output_attentions
final_booleans["output_hidden_states"] = config.output_hidden_states final_booleans["output_hidden_states"] = config.output_hidden_states
if kwargs.get("return_dict", None) not in (None, True): if kwargs.get("return_dict", None) not in (None, True):
...@@ -403,7 +408,7 @@ def input_processing(func, config, input_ids, **kwargs): ...@@ -403,7 +408,7 @@ def input_processing(func, config, input_ids, **kwargs):
Two lists, one for the missing layers, and another one for the unexpected layers. Two lists, one for the missing layers, and another one for the unexpected layers.
""" """
signature = dict(inspect.signature(func).parameters) signature = dict(inspect.signature(func).parameters)
signature.pop("kwargs", None) has_kwargs = bool(signature.pop("kwargs", None))
signature.pop("self", None) signature.pop("self", None)
parameter_names = list(signature.keys()) parameter_names = list(signature.keys())
output = {} output = {}
...@@ -433,12 +438,14 @@ def input_processing(func, config, input_ids, **kwargs): ...@@ -433,12 +438,14 @@ def input_processing(func, config, input_ids, **kwargs):
elif "past_key_values" in kwargs["kwargs_call"] and "past" in parameter_names: elif "past_key_values" in kwargs["kwargs_call"] and "past" in parameter_names:
kwargs["past"] = kwargs["kwargs_call"].pop("past_key_values") kwargs["past"] = kwargs["kwargs_call"].pop("past_key_values")
if len(kwargs["kwargs_call"]) > 0: if has_kwargs:
raise ValueError( output["kwargs"] = kwargs.pop("kwargs_call", {})
f"The following keyword arguments are not supported by this model: {list(kwargs['kwargs_call'].keys())}." else:
) if len(kwargs["kwargs_call"]) > 0:
raise ValueError(
kwargs.pop("kwargs_call") f"The following keyword arguments are not supported by this model: {list(kwargs['kwargs_call'].keys())}."
)
kwargs.pop("kwargs_call")
for k, v in kwargs.items(): for k, v in kwargs.items():
if isinstance(v, allowed_types) or v is None: if isinstance(v, allowed_types) or v is None:
......
...@@ -551,7 +551,6 @@ class TFAlbertMainLayer(tf.keras.layers.Layer): ...@@ -551,7 +551,6 @@ class TFAlbertMainLayer(tf.keras.layers.Layer):
output_hidden_states: Optional[bool] = None, output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
training: bool = False, training: bool = False,
**kwargs,
) -> Union[TFBaseModelOutputWithPooling, Tuple[tf.Tensor]]: ) -> Union[TFBaseModelOutputWithPooling, Tuple[tf.Tensor]]:
if input_ids is not None and inputs_embeds is not None: if input_ids is not None and inputs_embeds is not None:
...@@ -785,7 +784,6 @@ class TFAlbertModel(TFAlbertPreTrainedModel): ...@@ -785,7 +784,6 @@ class TFAlbertModel(TFAlbertPreTrainedModel):
output_hidden_states: Optional[bool] = None, output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFBaseModelOutputWithPooling, Tuple[tf.Tensor]]: ) -> Union[TFBaseModelOutputWithPooling, Tuple[tf.Tensor]]:
outputs = self.albert( outputs = self.albert(
input_ids=input_ids, input_ids=input_ids,
...@@ -854,7 +852,6 @@ class TFAlbertForPreTraining(TFAlbertPreTrainedModel, TFAlbertPreTrainingLoss): ...@@ -854,7 +852,6 @@ class TFAlbertForPreTraining(TFAlbertPreTrainedModel, TFAlbertPreTrainingLoss):
labels: Optional[Union[np.ndarray, tf.Tensor]] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
sentence_order_label: Optional[Union[np.ndarray, tf.Tensor]] = None, sentence_order_label: Optional[Union[np.ndarray, tf.Tensor]] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFAlbertForPreTrainingOutput, Tuple[tf.Tensor]]: ) -> Union[TFAlbertForPreTrainingOutput, Tuple[tf.Tensor]]:
r""" r"""
Return: Return:
...@@ -976,7 +973,6 @@ class TFAlbertForMaskedLM(TFAlbertPreTrainedModel, TFMaskedLanguageModelingLoss) ...@@ -976,7 +973,6 @@ class TFAlbertForMaskedLM(TFAlbertPreTrainedModel, TFMaskedLanguageModelingLoss)
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
labels: Optional[Union[np.ndarray, tf.Tensor]] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]: ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
...@@ -1064,7 +1060,6 @@ class TFAlbertForSequenceClassification(TFAlbertPreTrainedModel, TFSequenceClass ...@@ -1064,7 +1060,6 @@ class TFAlbertForSequenceClassification(TFAlbertPreTrainedModel, TFSequenceClass
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
labels: Optional[Union[np.ndarray, tf.Tensor]] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
...@@ -1158,7 +1153,6 @@ class TFAlbertForTokenClassification(TFAlbertPreTrainedModel, TFTokenClassificat ...@@ -1158,7 +1153,6 @@ class TFAlbertForTokenClassification(TFAlbertPreTrainedModel, TFTokenClassificat
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
labels: Optional[Union[np.ndarray, tf.Tensor]] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]: ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
...@@ -1244,7 +1238,6 @@ class TFAlbertForQuestionAnswering(TFAlbertPreTrainedModel, TFQuestionAnsweringL ...@@ -1244,7 +1238,6 @@ class TFAlbertForQuestionAnswering(TFAlbertPreTrainedModel, TFQuestionAnsweringL
start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None,
end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]:
r""" r"""
start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*): start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
...@@ -1355,7 +1348,6 @@ class TFAlbertForMultipleChoice(TFAlbertPreTrainedModel, TFMultipleChoiceLoss): ...@@ -1355,7 +1348,6 @@ class TFAlbertForMultipleChoice(TFAlbertPreTrainedModel, TFMultipleChoiceLoss):
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
labels: Optional[Union[np.ndarray, tf.Tensor]] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFMultipleChoiceModelOutput, Tuple[tf.Tensor]]: ) -> Union[TFMultipleChoiceModelOutput, Tuple[tf.Tensor]]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
......
...@@ -679,7 +679,6 @@ class TFBartEncoder(tf.keras.layers.Layer): ...@@ -679,7 +679,6 @@ class TFBartEncoder(tf.keras.layers.Layer):
output_hidden_states: Optional[bool] = None, output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFBaseModelOutput, Tuple[tf.Tensor]]: ) -> Union[TFBaseModelOutput, Tuple[tf.Tensor]]:
""" """
Args: Args:
...@@ -834,7 +833,6 @@ class TFBartDecoder(tf.keras.layers.Layer): ...@@ -834,7 +833,6 @@ class TFBartDecoder(tf.keras.layers.Layer):
output_hidden_states: Optional[bool] = None, output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, Tuple[tf.Tensor]]: ) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, Tuple[tf.Tensor]]:
r""" r"""
Args: Args:
...@@ -1273,7 +1271,6 @@ class TFBartForConditionalGeneration(TFBartPretrainedModel, TFCausalLanguageMode ...@@ -1273,7 +1271,6 @@ class TFBartForConditionalGeneration(TFBartPretrainedModel, TFCausalLanguageMode
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
labels: Optional[tf.Tensor] = None, labels: Optional[tf.Tensor] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFSeq2SeqLMOutput, Tuple[tf.Tensor]]: ) -> Union[TFSeq2SeqLMOutput, Tuple[tf.Tensor]]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
......
...@@ -737,7 +737,6 @@ class TFBertMainLayer(tf.keras.layers.Layer): ...@@ -737,7 +737,6 @@ class TFBertMainLayer(tf.keras.layers.Layer):
output_hidden_states: Optional[bool] = None, output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
training: bool = False, training: bool = False,
**kwargs,
) -> Union[TFBaseModelOutputWithPoolingAndCrossAttentions, Tuple[tf.Tensor]]: ) -> Union[TFBaseModelOutputWithPoolingAndCrossAttentions, Tuple[tf.Tensor]]:
if not self.config.is_decoder: if not self.config.is_decoder:
...@@ -1067,7 +1066,6 @@ class TFBertModel(TFBertPreTrainedModel): ...@@ -1067,7 +1066,6 @@ class TFBertModel(TFBertPreTrainedModel):
output_hidden_states: Optional[bool] = None, output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFBaseModelOutputWithPoolingAndCrossAttentions, Tuple[tf.Tensor]]: ) -> Union[TFBaseModelOutputWithPoolingAndCrossAttentions, Tuple[tf.Tensor]]:
r""" r"""
encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
...@@ -1174,7 +1172,6 @@ class TFBertForPreTraining(TFBertPreTrainedModel, TFBertPreTrainingLoss): ...@@ -1174,7 +1172,6 @@ class TFBertForPreTraining(TFBertPreTrainedModel, TFBertPreTrainingLoss):
labels: Optional[Union[np.ndarray, tf.Tensor]] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
next_sentence_label: Optional[Union[np.ndarray, tf.Tensor]] = None, next_sentence_label: Optional[Union[np.ndarray, tf.Tensor]] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFBertForPreTrainingOutput, Tuple[tf.Tensor]]: ) -> Union[TFBertForPreTrainingOutput, Tuple[tf.Tensor]]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
...@@ -1302,7 +1299,6 @@ class TFBertForMaskedLM(TFBertPreTrainedModel, TFMaskedLanguageModelingLoss): ...@@ -1302,7 +1299,6 @@ class TFBertForMaskedLM(TFBertPreTrainedModel, TFMaskedLanguageModelingLoss):
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
labels: Optional[Union[np.ndarray, tf.Tensor]] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]: ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]:
r""" r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*):
...@@ -1520,7 +1516,6 @@ class TFBertForNextSentencePrediction(TFBertPreTrainedModel, TFNextSentencePredi ...@@ -1520,7 +1516,6 @@ class TFBertForNextSentencePrediction(TFBertPreTrainedModel, TFNextSentencePredi
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
next_sentence_label: Optional[Union[np.ndarray, tf.Tensor]] = None, next_sentence_label: Optional[Union[np.ndarray, tf.Tensor]] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFNextSentencePredictorOutput, Tuple[tf.Tensor]]: ) -> Union[TFNextSentencePredictorOutput, Tuple[tf.Tensor]]:
r""" r"""
Return: Return:
...@@ -1628,7 +1623,6 @@ class TFBertForSequenceClassification(TFBertPreTrainedModel, TFSequenceClassific ...@@ -1628,7 +1623,6 @@ class TFBertForSequenceClassification(TFBertPreTrainedModel, TFSequenceClassific
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
labels: Optional[Union[np.ndarray, tf.Tensor]] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]:
r""" r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
...@@ -1723,7 +1717,6 @@ class TFBertForMultipleChoice(TFBertPreTrainedModel, TFMultipleChoiceLoss): ...@@ -1723,7 +1717,6 @@ class TFBertForMultipleChoice(TFBertPreTrainedModel, TFMultipleChoiceLoss):
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
labels: Optional[Union[np.ndarray, tf.Tensor]] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFMultipleChoiceModelOutput, Tuple[tf.Tensor]]: ) -> Union[TFMultipleChoiceModelOutput, Tuple[tf.Tensor]]:
r""" r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
...@@ -1857,7 +1850,6 @@ class TFBertForTokenClassification(TFBertPreTrainedModel, TFTokenClassificationL ...@@ -1857,7 +1850,6 @@ class TFBertForTokenClassification(TFBertPreTrainedModel, TFTokenClassificationL
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
labels: Optional[Union[np.ndarray, tf.Tensor]] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]: ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]:
r""" r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*):
...@@ -1949,7 +1941,6 @@ class TFBertForQuestionAnswering(TFBertPreTrainedModel, TFQuestionAnsweringLoss) ...@@ -1949,7 +1941,6 @@ class TFBertForQuestionAnswering(TFBertPreTrainedModel, TFQuestionAnsweringLoss)
start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None,
end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]:
r""" r"""
start_positions (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): start_positions (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
......
...@@ -662,7 +662,6 @@ class TFBlenderbotEncoder(tf.keras.layers.Layer): ...@@ -662,7 +662,6 @@ class TFBlenderbotEncoder(tf.keras.layers.Layer):
output_hidden_states=None, output_hidden_states=None,
return_dict=None, return_dict=None,
training=False, training=False,
**kwargs,
): ):
""" """
Args: Args:
...@@ -823,7 +822,6 @@ class TFBlenderbotDecoder(tf.keras.layers.Layer): ...@@ -823,7 +822,6 @@ class TFBlenderbotDecoder(tf.keras.layers.Layer):
output_hidden_states=None, output_hidden_states=None,
return_dict=None, return_dict=None,
training=False, training=False,
**kwargs,
): ):
r""" r"""
Args: Args:
...@@ -1276,7 +1274,6 @@ class TFBlenderbotForConditionalGeneration(TFBlenderbotPreTrainedModel, TFCausal ...@@ -1276,7 +1274,6 @@ class TFBlenderbotForConditionalGeneration(TFBlenderbotPreTrainedModel, TFCausal
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
labels: Optional[tf.Tensor] = None, labels: Optional[tf.Tensor] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[Tuple[tf.Tensor], TFSeq2SeqLMOutput]: ) -> Union[Tuple[tf.Tensor], TFSeq2SeqLMOutput]:
r""" r"""
labels (`tf.tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.tensor` of shape `(batch_size, sequence_length)`, *optional*):
......
...@@ -667,7 +667,6 @@ class TFBlenderbotSmallEncoder(tf.keras.layers.Layer): ...@@ -667,7 +667,6 @@ class TFBlenderbotSmallEncoder(tf.keras.layers.Layer):
output_hidden_states=None, output_hidden_states=None,
return_dict=None, return_dict=None,
training=False, training=False,
**kwargs,
): ):
""" """
Args: Args:
...@@ -827,7 +826,6 @@ class TFBlenderbotSmallDecoder(tf.keras.layers.Layer): ...@@ -827,7 +826,6 @@ class TFBlenderbotSmallDecoder(tf.keras.layers.Layer):
output_hidden_states=None, output_hidden_states=None,
return_dict=None, return_dict=None,
training=False, training=False,
**kwargs,
): ):
r""" r"""
Args: Args:
...@@ -1253,7 +1251,6 @@ class TFBlenderbotSmallForConditionalGeneration(TFBlenderbotSmallPreTrainedModel ...@@ -1253,7 +1251,6 @@ class TFBlenderbotSmallForConditionalGeneration(TFBlenderbotSmallPreTrainedModel
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
labels: Optional[tf.Tensor] = None, labels: Optional[tf.Tensor] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[Tuple[tf.Tensor], TFSeq2SeqLMOutput]: ) -> Union[Tuple[tf.Tensor], TFSeq2SeqLMOutput]:
r""" r"""
labels (`tf.tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.tensor` of shape `(batch_size, sequence_length)`, *optional*):
......
...@@ -504,7 +504,6 @@ class TFCLIPTextTransformer(tf.keras.layers.Layer): ...@@ -504,7 +504,6 @@ class TFCLIPTextTransformer(tf.keras.layers.Layer):
output_hidden_states: bool, output_hidden_states: bool,
return_dict: bool, return_dict: bool,
training: bool = False, training: bool = False,
**kwargs,
) -> Union[TFBaseModelOutputWithPooling, Tuple[tf.Tensor]]: ) -> Union[TFBaseModelOutputWithPooling, Tuple[tf.Tensor]]:
input_shape = shape_list(input_ids) input_shape = shape_list(input_ids)
...@@ -593,7 +592,6 @@ class TFCLIPTextMainLayer(tf.keras.layers.Layer): ...@@ -593,7 +592,6 @@ class TFCLIPTextMainLayer(tf.keras.layers.Layer):
output_hidden_states: Optional[bool] = None, output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
training: bool = False, training: bool = False,
**kwargs,
) -> Union[TFBaseModelOutputWithPooling, Tuple[tf.Tensor]]: ) -> Union[TFBaseModelOutputWithPooling, Tuple[tf.Tensor]]:
if input_ids is None: if input_ids is None:
raise ValueError("You have to specify input_ids") raise ValueError("You have to specify input_ids")
...@@ -632,7 +630,6 @@ class TFCLIPVisionTransformer(tf.keras.layers.Layer): ...@@ -632,7 +630,6 @@ class TFCLIPVisionTransformer(tf.keras.layers.Layer):
output_hidden_states: bool, output_hidden_states: bool,
return_dict: bool, return_dict: bool,
training: bool = False, training: bool = False,
**kwargs,
) -> Union[TFBaseModelOutputWithPooling, Tuple[tf.Tensor]]: ) -> Union[TFBaseModelOutputWithPooling, Tuple[tf.Tensor]]:
embedding_output = self.embeddings(pixel_values=pixel_values) embedding_output = self.embeddings(pixel_values=pixel_values)
...@@ -683,7 +680,6 @@ class TFCLIPVisionMainLayer(tf.keras.layers.Layer): ...@@ -683,7 +680,6 @@ class TFCLIPVisionMainLayer(tf.keras.layers.Layer):
output_hidden_states: Optional[bool] = None, output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
training: bool = False, training: bool = False,
**kwargs,
) -> Union[TFBaseModelOutputWithPooling, Tuple[tf.Tensor]]: ) -> Union[TFBaseModelOutputWithPooling, Tuple[tf.Tensor]]:
if pixel_values is None: if pixel_values is None:
...@@ -762,7 +758,6 @@ class TFCLIPMainLayer(tf.keras.layers.Layer): ...@@ -762,7 +758,6 @@ class TFCLIPMainLayer(tf.keras.layers.Layer):
output_hidden_states: Optional[bool] = None, output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
training: bool = False, training: bool = False,
**kwargs,
) -> tf.Tensor: ) -> tf.Tensor:
if input_ids is None: if input_ids is None:
...@@ -796,7 +791,6 @@ class TFCLIPMainLayer(tf.keras.layers.Layer): ...@@ -796,7 +791,6 @@ class TFCLIPMainLayer(tf.keras.layers.Layer):
output_hidden_states: Optional[bool] = None, output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
training: bool = False, training: bool = False,
**kwargs,
) -> tf.Tensor: ) -> tf.Tensor:
if pixel_values is None: if pixel_values is None:
raise ValueError("You have to specify pixel_values") raise ValueError("You have to specify pixel_values")
...@@ -826,7 +820,6 @@ class TFCLIPMainLayer(tf.keras.layers.Layer): ...@@ -826,7 +820,6 @@ class TFCLIPMainLayer(tf.keras.layers.Layer):
output_hidden_states: Optional[bool] = None, output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
training: bool = False, training: bool = False,
**kwargs,
) -> Union[TFCLIPOutput, Tuple[tf.Tensor]]: ) -> Union[TFCLIPOutput, Tuple[tf.Tensor]]:
if input_ids is None: if input_ids is None:
...@@ -1058,7 +1051,6 @@ class TFCLIPTextModel(TFCLIPPreTrainedModel): ...@@ -1058,7 +1051,6 @@ class TFCLIPTextModel(TFCLIPPreTrainedModel):
output_hidden_states: Optional[bool] = None, output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFBaseModelOutputWithPooling, Tuple[tf.Tensor]]: ) -> Union[TFBaseModelOutputWithPooling, Tuple[tf.Tensor]]:
r""" r"""
Returns: Returns:
...@@ -1153,7 +1145,6 @@ class TFCLIPVisionModel(TFCLIPPreTrainedModel): ...@@ -1153,7 +1145,6 @@ class TFCLIPVisionModel(TFCLIPPreTrainedModel):
output_hidden_states: Optional[bool] = None, output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFBaseModelOutputWithPooling, Tuple[tf.Tensor]]: ) -> Union[TFBaseModelOutputWithPooling, Tuple[tf.Tensor]]:
r""" r"""
Returns: Returns:
...@@ -1258,7 +1249,6 @@ class TFCLIPModel(TFCLIPPreTrainedModel): ...@@ -1258,7 +1249,6 @@ class TFCLIPModel(TFCLIPPreTrainedModel):
output_hidden_states: Optional[bool] = None, output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
training: bool = False, training: bool = False,
**kwargs,
) -> tf.Tensor: ) -> tf.Tensor:
r""" r"""
Returns: Returns:
...@@ -1297,7 +1287,6 @@ class TFCLIPModel(TFCLIPPreTrainedModel): ...@@ -1297,7 +1287,6 @@ class TFCLIPModel(TFCLIPPreTrainedModel):
output_hidden_states: Optional[bool] = None, output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
training: bool = False, training: bool = False,
**kwargs,
) -> tf.Tensor: ) -> tf.Tensor:
r""" r"""
Returns: Returns:
...@@ -1345,7 +1334,6 @@ class TFCLIPModel(TFCLIPPreTrainedModel): ...@@ -1345,7 +1334,6 @@ class TFCLIPModel(TFCLIPPreTrainedModel):
output_hidden_states: Optional[bool] = None, output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
training: bool = False, training: bool = False,
**kwargs,
) -> Union[TFCLIPOutput, Tuple[tf.Tensor]]: ) -> Union[TFCLIPOutput, Tuple[tf.Tensor]]:
r""" r"""
Returns: Returns:
......
...@@ -581,7 +581,6 @@ class TFConvBertMainLayer(tf.keras.layers.Layer): ...@@ -581,7 +581,6 @@ class TFConvBertMainLayer(tf.keras.layers.Layer):
output_hidden_states=None, output_hidden_states=None,
return_dict=None, return_dict=None,
training=False, training=False,
**kwargs,
): ):
if input_ids is not None and inputs_embeds is not None: if input_ids is not None and inputs_embeds is not None:
raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time") raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
...@@ -751,7 +750,6 @@ class TFConvBertModel(TFConvBertPreTrainedModel): ...@@ -751,7 +750,6 @@ class TFConvBertModel(TFConvBertPreTrainedModel):
output_hidden_states=None, output_hidden_states=None,
return_dict=None, return_dict=None,
training=False, training=False,
**kwargs,
): ):
outputs = self.convbert( outputs = self.convbert(
input_ids=input_ids, input_ids=input_ids,
...@@ -870,7 +868,6 @@ class TFConvBertForMaskedLM(TFConvBertPreTrainedModel, TFMaskedLanguageModelingL ...@@ -870,7 +868,6 @@ class TFConvBertForMaskedLM(TFConvBertPreTrainedModel, TFMaskedLanguageModelingL
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
labels: Optional[tf.Tensor] = None, labels: Optional[tf.Tensor] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[Tuple, TFMaskedLMOutput]: ) -> Union[Tuple, TFMaskedLMOutput]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
...@@ -979,7 +976,6 @@ class TFConvBertForSequenceClassification(TFConvBertPreTrainedModel, TFSequenceC ...@@ -979,7 +976,6 @@ class TFConvBertForSequenceClassification(TFConvBertPreTrainedModel, TFSequenceC
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
labels: Optional[tf.Tensor] = None, labels: Optional[tf.Tensor] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[Tuple, TFSequenceClassifierOutput]: ) -> Union[Tuple, TFSequenceClassifierOutput]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
...@@ -1073,7 +1069,6 @@ class TFConvBertForMultipleChoice(TFConvBertPreTrainedModel, TFMultipleChoiceLos ...@@ -1073,7 +1069,6 @@ class TFConvBertForMultipleChoice(TFConvBertPreTrainedModel, TFMultipleChoiceLos
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
labels: Optional[tf.Tensor] = None, labels: Optional[tf.Tensor] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[Tuple, TFMultipleChoiceModelOutput]: ) -> Union[Tuple, TFMultipleChoiceModelOutput]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
...@@ -1188,7 +1183,6 @@ class TFConvBertForTokenClassification(TFConvBertPreTrainedModel, TFTokenClassif ...@@ -1188,7 +1183,6 @@ class TFConvBertForTokenClassification(TFConvBertPreTrainedModel, TFTokenClassif
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
labels: Optional[tf.Tensor] = None, labels: Optional[tf.Tensor] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[Tuple, TFTokenClassifierOutput]: ) -> Union[Tuple, TFTokenClassifierOutput]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
...@@ -1268,7 +1262,6 @@ class TFConvBertForQuestionAnswering(TFConvBertPreTrainedModel, TFQuestionAnswer ...@@ -1268,7 +1262,6 @@ class TFConvBertForQuestionAnswering(TFConvBertPreTrainedModel, TFQuestionAnswer
start_positions: Optional[tf.Tensor] = None, start_positions: Optional[tf.Tensor] = None,
end_positions: Optional[tf.Tensor] = None, end_positions: Optional[tf.Tensor] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[Tuple, TFQuestionAnsweringModelOutput]: ) -> Union[Tuple, TFQuestionAnsweringModelOutput]:
r""" r"""
start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*): start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
......
...@@ -293,7 +293,6 @@ class TFConvNextMainLayer(tf.keras.layers.Layer): ...@@ -293,7 +293,6 @@ class TFConvNextMainLayer(tf.keras.layers.Layer):
output_hidden_states: Optional[bool] = None, output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
training: bool = False, training: bool = False,
**kwargs,
) -> Union[TFBaseModelOutputWithPooling, Tuple[tf.Tensor]]: ) -> Union[TFBaseModelOutputWithPooling, Tuple[tf.Tensor]]:
output_hidden_states = ( output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
...@@ -439,7 +438,6 @@ class TFConvNextModel(TFConvNextPreTrainedModel): ...@@ -439,7 +438,6 @@ class TFConvNextModel(TFConvNextPreTrainedModel):
output_hidden_states: Optional[bool] = None, output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
training: bool = False, training: bool = False,
**kwargs,
) -> Union[TFBaseModelOutputWithPooling, Tuple[tf.Tensor]]: ) -> Union[TFBaseModelOutputWithPooling, Tuple[tf.Tensor]]:
r""" r"""
Returns: Returns:
...@@ -518,7 +516,6 @@ class TFConvNextForImageClassification(TFConvNextPreTrainedModel, TFSequenceClas ...@@ -518,7 +516,6 @@ class TFConvNextForImageClassification(TFConvNextPreTrainedModel, TFSequenceClas
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
labels: Optional[Union[np.ndarray, tf.Tensor]] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]:
r""" r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
......
...@@ -268,7 +268,6 @@ class TFCTRLMainLayer(tf.keras.layers.Layer): ...@@ -268,7 +268,6 @@ class TFCTRLMainLayer(tf.keras.layers.Layer):
output_hidden_states=None, output_hidden_states=None,
return_dict=None, return_dict=None,
training=False, training=False,
**kwargs,
): ):
# If using past key value states, only the last tokens # If using past key value states, only the last tokens
...@@ -541,7 +540,6 @@ class TFCTRLModel(TFCTRLPreTrainedModel): ...@@ -541,7 +540,6 @@ class TFCTRLModel(TFCTRLPreTrainedModel):
output_hidden_states=None, output_hidden_states=None,
return_dict=None, return_dict=None,
training=False, training=False,
**kwargs,
): ):
outputs = self.transformer( outputs = self.transformer(
input_ids=input_ids, input_ids=input_ids,
...@@ -653,7 +651,6 @@ class TFCTRLLMHeadModel(TFCTRLPreTrainedModel, TFCausalLanguageModelingLoss): ...@@ -653,7 +651,6 @@ class TFCTRLLMHeadModel(TFCTRLPreTrainedModel, TFCausalLanguageModelingLoss):
return_dict=None, return_dict=None,
labels=None, labels=None,
training=False, training=False,
**kwargs,
): ):
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
...@@ -765,7 +762,6 @@ class TFCTRLForSequenceClassification(TFCTRLPreTrainedModel, TFSequenceClassific ...@@ -765,7 +762,6 @@ class TFCTRLForSequenceClassification(TFCTRLPreTrainedModel, TFSequenceClassific
return_dict=None, return_dict=None,
labels=None, labels=None,
training=False, training=False,
**kwargs,
): ):
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
......
...@@ -928,7 +928,6 @@ class TFDebertaMainLayer(tf.keras.layers.Layer): ...@@ -928,7 +928,6 @@ class TFDebertaMainLayer(tf.keras.layers.Layer):
output_hidden_states: Optional[bool] = None, output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
training: bool = False, training: bool = False,
**kwargs,
) -> Union[TFBaseModelOutput, Tuple[tf.Tensor]]: ) -> Union[TFBaseModelOutput, Tuple[tf.Tensor]]:
if input_ids is not None and inputs_embeds is not None: if input_ids is not None and inputs_embeds is not None:
...@@ -1096,7 +1095,6 @@ class TFDebertaModel(TFDebertaPreTrainedModel): ...@@ -1096,7 +1095,6 @@ class TFDebertaModel(TFDebertaPreTrainedModel):
output_hidden_states: Optional[bool] = None, output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFBaseModelOutput, Tuple[tf.Tensor]]: ) -> Union[TFBaseModelOutput, Tuple[tf.Tensor]]:
outputs = self.deberta( outputs = self.deberta(
input_ids=input_ids, input_ids=input_ids,
...@@ -1156,7 +1154,6 @@ class TFDebertaForMaskedLM(TFDebertaPreTrainedModel, TFMaskedLanguageModelingLos ...@@ -1156,7 +1154,6 @@ class TFDebertaForMaskedLM(TFDebertaPreTrainedModel, TFMaskedLanguageModelingLos
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
labels: Optional[Union[np.ndarray, tf.Tensor]] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]: ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]:
r""" r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*):
...@@ -1242,7 +1239,6 @@ class TFDebertaForSequenceClassification(TFDebertaPreTrainedModel, TFSequenceCla ...@@ -1242,7 +1239,6 @@ class TFDebertaForSequenceClassification(TFDebertaPreTrainedModel, TFSequenceCla
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
labels: Optional[Union[np.ndarray, tf.Tensor]] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]:
r""" r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
...@@ -1325,7 +1321,6 @@ class TFDebertaForTokenClassification(TFDebertaPreTrainedModel, TFTokenClassific ...@@ -1325,7 +1321,6 @@ class TFDebertaForTokenClassification(TFDebertaPreTrainedModel, TFTokenClassific
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
labels: Optional[Union[np.ndarray, tf.Tensor]] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]: ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]:
r""" r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*):
...@@ -1404,7 +1399,6 @@ class TFDebertaForQuestionAnswering(TFDebertaPreTrainedModel, TFQuestionAnswerin ...@@ -1404,7 +1399,6 @@ class TFDebertaForQuestionAnswering(TFDebertaPreTrainedModel, TFQuestionAnswerin
start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None,
end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]:
r""" r"""
start_positions (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): start_positions (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
......
...@@ -1028,7 +1028,6 @@ class TFDebertaV2MainLayer(tf.keras.layers.Layer): ...@@ -1028,7 +1028,6 @@ class TFDebertaV2MainLayer(tf.keras.layers.Layer):
output_hidden_states: Optional[bool] = None, output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
training: bool = False, training: bool = False,
**kwargs,
) -> Union[TFBaseModelOutput, Tuple[tf.Tensor]]: ) -> Union[TFBaseModelOutput, Tuple[tf.Tensor]]:
if input_ids is not None and inputs_embeds is not None: if input_ids is not None and inputs_embeds is not None:
...@@ -1198,7 +1197,6 @@ class TFDebertaV2Model(TFDebertaV2PreTrainedModel): ...@@ -1198,7 +1197,6 @@ class TFDebertaV2Model(TFDebertaV2PreTrainedModel):
output_hidden_states: Optional[bool] = None, output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFBaseModelOutput, Tuple[tf.Tensor]]: ) -> Union[TFBaseModelOutput, Tuple[tf.Tensor]]:
outputs = self.deberta( outputs = self.deberta(
input_ids=input_ids, input_ids=input_ids,
...@@ -1259,7 +1257,6 @@ class TFDebertaV2ForMaskedLM(TFDebertaV2PreTrainedModel, TFMaskedLanguageModelin ...@@ -1259,7 +1257,6 @@ class TFDebertaV2ForMaskedLM(TFDebertaV2PreTrainedModel, TFMaskedLanguageModelin
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
labels: Optional[Union[np.ndarray, tf.Tensor]] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]: ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]:
r""" r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*):
...@@ -1346,7 +1343,6 @@ class TFDebertaV2ForSequenceClassification(TFDebertaV2PreTrainedModel, TFSequenc ...@@ -1346,7 +1343,6 @@ class TFDebertaV2ForSequenceClassification(TFDebertaV2PreTrainedModel, TFSequenc
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
labels: Optional[Union[np.ndarray, tf.Tensor]] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]:
r""" r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
...@@ -1430,7 +1426,6 @@ class TFDebertaV2ForTokenClassification(TFDebertaV2PreTrainedModel, TFTokenClass ...@@ -1430,7 +1426,6 @@ class TFDebertaV2ForTokenClassification(TFDebertaV2PreTrainedModel, TFTokenClass
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
labels: Optional[Union[np.ndarray, tf.Tensor]] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]: ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]:
r""" r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*):
...@@ -1510,7 +1505,6 @@ class TFDebertaV2ForQuestionAnswering(TFDebertaV2PreTrainedModel, TFQuestionAnsw ...@@ -1510,7 +1505,6 @@ class TFDebertaV2ForQuestionAnswering(TFDebertaV2PreTrainedModel, TFQuestionAnsw
start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None,
end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]:
r""" r"""
start_positions (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): start_positions (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
......
...@@ -372,7 +372,6 @@ class TFDistilBertMainLayer(tf.keras.layers.Layer): ...@@ -372,7 +372,6 @@ class TFDistilBertMainLayer(tf.keras.layers.Layer):
output_hidden_states=None, output_hidden_states=None,
return_dict=None, return_dict=None,
training=False, training=False,
**kwargs,
): ):
if input_ids is not None and inputs_embeds is not None: if input_ids is not None and inputs_embeds is not None:
raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time") raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
...@@ -543,7 +542,6 @@ class TFDistilBertModel(TFDistilBertPreTrainedModel): ...@@ -543,7 +542,6 @@ class TFDistilBertModel(TFDistilBertPreTrainedModel):
output_hidden_states: Optional[bool] = None, output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFBaseModelOutput, Tuple[tf.Tensor]]: ) -> Union[TFBaseModelOutput, Tuple[tf.Tensor]]:
outputs = self.distilbert( outputs = self.distilbert(
input_ids=input_ids, input_ids=input_ids,
...@@ -647,7 +645,6 @@ class TFDistilBertForMaskedLM(TFDistilBertPreTrainedModel, TFMaskedLanguageModel ...@@ -647,7 +645,6 @@ class TFDistilBertForMaskedLM(TFDistilBertPreTrainedModel, TFMaskedLanguageModel
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
labels: Optional[Union[np.ndarray, tf.Tensor]] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]: ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
...@@ -735,7 +732,6 @@ class TFDistilBertForSequenceClassification(TFDistilBertPreTrainedModel, TFSeque ...@@ -735,7 +732,6 @@ class TFDistilBertForSequenceClassification(TFDistilBertPreTrainedModel, TFSeque
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
labels: Optional[Union[np.ndarray, tf.Tensor]] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
...@@ -817,7 +813,6 @@ class TFDistilBertForTokenClassification(TFDistilBertPreTrainedModel, TFTokenCla ...@@ -817,7 +813,6 @@ class TFDistilBertForTokenClassification(TFDistilBertPreTrainedModel, TFTokenCla
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
labels: Optional[Union[np.ndarray, tf.Tensor]] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]: ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
...@@ -911,7 +906,6 @@ class TFDistilBertForMultipleChoice(TFDistilBertPreTrainedModel, TFMultipleChoic ...@@ -911,7 +906,6 @@ class TFDistilBertForMultipleChoice(TFDistilBertPreTrainedModel, TFMultipleChoic
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
labels: Optional[Union[np.ndarray, tf.Tensor]] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFMultipleChoiceModelOutput, Tuple[tf.Tensor]]: ) -> Union[TFMultipleChoiceModelOutput, Tuple[tf.Tensor]]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
...@@ -1021,7 +1015,6 @@ class TFDistilBertForQuestionAnswering(TFDistilBertPreTrainedModel, TFQuestionAn ...@@ -1021,7 +1015,6 @@ class TFDistilBertForQuestionAnswering(TFDistilBertPreTrainedModel, TFQuestionAn
start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None,
end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]:
r""" r"""
start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*): start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
......
...@@ -174,7 +174,6 @@ class TFDPREncoderLayer(tf.keras.layers.Layer): ...@@ -174,7 +174,6 @@ class TFDPREncoderLayer(tf.keras.layers.Layer):
output_hidden_states: bool = None, output_hidden_states: bool = None,
return_dict: bool = None, return_dict: bool = None,
training: bool = False, training: bool = False,
**kwargs,
) -> Union[TFBaseModelOutputWithPooling, Tuple[tf.Tensor, ...]]: ) -> Union[TFBaseModelOutputWithPooling, Tuple[tf.Tensor, ...]]:
outputs = self.bert_model( outputs = self.bert_model(
input_ids=input_ids, input_ids=input_ids,
...@@ -235,7 +234,6 @@ class TFDPRSpanPredictorLayer(tf.keras.layers.Layer): ...@@ -235,7 +234,6 @@ class TFDPRSpanPredictorLayer(tf.keras.layers.Layer):
output_hidden_states: bool = False, output_hidden_states: bool = False,
return_dict: bool = False, return_dict: bool = False,
training: bool = False, training: bool = False,
**kwargs,
) -> Union[TFDPRReaderOutput, Tuple[tf.Tensor, ...]]: ) -> Union[TFDPRReaderOutput, Tuple[tf.Tensor, ...]]:
# notations: N - number of questions in a batch, M - number of passages per question, L - sequence length # notations: N - number of questions in a batch, M - number of passages per question, L - sequence length
n_passages, sequence_length = shape_list(input_ids) if input_ids is not None else shape_list(inputs_embeds)[:2] n_passages, sequence_length = shape_list(input_ids) if input_ids is not None else shape_list(inputs_embeds)[:2]
...@@ -294,7 +292,6 @@ class TFDPRSpanPredictor(TFPreTrainedModel): ...@@ -294,7 +292,6 @@ class TFDPRSpanPredictor(TFPreTrainedModel):
output_hidden_states: bool = False, output_hidden_states: bool = False,
return_dict: bool = False, return_dict: bool = False,
training: bool = False, training: bool = False,
**kwargs,
) -> Union[TFDPRReaderOutput, Tuple[tf.Tensor, ...]]: ) -> Union[TFDPRReaderOutput, Tuple[tf.Tensor, ...]]:
outputs = self.encoder( outputs = self.encoder(
input_ids=input_ids, input_ids=input_ids,
...@@ -328,7 +325,6 @@ class TFDPREncoder(TFPreTrainedModel): ...@@ -328,7 +325,6 @@ class TFDPREncoder(TFPreTrainedModel):
output_hidden_states: bool = False, output_hidden_states: bool = False,
return_dict: bool = False, return_dict: bool = False,
training: bool = False, training: bool = False,
**kwargs,
) -> Union[TFDPRReaderOutput, Tuple[tf.Tensor, ...]]: ) -> Union[TFDPRReaderOutput, Tuple[tf.Tensor, ...]]:
outputs = self.encoder( outputs = self.encoder(
input_ids=input_ids, input_ids=input_ids,
...@@ -560,7 +556,6 @@ class TFDPRContextEncoder(TFDPRPretrainedContextEncoder): ...@@ -560,7 +556,6 @@ class TFDPRContextEncoder(TFDPRPretrainedContextEncoder):
output_hidden_states=None, output_hidden_states=None,
return_dict=None, return_dict=None,
training: bool = False, training: bool = False,
**kwargs,
) -> Union[TFDPRContextEncoderOutput, Tuple[tf.Tensor, ...]]: ) -> Union[TFDPRContextEncoderOutput, Tuple[tf.Tensor, ...]]:
r""" r"""
Return: Return:
...@@ -648,7 +643,6 @@ class TFDPRQuestionEncoder(TFDPRPretrainedQuestionEncoder): ...@@ -648,7 +643,6 @@ class TFDPRQuestionEncoder(TFDPRPretrainedQuestionEncoder):
output_hidden_states=None, output_hidden_states=None,
return_dict=None, return_dict=None,
training: bool = False, training: bool = False,
**kwargs,
) -> Union[TFDPRQuestionEncoderOutput, Tuple[tf.Tensor, ...]]: ) -> Union[TFDPRQuestionEncoderOutput, Tuple[tf.Tensor, ...]]:
r""" r"""
Return: Return:
...@@ -734,7 +728,6 @@ class TFDPRReader(TFDPRPretrainedReader): ...@@ -734,7 +728,6 @@ class TFDPRReader(TFDPRPretrainedReader):
output_hidden_states: bool = None, output_hidden_states: bool = None,
return_dict=None, return_dict=None,
training: bool = False, training: bool = False,
**kwargs,
) -> Union[TFDPRReaderOutput, Tuple[tf.Tensor, ...]]: ) -> Union[TFDPRReaderOutput, Tuple[tf.Tensor, ...]]:
r""" r"""
Return: Return:
......
...@@ -719,7 +719,6 @@ class TFElectraMainLayer(tf.keras.layers.Layer): ...@@ -719,7 +719,6 @@ class TFElectraMainLayer(tf.keras.layers.Layer):
output_hidden_states: Optional[bool] = None, output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, Tuple[tf.Tensor]]: ) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, Tuple[tf.Tensor]]:
if not self.config.is_decoder: if not self.config.is_decoder:
use_cache = False use_cache = False
...@@ -953,7 +952,6 @@ class TFElectraModel(TFElectraPreTrainedModel): ...@@ -953,7 +952,6 @@ class TFElectraModel(TFElectraPreTrainedModel):
output_hidden_states: Optional[bool] = None, output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, Tuple[tf.Tensor]]: ) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, Tuple[tf.Tensor]]:
r""" r"""
encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
...@@ -1043,7 +1041,6 @@ class TFElectraForPreTraining(TFElectraPreTrainedModel): ...@@ -1043,7 +1041,6 @@ class TFElectraForPreTraining(TFElectraPreTrainedModel):
output_hidden_states: Optional[bool] = None, output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFElectraForPreTrainingOutput, Tuple[tf.Tensor]]: ) -> Union[TFElectraForPreTrainingOutput, Tuple[tf.Tensor]]:
r""" r"""
Returns: Returns:
...@@ -1180,7 +1177,6 @@ class TFElectraForMaskedLM(TFElectraPreTrainedModel, TFMaskedLanguageModelingLos ...@@ -1180,7 +1177,6 @@ class TFElectraForMaskedLM(TFElectraPreTrainedModel, TFMaskedLanguageModelingLos
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
labels: Optional[Union[np.ndarray, tf.Tensor]] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]: ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
...@@ -1290,7 +1286,6 @@ class TFElectraForSequenceClassification(TFElectraPreTrainedModel, TFSequenceCla ...@@ -1290,7 +1286,6 @@ class TFElectraForSequenceClassification(TFElectraPreTrainedModel, TFSequenceCla
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
labels: Optional[Union[np.ndarray, tf.Tensor]] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
...@@ -1383,7 +1378,6 @@ class TFElectraForMultipleChoice(TFElectraPreTrainedModel, TFMultipleChoiceLoss) ...@@ -1383,7 +1378,6 @@ class TFElectraForMultipleChoice(TFElectraPreTrainedModel, TFMultipleChoiceLoss)
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
labels: Optional[Union[np.ndarray, tf.Tensor]] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFMultipleChoiceModelOutput, Tuple[tf.Tensor]]: ) -> Union[TFMultipleChoiceModelOutput, Tuple[tf.Tensor]]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
...@@ -1501,7 +1495,6 @@ class TFElectraForTokenClassification(TFElectraPreTrainedModel, TFTokenClassific ...@@ -1501,7 +1495,6 @@ class TFElectraForTokenClassification(TFElectraPreTrainedModel, TFTokenClassific
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
labels: Optional[Union[np.ndarray, tf.Tensor]] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]: ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
...@@ -1583,7 +1576,6 @@ class TFElectraForQuestionAnswering(TFElectraPreTrainedModel, TFQuestionAnswerin ...@@ -1583,7 +1576,6 @@ class TFElectraForQuestionAnswering(TFElectraPreTrainedModel, TFQuestionAnswerin
start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None,
end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]:
r""" r"""
start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*): start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
......
...@@ -23,7 +23,7 @@ import tensorflow as tf ...@@ -23,7 +23,7 @@ import tensorflow as tf
from ...configuration_utils import PretrainedConfig from ...configuration_utils import PretrainedConfig
from ...modeling_tf_outputs import TFBaseModelOutput, TFSeq2SeqLMOutput from ...modeling_tf_outputs import TFBaseModelOutput, TFSeq2SeqLMOutput
from ...modeling_tf_utils import TFCausalLanguageModelingLoss, TFPreTrainedModel, get_initializer, input_processing from ...modeling_tf_utils import TFCausalLanguageModelingLoss, TFPreTrainedModel, get_initializer, unpack_inputs
from ...tf_utils import shape_list from ...tf_utils import shape_list
from ...utils import ( from ...utils import (
DUMMY_INPUTS, DUMMY_INPUTS,
...@@ -491,6 +491,7 @@ class TFEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLoss): ...@@ -491,6 +491,7 @@ class TFEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLoss):
config = EncoderDecoderConfig.from_encoder_decoder_configs(encoder.config, decoder.config, **kwargs) config = EncoderDecoderConfig.from_encoder_decoder_configs(encoder.config, decoder.config, **kwargs)
return cls(encoder=encoder, decoder=decoder, config=config) return cls(encoder=encoder, decoder=decoder, config=config)
@unpack_inputs
@add_start_docstrings_to_model_forward(ENCODER_DECODER_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_start_docstrings_to_model_forward(ENCODER_DECODER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@replace_return_docstrings(output_type=TFSeq2SeqLMOutput, config_class=_CONFIG_FOR_DOC) @replace_return_docstrings(output_type=TFSeq2SeqLMOutput, config_class=_CONFIG_FOR_DOC)
def call( def call(
...@@ -559,9 +560,7 @@ class TFEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLoss): ...@@ -559,9 +560,7 @@ class TFEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLoss):
if encoder_outputs is None: if encoder_outputs is None:
encoder_processing_inputs = { encoder_inputs = {
"func": self.encoder.call,
"config": self.encoder.config,
"input_ids": input_ids, "input_ids": input_ids,
"attention_mask": attention_mask, "attention_mask": attention_mask,
"inputs_embeds": inputs_embeds, "inputs_embeds": inputs_embeds,
...@@ -569,14 +568,10 @@ class TFEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLoss): ...@@ -569,14 +568,10 @@ class TFEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLoss):
"output_hidden_states": output_hidden_states, "output_hidden_states": output_hidden_states,
"return_dict": return_dict, "return_dict": return_dict,
"training": training, "training": training,
"kwargs_call": {},
} }
# Add arguments to encoder from `kwargs_encoder` # Add arguments to encoder from `kwargs_encoder`
for k, v in kwargs_encoder.items(): encoder_inputs.update(kwargs_encoder)
encoder_processing_inputs[k] = v
encoder_inputs = input_processing(**encoder_processing_inputs)
# Handle the case where the inputs are passed as a single dict which contains `labels`. # Handle the case where the inputs are passed as a single dict which contains `labels`.
# The `labels` shouldn't be passed to `self.encoder` below, because it is a based model without this # The `labels` shouldn't be passed to `self.encoder` below, because it is a based model without this
...@@ -607,9 +602,7 @@ class TFEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLoss): ...@@ -607,9 +602,7 @@ class TFEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLoss):
labels, self.config.pad_token_id, self.config.decoder_start_token_id labels, self.config.pad_token_id, self.config.decoder_start_token_id
) )
decoder_processing_inputs = { decoder_inputs = {
"func": self.decoder.call,
"config": self.decoder.config,
"input_ids": decoder_input_ids, "input_ids": decoder_input_ids,
"attention_mask": decoder_attention_mask, "attention_mask": decoder_attention_mask,
"encoder_hidden_states": encoder_hidden_states, "encoder_hidden_states": encoder_hidden_states,
...@@ -621,14 +614,11 @@ class TFEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLoss): ...@@ -621,14 +614,11 @@ class TFEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLoss):
"past_key_values": past_key_values, "past_key_values": past_key_values,
"return_dict": return_dict, "return_dict": return_dict,
"training": training, "training": training,
"kwargs_call": {},
} }
# Add arguments to decoder from `kwargs_decoder` # Add arguments to decoder from `kwargs_decoder`
for k, v in kwargs_decoder.items(): decoder_inputs.update(kwargs_decoder)
decoder_processing_inputs[k] = v
decoder_inputs = input_processing(**decoder_processing_inputs)
decoder_outputs = self.decoder(**decoder_inputs) decoder_outputs = self.decoder(**decoder_inputs)
logits = decoder_outputs[0] logits = decoder_outputs[0]
......
...@@ -258,7 +258,6 @@ class TFFlaubertModel(TFFlaubertPreTrainedModel): ...@@ -258,7 +258,6 @@ class TFFlaubertModel(TFFlaubertPreTrainedModel):
output_hidden_states: Optional[bool] = None, output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[Tuple, TFBaseModelOutput]: ) -> Union[Tuple, TFBaseModelOutput]:
outputs = self.transformer( outputs = self.transformer(
input_ids=input_ids, input_ids=input_ids,
...@@ -490,7 +489,6 @@ class TFFlaubertMainLayer(tf.keras.layers.Layer): ...@@ -490,7 +489,6 @@ class TFFlaubertMainLayer(tf.keras.layers.Layer):
output_hidden_states: Optional[bool] = None, output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[Tuple, TFBaseModelOutput]: ) -> Union[Tuple, TFBaseModelOutput]:
# removed: src_enc=None, src_len=None # removed: src_enc=None, src_len=None
...@@ -808,7 +806,6 @@ class TFFlaubertWithLMHeadModel(TFFlaubertPreTrainedModel): ...@@ -808,7 +806,6 @@ class TFFlaubertWithLMHeadModel(TFFlaubertPreTrainedModel):
output_hidden_states: Optional[bool] = None, output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[Tuple, TFFlaubertWithLMHeadModelOutput]: ) -> Union[Tuple, TFFlaubertWithLMHeadModelOutput]:
transformer_outputs = self.transformer( transformer_outputs = self.transformer(
......
...@@ -761,7 +761,6 @@ class TFFunnelBaseLayer(tf.keras.layers.Layer): ...@@ -761,7 +761,6 @@ class TFFunnelBaseLayer(tf.keras.layers.Layer):
output_hidden_states=None, output_hidden_states=None,
return_dict=None, return_dict=None,
training=False, training=False,
**kwargs,
): ):
if input_ids is not None and inputs_embeds is not None: if input_ids is not None and inputs_embeds is not None:
...@@ -835,7 +834,6 @@ class TFFunnelMainLayer(tf.keras.layers.Layer): ...@@ -835,7 +834,6 @@ class TFFunnelMainLayer(tf.keras.layers.Layer):
output_hidden_states=None, output_hidden_states=None,
return_dict=None, return_dict=None,
training=False, training=False,
**kwargs,
): ):
if input_ids is not None and inputs_embeds is not None: if input_ids is not None and inputs_embeds is not None:
raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time") raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
...@@ -1117,7 +1115,6 @@ class TFFunnelBaseModel(TFFunnelPreTrainedModel): ...@@ -1117,7 +1115,6 @@ class TFFunnelBaseModel(TFFunnelPreTrainedModel):
output_hidden_states: Optional[bool] = None, output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
training: bool = False, training: bool = False,
**kwargs,
) -> Union[Tuple[tf.Tensor], TFBaseModelOutput]: ) -> Union[Tuple[tf.Tensor], TFBaseModelOutput]:
return self.funnel( return self.funnel(
input_ids=input_ids, input_ids=input_ids,
...@@ -1165,7 +1162,6 @@ class TFFunnelModel(TFFunnelPreTrainedModel): ...@@ -1165,7 +1162,6 @@ class TFFunnelModel(TFFunnelPreTrainedModel):
output_hidden_states: Optional[bool] = None, output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
training: bool = False, training: bool = False,
**kwargs,
) -> Union[Tuple[tf.Tensor], TFBaseModelOutput]: ) -> Union[Tuple[tf.Tensor], TFBaseModelOutput]:
return self.funnel( return self.funnel(
...@@ -1293,7 +1289,6 @@ class TFFunnelForMaskedLM(TFFunnelPreTrainedModel, TFMaskedLanguageModelingLoss) ...@@ -1293,7 +1289,6 @@ class TFFunnelForMaskedLM(TFFunnelPreTrainedModel, TFMaskedLanguageModelingLoss)
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
labels: Optional[Union[np.ndarray, tf.Tensor]] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
training: bool = False, training: bool = False,
**kwargs,
) -> Union[Tuple[tf.Tensor], TFMaskedLMOutput]: ) -> Union[Tuple[tf.Tensor], TFMaskedLMOutput]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
...@@ -1369,7 +1364,6 @@ class TFFunnelForSequenceClassification(TFFunnelPreTrainedModel, TFSequenceClass ...@@ -1369,7 +1364,6 @@ class TFFunnelForSequenceClassification(TFFunnelPreTrainedModel, TFSequenceClass
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
labels: Optional[Union[np.ndarray, tf.Tensor]] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
training: bool = False, training: bool = False,
**kwargs,
) -> Union[Tuple[tf.Tensor], TFSequenceClassifierOutput]: ) -> Union[Tuple[tf.Tensor], TFSequenceClassifierOutput]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
...@@ -1455,7 +1449,6 @@ class TFFunnelForMultipleChoice(TFFunnelPreTrainedModel, TFMultipleChoiceLoss): ...@@ -1455,7 +1449,6 @@ class TFFunnelForMultipleChoice(TFFunnelPreTrainedModel, TFMultipleChoiceLoss):
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
labels: Optional[Union[np.ndarray, tf.Tensor]] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
training: bool = False, training: bool = False,
**kwargs,
) -> Union[Tuple[tf.Tensor], TFMultipleChoiceModelOutput]: ) -> Union[Tuple[tf.Tensor], TFMultipleChoiceModelOutput]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
...@@ -1566,7 +1559,6 @@ class TFFunnelForTokenClassification(TFFunnelPreTrainedModel, TFTokenClassificat ...@@ -1566,7 +1559,6 @@ class TFFunnelForTokenClassification(TFFunnelPreTrainedModel, TFTokenClassificat
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
labels: Optional[Union[np.ndarray, tf.Tensor]] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
training: bool = False, training: bool = False,
**kwargs,
) -> Union[Tuple[tf.Tensor], TFTokenClassifierOutput]: ) -> Union[Tuple[tf.Tensor], TFTokenClassifierOutput]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
...@@ -1645,7 +1637,6 @@ class TFFunnelForQuestionAnswering(TFFunnelPreTrainedModel, TFQuestionAnsweringL ...@@ -1645,7 +1637,6 @@ class TFFunnelForQuestionAnswering(TFFunnelPreTrainedModel, TFQuestionAnsweringL
start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None,
end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None,
training: bool = False, training: bool = False,
**kwargs,
) -> Union[Tuple[tf.Tensor], TFQuestionAnsweringModelOutput]: ) -> Union[Tuple[tf.Tensor], TFQuestionAnsweringModelOutput]:
r""" r"""
start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*): start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
......
...@@ -367,7 +367,6 @@ class TFGPT2MainLayer(tf.keras.layers.Layer): ...@@ -367,7 +367,6 @@ class TFGPT2MainLayer(tf.keras.layers.Layer):
output_hidden_states: Optional[bool] = None, output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, Tuple[tf.Tensor]]: ) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, Tuple[tf.Tensor]]:
if input_ids is not None and inputs_embeds is not None: if input_ids is not None and inputs_embeds is not None:
...@@ -730,7 +729,6 @@ class TFGPT2Model(TFGPT2PreTrainedModel): ...@@ -730,7 +729,6 @@ class TFGPT2Model(TFGPT2PreTrainedModel):
output_hidden_states: Optional[bool] = None, output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, Tuple[tf.Tensor]]: ) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, Tuple[tf.Tensor]]:
r""" r"""
encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
...@@ -920,7 +918,6 @@ class TFGPT2LMHeadModel(TFGPT2PreTrainedModel, TFCausalLanguageModelingLoss): ...@@ -920,7 +918,6 @@ class TFGPT2LMHeadModel(TFGPT2PreTrainedModel, TFCausalLanguageModelingLoss):
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
labels: Optional[Union[np.ndarray, tf.Tensor]] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFCausalLMOutputWithCrossAttentions, Tuple[tf.Tensor]]: ) -> Union[TFCausalLMOutputWithCrossAttentions, Tuple[tf.Tensor]]:
r""" r"""
encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
...@@ -1038,7 +1035,6 @@ class TFGPT2DoubleHeadsModel(TFGPT2PreTrainedModel): ...@@ -1038,7 +1035,6 @@ class TFGPT2DoubleHeadsModel(TFGPT2PreTrainedModel):
output_hidden_states: Optional[bool] = None, output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFGPT2DoubleHeadsModelOutput, Tuple[tf.Tensor]]: ) -> Union[TFGPT2DoubleHeadsModelOutput, Tuple[tf.Tensor]]:
r""" r"""
mc_token_ids (`tf.Tensor` or `Numpy array` of shape `(batch_size, num_choices)`, *optional*, default to index of the last token of the input): mc_token_ids (`tf.Tensor` or `Numpy array` of shape `(batch_size, num_choices)`, *optional*, default to index of the last token of the input):
...@@ -1195,7 +1191,6 @@ class TFGPT2ForSequenceClassification(TFGPT2PreTrainedModel, TFSequenceClassific ...@@ -1195,7 +1191,6 @@ class TFGPT2ForSequenceClassification(TFGPT2PreTrainedModel, TFSequenceClassific
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
labels: Optional[Union[np.ndarray, tf.Tensor]] = None, labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
) -> Union[TFSequenceClassifierOutputWithPast, Tuple[tf.Tensor]]: ) -> Union[TFSequenceClassifierOutputWithPast, Tuple[tf.Tensor]]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
......
...@@ -390,7 +390,6 @@ class TFGPTJMainLayer(tf.keras.layers.Layer): ...@@ -390,7 +390,6 @@ class TFGPTJMainLayer(tf.keras.layers.Layer):
output_hidden_states=None, output_hidden_states=None,
return_dict=None, return_dict=None,
training=False, training=False,
**kwargs,
): ):
if input_ids is not None and inputs_embeds is not None: if input_ids is not None and inputs_embeds is not None:
...@@ -672,7 +671,6 @@ class TFGPTJModel(TFGPTJPreTrainedModel): ...@@ -672,7 +671,6 @@ class TFGPTJModel(TFGPTJPreTrainedModel):
output_hidden_states: Optional[bool] = None, output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
): ):
r""" r"""
use_cache (`bool`, *optional*, defaults to `True`): use_cache (`bool`, *optional*, defaults to `True`):
...@@ -781,7 +779,6 @@ class TFGPTJForCausalLM(TFGPTJPreTrainedModel, TFCausalLanguageModelingLoss): ...@@ -781,7 +779,6 @@ class TFGPTJForCausalLM(TFGPTJPreTrainedModel, TFCausalLanguageModelingLoss):
output_hidden_states: Optional[bool] = None, output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
): ):
r""" r"""
labels (`np.ndarray` or `tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`np.ndarray` or `tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
...@@ -886,7 +883,6 @@ class TFGPTJForSequenceClassification(TFGPTJPreTrainedModel, TFSequenceClassific ...@@ -886,7 +883,6 @@ class TFGPTJForSequenceClassification(TFGPTJPreTrainedModel, TFSequenceClassific
output_hidden_states: Optional[bool] = None, output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
): ):
r""" r"""
labels (`np.ndarray` or `tf.Tensor` of shape `(batch_size,)`, *optional*): labels (`np.ndarray` or `tf.Tensor` of shape `(batch_size,)`, *optional*):
...@@ -1011,7 +1007,6 @@ class TFGPTJForQuestionAnswering(TFGPTJPreTrainedModel, TFQuestionAnsweringLoss) ...@@ -1011,7 +1007,6 @@ class TFGPTJForQuestionAnswering(TFGPTJPreTrainedModel, TFQuestionAnsweringLoss)
output_hidden_states: Optional[bool] = None, output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None, return_dict: Optional[bool] = None,
training: Optional[bool] = False, training: Optional[bool] = False,
**kwargs,
): ):
r""" r"""
start_positions (`np.ndarray` or `tf.Tensor` of shape `(batch_size,)`, *optional*): start_positions (`np.ndarray` or `tf.Tensor` of shape `(batch_size,)`, *optional*):
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment