Unverified Commit eaf5e98e authored by David Reguera, committed by GitHub

Add type hints for tf models batch 1 (#25853)

* Add type hints to `TFBlipTextModel`

* Add missing type hints to DPR family models

* Add type hints to `TFLEDModel`

* Add type hints to `TFLxmertForPreTraining`

* Add missing type hints to `TFMarianMTModel` and `TFMarianModel`

* Add missing type hints to `TFRagModel` & `TFRagTokenForGeneration`

* Make type hint annotations consistent
parent 9c5acca0
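Not part of the diff, but for reference: the style these commits converge on is the PEP 604 `X | None` union for optional arguments, `TFModelInputType` for `input_ids`, and a concrete `bool = False` default for `training`. Below is a minimal sketch of the pattern; the class and field names are hypothetical, and it assumes the modeling files enable postponed annotation evaluation (`from __future__ import annotations`) so that the `|` syntax stays compatible with Python versions older than 3.10.

```python
# Minimal sketch of the annotation style used in this batch (illustrative only;
# `SketchModel` and its arguments are hypothetical, not part of the diff).
from __future__ import annotations  # keeps `X | None` annotations unevaluated at runtime

from typing import Tuple

import tensorflow as tf


class SketchModel:
    def call(
        self,
        input_ids: tf.Tensor | None = None,            # in transformers this is TFModelInputType | None
        attention_mask: tf.Tensor | None = None,
        past_key_values: Tuple[Tuple[tf.Tensor]] | None = None,
        output_attentions: bool | None = None,          # tri-state flags stay `bool | None`
        return_dict: bool | None = None,
        training: bool = False,                         # `training` keeps a concrete default
    ) -> Tuple[tf.Tensor] | dict:
        ...
```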
@@ -27,6 +27,7 @@ from ...modeling_tf_outputs import (
TFCausalLMOutputWithCrossAttentions,
)
from ...modeling_tf_utils import (
TFModelInputType,
TFPreTrainedModel,
get_initializer,
get_tf_activation,
@@ -668,22 +669,22 @@ class TFBlipTextModel(TFBlipTextPreTrainedModel):
@unpack_inputs
def call(
self,
input_ids=None,
attention_mask=None,
position_ids=None,
head_mask=None,
inputs_embeds=None,
encoder_embeds=None,
encoder_hidden_states=None,
encoder_attention_mask=None,
past_key_values=None,
use_cache=None,
output_attentions=None,
output_hidden_states=None,
return_dict=None,
is_decoder=False,
training=None,
):
input_ids: TFModelInputType | None = None,
attention_mask: tf.Tensor | None = None,
position_ids: tf.Tensor | None = None,
head_mask: tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
encoder_embeds: tf.Tensor | None = None,
encoder_hidden_states: tf.Tensor | None = None,
encoder_attention_mask: tf.Tensor | None = None,
past_key_values: Tuple[Tuple[tf.Tensor]] | None = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
is_decoder: bool = False,
training: bool = False,
) -> Tuple[tf.Tensor] | TFBaseModelOutputWithPoolingAndCrossAttentions:
r"""
encoder_hidden_states (`tf.Tensor`, *optional*):
Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
......
@@ -23,7 +23,7 @@ from typing import Tuple, Union
import tensorflow as tf
from ...modeling_tf_outputs import TFBaseModelOutputWithPooling
from ...modeling_tf_utils import TFPreTrainedModel, get_initializer, shape_list, unpack_inputs
from ...modeling_tf_utils import TFModelInputType, TFPreTrainedModel, get_initializer, shape_list, unpack_inputs
from ...utils import (
ModelOutput,
add_start_docstrings,
@@ -540,15 +540,15 @@ class TFDPRContextEncoder(TFDPRPretrainedContextEncoder):
@replace_return_docstrings(output_type=TFDPRContextEncoderOutput, config_class=_CONFIG_FOR_DOC)
def call(
self,
input_ids=None,
input_ids: TFModelInputType | None = None,
attention_mask: tf.Tensor | None = None,
token_type_ids: tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
output_attentions=None,
output_hidden_states=None,
return_dict=None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[TFDPRContextEncoderOutput, Tuple[tf.Tensor, ...]]:
) -> TFDPRContextEncoderOutput | Tuple[tf.Tensor, ...]:
r"""
Return:
@@ -621,15 +621,15 @@ class TFDPRQuestionEncoder(TFDPRPretrainedQuestionEncoder):
@replace_return_docstrings(output_type=TFDPRQuestionEncoderOutput, config_class=_CONFIG_FOR_DOC)
def call(
self,
input_ids=None,
input_ids: TFModelInputType | None = None,
attention_mask: tf.Tensor | None = None,
token_type_ids: tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
output_attentions=None,
output_hidden_states=None,
return_dict=None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[TFDPRQuestionEncoderOutput, Tuple[tf.Tensor, ...]]:
) -> TFDPRQuestionEncoderOutput | Tuple[tf.Tensor, ...]:
r"""
Return:
@@ -701,14 +701,14 @@ class TFDPRReader(TFDPRPretrainedReader):
@replace_return_docstrings(output_type=TFDPRReaderOutput, config_class=_CONFIG_FOR_DOC)
def call(
self,
input_ids=None,
input_ids: TFModelInputType | None = None,
attention_mask: tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
output_attentions: bool = None,
output_hidden_states: bool = None,
return_dict=None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[TFDPRReaderOutput, Tuple[tf.Tensor, ...]]:
) -> TFDPRReaderOutput | Tuple[tf.Tensor, ...]:
r"""
Return:
......
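For context (not part of the diff), a minimal usage sketch of the updated DPR signatures. The checkpoint name and the `from_pt=True` flag are assumptions about published weights, not something this commit adds.

```python
# Illustrative only: exercises the annotated TFDPRQuestionEncoder.call signature.
from transformers import DPRQuestionEncoderTokenizer, TFDPRQuestionEncoder

# Assumed public checkpoint; from_pt=True converts PyTorch weights if no TF weights are published.
tokenizer = DPRQuestionEncoderTokenizer.from_pretrained("facebook/dpr-question_encoder-single-nq-base")
model = TFDPRQuestionEncoder.from_pretrained("facebook/dpr-question_encoder-single-nq-base", from_pt=True)

inputs = tokenizer("What does a question encoder return?", return_tensors="tf")
# With return_dict=True the annotated return type is TFDPRQuestionEncoderOutput;
# with return_dict=False it falls back to Tuple[tf.Tensor, ...].
outputs = model(inputs["input_ids"], attention_mask=inputs["attention_mask"], return_dict=True)
question_embedding = outputs.pooler_output
```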
@@ -1571,7 +1571,7 @@ LED_INPUTS_DOCSTRING = r"""
- 0 for tokens that are **masked**.
[What are attention masks?](../glossary#attention-mask)
decoder_input_ids (`tf.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
decoder_input_ids (`tf.Tensor` of shape `(batch_size, target_sequence_length)`, *optional*):
Indices of decoder input sequence tokens in the vocabulary.
Indices can be obtained using [`LedTokenizer`]. See [`PreTrainedTokenizer.encode`] and
@@ -1595,7 +1595,7 @@ LED_INPUTS_DOCSTRING = r"""
- 1 indicates the head is **not masked**,
- 0 indicates the head is **masked**.
encoder_outputs (`tf.FloatTensor`, *optional*):
encoder_outputs (`tf.Tensor`, *optional*):
Hidden-states at the output of the last layer of the encoder, of shape `(batch_size, sequence_length, hidden_size)`. Used in the cross-attention of the decoder.
past_key_values (`Tuple[Tuple[tf.Tensor]]` of length `config.n_layers`)
@@ -2236,24 +2236,24 @@ class TFLEDModel(TFLEDPreTrainedModel):
)
def call(
self,
input_ids=None,
attention_mask=None,
decoder_input_ids=None,
decoder_attention_mask=None,
head_mask=None,
decoder_head_mask=None,
encoder_outputs: Optional[Union[Tuple, TFLEDEncoderBaseModelOutput]] = None,
global_attention_mask=None,
past_key_values=None,
inputs_embeds=None,
decoder_inputs_embeds=None,
use_cache=None,
output_attentions=None,
output_hidden_states=None,
return_dict=None,
training=False,
input_ids: TFModelInputType | None = None,
attention_mask: tf.Tensor | None = None,
decoder_input_ids: tf.Tensor | None = None,
decoder_attention_mask: tf.Tensor | None = None,
head_mask: tf.Tensor | None = None,
decoder_head_mask: tf.Tensor | None = None,
encoder_outputs: tf.Tensor | None = None,
global_attention_mask: tf.Tensor | None = None,
past_key_values: Tuple[Tuple[tf.Tensor]] | None = None,
inputs_embeds: tf.Tensor | None = None,
decoder_inputs_embeds: tf.Tensor | None = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
**kwargs,
):
) -> Tuple[tf.Tensor] | TFLEDSeq2SeqModelOutput:
outputs = self.led(
input_ids=input_ids,
attention_mask=attention_mask,
@@ -2371,18 +2371,18 @@ class TFLEDForConditionalGeneration(TFLEDPreTrainedModel):
decoder_attention_mask: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
decoder_head_mask: np.ndarray | tf.Tensor | None = None,
encoder_outputs: Optional[TFLEDEncoderBaseModelOutput] = None,
encoder_outputs: TFLEDEncoderBaseModelOutput | None = None,
global_attention_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
past_key_values: Tuple[Tuple[Union[np.ndarray, tf.Tensor]]] | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: tf.Tensor | None = None,
training: bool = False,
):
) -> Tuple[tf.Tensor] | TFLEDSeq2SeqLMOutput:
"""
Returns:
......
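For context (not part of the diff), a short sketch of calling the annotated `TFLEDModel`; the checkpoint name is an assumption about published weights.

```python
# Illustrative only: the annotated TFLEDModel.call returns a TFLEDSeq2SeqModelOutput
# (or a plain tuple when return_dict=False).
from transformers import LEDTokenizer, TFLEDModel

tokenizer = LEDTokenizer.from_pretrained("allenai/led-base-16384")
# add from_pt=True if the checkpoint only ships PyTorch weights
model = TFLEDModel.from_pretrained("allenai/led-base-16384")

inputs = tokenizer("Long-document inputs go here.", return_tensors="tf")
outputs = model(inputs["input_ids"], attention_mask=inputs["attention_mask"])
encoder_states = outputs.last_hidden_state
```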
@@ -1251,22 +1251,22 @@ class TFLxmertForPreTraining(TFLxmertPreTrainedModel):
@replace_return_docstrings(output_type=TFLxmertForPreTrainingOutput, config_class=_CONFIG_FOR_DOC)
def call(
self,
input_ids=None,
visual_feats=None,
visual_pos=None,
attention_mask=None,
visual_attention_mask=None,
token_type_ids=None,
inputs_embeds=None,
masked_lm_labels=None,
obj_labels=None,
matched_label=None,
ans=None,
output_attentions=None,
output_hidden_states=None,
return_dict=None,
training=False,
):
input_ids: TFModelInputType | None = None,
visual_feats: tf.Tensor | None = None,
visual_pos: tf.Tensor | None = None,
attention_mask: tf.Tensor | None = None,
visual_attention_mask: tf.Tensor | None = None,
token_type_ids: tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
masked_lm_labels: tf.Tensor | None = None,
obj_labels: Dict[str, Tuple[tf.Tensor, tf.Tensor]] | None = None,
matched_label: tf.Tensor | None = None,
ans: tf.Tensor | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Tuple[tf.Tensor] | TFLxmertForPreTrainingOutput:
r"""
masked_lm_labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@@ -1282,7 +1282,7 @@ class TFLxmertForPreTraining(TFLxmertPreTrainedModel):
- 0 indicates that the sentence does not match the image,
- 1 indicates that the sentence does match the image.
ans (`Torch.Tensor` of shape `(batch_size)`, *optional*, defaults to `None`):
ans (`tf.Tensor` of shape `(batch_size)`, *optional*, defaults to `None`):
a one-hot representation of the correct answer *optional*
Returns:
......
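For context (not part of the diff), a sketch of the tensor kinds behind the new `TFLxmertForPreTraining` annotations. The checkpoint, box count, and feature dimension are assumptions; LXMERT expects visual features from an external object detector, so random tensors stand in for them here.

```python
# Illustrative only: shows which inputs carry the tf.Tensor annotations added above.
import tensorflow as tf
from transformers import LxmertTokenizer, TFLxmertForPreTraining

tokenizer = LxmertTokenizer.from_pretrained("unc-nlp/lxmert-base-uncased")
model = TFLxmertForPreTraining.from_pretrained("unc-nlp/lxmert-base-uncased")

inputs = tokenizer("A cat sits on a mat.", return_tensors="tf")
# Placeholder visual inputs; real features come from a detector such as Faster R-CNN.
visual_feats = tf.random.uniform((1, 36, 2048))  # (batch, num_boxes, visual_feat_dim), assumed dims
visual_pos = tf.random.uniform((1, 36, 4))       # (batch, num_boxes, 4) normalized box coordinates
outputs = model(inputs["input_ids"], visual_feats=visual_feats, visual_pos=visual_pos)
```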
@@ -1187,13 +1187,13 @@ class TFMarianModel(TFMarianPreTrainedModel):
past_key_values: Tuple[Tuple[tf.Tensor]] | None = None,
inputs_embeds: tf.Tensor | None = None,
decoder_inputs_embeds: tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
**kwargs,
):
) -> Tuple[tf.Tensor] | TFSeq2SeqModelOutput:
outputs = self.model(
input_ids=input_ids,
attention_mask=attention_mask,
@@ -1311,17 +1311,17 @@ class TFMarianMTModel(TFMarianPreTrainedModel, TFCausalLanguageModelingLoss):
head_mask: tf.Tensor | None = None,
decoder_head_mask: tf.Tensor | None = None,
cross_attn_head_mask: tf.Tensor | None = None,
encoder_outputs: Optional[TFBaseModelOutput] = None,
encoder_outputs: TFBaseModelOutput | None = None,
past_key_values: Tuple[Tuple[tf.Tensor]] | None = None,
inputs_embeds: tf.Tensor | None = None,
decoder_inputs_embeds: tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: tf.Tensor | None = None,
training: bool = False,
):
) -> Tuple[tf.Tensor] | TFSeq2SeqLMOutput:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
......
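For context (not part of the diff), a minimal translation sketch with the annotated `TFMarianMTModel`; the checkpoint name is an assumption (any `Helsinki-NLP/opus-mt-*` pair would do).

```python
# Illustrative only: generation path of the annotated TFMarianMTModel.
from transformers import MarianTokenizer, TFMarianMTModel

tokenizer = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-de")
# add from_pt=True if the checkpoint only ships PyTorch weights
model = TFMarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-en-de")

batch = tokenizer(["Type hints make call signatures self-documenting."], return_tensors="tf")
generated = model.generate(**batch)
print(tokenizer.batch_decode(generated, skip_special_tokens=True))
```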
@@ -553,19 +553,19 @@ class TFRagModel(TFRagPreTrainedModel):
encoder_outputs: np.ndarray | tf.Tensor | None = None,
decoder_input_ids: np.ndarray | tf.Tensor | None = None,
decoder_attention_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
past_key_values: Tuple[Tuple[Union[np.ndarray, tf.Tensor]]] | None = None,
doc_scores: np.ndarray | tf.Tensor | None = None,
context_input_ids: np.ndarray | tf.Tensor | None = None,
context_attention_mask: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
output_retrieved: Optional[bool] = None,
n_docs: Optional[int] = None,
return_dict: Optional[bool] = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
output_retrieved: bool | None = None,
n_docs: int | None = None,
return_dict: bool | None = None,
training: bool = False,
**kwargs,
):
) -> TFRetrievAugLMOutput:
r"""
Returns:
@@ -849,22 +849,22 @@ class TFRagTokenForGeneration(TFRagPreTrainedModel, TFCausalLanguageModelingLoss
decoder_input_ids: np.ndarray | tf.Tensor | None = None,
decoder_attention_mask: np.ndarray | tf.Tensor | None = None,
encoder_outputs: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
past_key_values: Tuple[Tuple[Union[np.ndarray, tf.Tensor]]] | None = None,
doc_scores: np.ndarray | tf.Tensor | None = None,
context_input_ids: np.ndarray | tf.Tensor | None = None,
context_attention_mask: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
output_retrieved: Optional[bool] = None,
n_docs: Optional[int] = None,
do_marginalize: Optional[bool] = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
output_retrieved: bool | None = None,
n_docs: int | None = None,
do_marginalize: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
reduce_loss: Optional[bool] = None,
return_dict: Optional[bool] = None,
reduce_loss: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
**kwargs, # needs kwargs for generation
):
) -> TFRetrievAugLMMarginOutput:
r"""
do_marginalize (`bool`, *optional*):
If `True`, the logits are marginalized over all documents by making use of
......
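For context (not part of the diff), a sketch of the annotated `TFRagTokenForGeneration`, whose `call` now advertises a `TFRetrievAugLMMarginOutput` return. The checkpoint, the dummy retrieval index, and `from_pt=True` are assumptions about published weights, not part of this commit.

```python
# Illustrative only: RAG needs a retriever; the dummy dataset keeps the example lightweight.
from transformers import AutoTokenizer, RagRetriever, TFRagTokenForGeneration

tokenizer = AutoTokenizer.from_pretrained("facebook/rag-token-nq")
retriever = RagRetriever.from_pretrained("facebook/rag-token-nq", index_name="exact", use_dummy_dataset=True)
model = TFRagTokenForGeneration.from_pretrained("facebook/rag-token-nq", retriever=retriever, from_pt=True)

inputs = tokenizer("who holds the record in 100m freestyle", return_tensors="tf")
generated = model.generate(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"])
print(tokenizer.batch_decode(generated, skip_special_tokens=True))
```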