Unverified Commit 040c4613 authored by David Reguera, committed by GitHub

Add type hints for tf models final batch (#25883)

* Add missing type hints and consistency to `RegNet` models

* Add missing type hints and consistency to `TFSamModel`

* Add missing type hints to `TFSegformerDecodeHead`

* Add missing type hints and consistency to `TransfoXL` family models

* Add missing type hints and consistency to `TFWav2Vec2ForSequenceClassification`

* Add type hints to `TFXLMModel`

* Fix linter

* Revert the type hints for `RegNet` to be Python 3.8 compliant

* Remove the redundant `np.ndarray` type hint.
parent 44d2c199
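As a rough illustration of the two hint styles this batch converges on (a minimal sketch, not code from the PR; `DummyTFModel` and its arguments are made up): most of the TF models touched here use PEP 604 unions such as `tf.Tensor | None`, which parse on Python 3.8 because `from __future__ import annotations` (PEP 563) keeps every annotation as a string, while RegNet is reverted to the `Optional[...]`/`Union[...]` form, which presumably stays valid even if its annotations are evaluated at runtime.

```python
# Minimal sketch of the two annotation styles; `DummyTFModel` and its
# argument names are hypothetical, not taken from the PR.
from __future__ import annotations  # PEP 563: annotations stay strings, so `|` parses on 3.8

from typing import Dict, Optional, Tuple, Union

import tensorflow as tf


class DummyTFModel(tf.keras.Model):
    # Style used for most TF models in this batch: PEP 604 unions.
    def call(
        self,
        input_ids: tf.Tensor | None = None,
        cache: Dict[str, tf.Tensor] | None = None,
        output_attentions: bool | None = None,
        training: bool = False,
    ) -> Tuple[tf.Tensor] | None:
        return None

    # Python 3.8-compliant style kept for RegNet: valid even if the hints
    # are evaluated at runtime (e.g. via typing.get_type_hints).
    def legacy_call(
        self,
        pixel_values: Optional[tf.Tensor] = None,
        return_dict: Optional[bool] = None,
        training: bool = False,
    ) -> Union[Tuple[tf.Tensor], None]:
        return None
```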
@@ -396,7 +396,7 @@ class TFRegNetModel(TFRegNetPreTrainedModel):
         pixel_values: tf.Tensor,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        training=False,
+        training: bool = False,
     ) -> Union[TFBaseModelOutputWithPoolingAndNoAttention, Tuple[tf.Tensor]]:
         output_hidden_states = (
             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states

@@ -447,11 +447,11 @@ class TFRegNetForImageClassification(TFRegNetPreTrainedModel, TFSequenceClassifi
     )
     def call(
         self,
-        pixel_values: tf.Tensor = None,
-        labels: tf.Tensor = None,
-        output_hidden_states: bool = None,
-        return_dict: bool = None,
-        training=False,
+        pixel_values: Optional[tf.Tensor] = None,
+        labels: Optional[tf.Tensor] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        training: bool = False,
     ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]:
         r"""
         labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
...
@@ -22,7 +22,7 @@ from __future__ import annotations
 
 import collections
 from dataclasses import dataclass
-from typing import Dict, List, Optional, Tuple, Union
+from typing import Optional, Tuple, Union
 
 import numpy as np
 import tensorflow as tf

@@ -1335,12 +1335,12 @@ class TFSamModel(TFSamPreTrainedModel):
         input_masks: tf.Tensor | None = None,
         image_embeddings: tf.Tensor | None = None,
         multimask_output: bool = True,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict=None,
-        training=False,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
+        training: bool = False,
         **kwargs,
-    ) -> List[Dict[str, tf.Tensor]]:
+    ) -> TFSamImageSegmentationOutput | Tuple[tf.Tensor]:
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
...
@@ -709,7 +709,7 @@ class TFSegformerDecodeHead(TFSegformerPreTrainedModel):
         self.config = config
 
-    def call(self, encoder_hidden_states, training: bool = False):
+    def call(self, encoder_hidden_states: tf.Tensor, training: bool = False) -> tf.Tensor:
         all_hidden_states = ()
         for encoder_hidden_state, mlp in zip(encoder_hidden_states, self.mlps):
             if self.config.reshape_last_stage is False and len(shape_list(encoder_hidden_state)) == 3:
...
@@ -870,11 +870,11 @@ class TFTransfoXLModel(TFTransfoXLPreTrainedModel):
         mems: List[tf.Tensor] | None = None,
         head_mask: np.ndarray | tf.Tensor | None = None,
         inputs_embeds: np.ndarray | tf.Tensor | None = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
         training: bool = False,
-    ):
+    ) -> TFTransfoXLModelOutput | Tuple[tf.Tensor]:
         outputs = self.transformer(
             input_ids=input_ids,
             mems=mems,

@@ -938,12 +938,12 @@ class TFTransfoXLLMHeadModel(TFTransfoXLPreTrainedModel):
         mems: List[tf.Tensor] | None = None,
         head_mask: np.ndarray | tf.Tensor | None = None,
         inputs_embeds: np.ndarray | tf.Tensor | None = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
         labels: np.ndarray | tf.Tensor | None = None,
         training: bool = False,
-    ):
+    ) -> TFTransfoXLLMHeadModelOutput | Tuple[tf.Tensor]:
         if input_ids is not None:
             bsz, tgt_len = shape_list(input_ids)[:2]
         else:
...
@@ -1619,12 +1619,12 @@ class TFWav2Vec2ForSequenceClassification(TFWav2Vec2PreTrainedModel):
         self,
         input_values: tf.Tensor,
         attention_mask: tf.Tensor | None = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
         labels: tf.Tensor | None = None,
         training: bool = False,
-    ):
+    ) -> TFSequenceClassifierOutput | Tuple[tf.Tensor]:
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
         output_hidden_states = True if self.config.use_weighted_layer_sum else output_hidden_states
...
@@ -649,9 +649,8 @@ XLM_INPUTS_DOCSTRING = r"""
            also use *attention_mask* for the same result (see above), kept here for compatibility. Indices selected in
            `[0, ..., input_ids.size(-1)]`.
        cache (`Dict[str, tf.Tensor]`, *optional*):
-            Dictionary string to `torch.FloatTensor` that contains precomputed hidden states (key and values in the
-            attention blocks) as computed by the model (see `cache` output below). Can be used to speed up sequential
-            decoding.
+            Dictionary string to `tf.Tensor` that contains precomputed hidden states (key and values in the attention
+            blocks) as computed by the model (see `cache` output below). Can be used to speed up sequential decoding.
 
            The dictionary object will be modified in-place during the forward pass to add newly computed
            hidden-states.

@@ -700,20 +699,20 @@ class TFXLMModel(TFXLMPreTrainedModel):
     )
     def call(
         self,
-        input_ids=None,
-        attention_mask=None,
-        langs=None,
-        token_type_ids=None,
-        position_ids=None,
-        lengths=None,
-        cache=None,
-        head_mask=None,
-        inputs_embeds=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-        training=False,
-    ) -> Union[TFBaseModelOutput, Tuple[tf.Tensor]]:
+        input_ids: TFModelInputType | None = None,
+        attention_mask: tf.Tensor | None = None,
+        langs: tf.Tensor | None = None,
+        token_type_ids: tf.Tensor | None = None,
+        position_ids: tf.Tensor | None = None,
+        lengths: tf.Tensor | None = None,
+        cache: Dict[str, tf.Tensor] | None = None,
+        head_mask: tf.Tensor | None = None,
+        inputs_embeds: tf.Tensor | None = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
+        training: bool = False,
+    ) -> TFBaseModelOutput | Tuple[tf.Tensor]:
         outputs = self.transformer(
             input_ids=input_ids,
             attention_mask=attention_mask,
...