"git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "776855c75275484a971d450a1a0dd4c5c8b5727b"
Unverified commit 5fe06b9b, authored by Rishav Chandra Varma and committed by GitHub

Adding missing type hints for mBART model (PyTorch) (#16429)



* added type hints for the mBART TensorFlow (TF) implementation

* Adding missing type hints for the mBART model

TensorFlow implementation updated with the missing type hints

* Missing type hints - correction

For the TF model

* Code fixup using make quality checks

* Type hints - typo fix

* make fix-copies and make fixup

* type hints

* updated files

* type hints update

* making dependent models coherent

Co-authored-by: matt <rocketknight1@gmail.com>
parent 9947dd07
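All of the PyTorch hunks below follow the same pattern: a bare `forward(...)` signature gains parameter and return annotations. As a rough illustration of that pattern (the class below is a made-up sketch, not a module from this diff), the annotated style looks like this:

```python
# Illustrative sketch only: a made-up layer showing the annotation style the
# diff below applies to the real decoder layers. Argument names mirror the
# diff; the class itself is not part of transformers.
from typing import Optional, Tuple

import torch
from torch import nn


class DecoderLayerSketch(nn.Module):
    def forward(
        self,
        hidden_states: torch.Tensor,
        attention_mask: Optional[torch.Tensor] = None,
        past_key_value: Optional[Tuple[torch.Tensor]] = None,
        output_attentions: Optional[bool] = False,
        use_cache: Optional[bool] = True,
    ) -> torch.Tensor:
        # The real layers run attention and feed-forward blocks here; for the
        # purpose of this sketch the tensor is simply passed through.
        return hidden_states
```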
@@ -1478,7 +1478,7 @@ class BigBirdPegasusDecoderLayer(nn.Module):
         past_key_value: Optional[Tuple[torch.Tensor]] = None,
         output_attentions: Optional[bool] = False,
         use_cache: Optional[bool] = True,
-    ):
+    ) -> torch.Tensor:
         """
         Args:
             hidden_states (`torch.FloatTensor`): input to the layer of shape *(seq_len, batch, embed_dim)*
......
@@ -294,7 +294,7 @@ class BlenderbotEncoderLayer(nn.Module):
         attention_mask: torch.Tensor,
         layer_head_mask: torch.Tensor,
         output_attentions: bool = False,
-    ):
+    ) -> torch.Tensor:
         """
         Args:
             hidden_states (`torch.FloatTensor`): input to the layer of shape *(seq_len, batch, embed_dim)*
@@ -378,7 +378,7 @@ class BlenderbotDecoderLayer(nn.Module):
         past_key_value: Optional[Tuple[torch.Tensor]] = None,
         output_attentions: Optional[bool] = False,
         use_cache: Optional[bool] = True,
-    ):
+    ) -> torch.Tensor:
         """
         Args:
             hidden_states (`torch.FloatTensor`): input to the layer of shape *(seq_len, batch, embed_dim)*
......
@@ -363,7 +363,7 @@ class M2M100EncoderLayer(nn.Module):
         attention_mask: torch.Tensor,
         layer_head_mask: torch.Tensor,
         output_attentions: bool = False,
-    ):
+    ) -> torch.Tensor:
         """
         Args:
             hidden_states (`torch.FloatTensor`): input to the layer of shape *(seq_len, batch, embed_dim)*
@@ -447,7 +447,7 @@ class M2M100DecoderLayer(nn.Module):
         past_key_value: Optional[Tuple[torch.Tensor]] = None,
         output_attentions: Optional[bool] = False,
         use_cache: Optional[bool] = True,
-    ):
+    ) -> torch.Tensor:
         """
         Args:
             hidden_states (`torch.FloatTensor`): input to the layer of shape *(seq_len, batch, embed_dim)*
......
@@ -307,7 +307,7 @@ class MBartEncoderLayer(nn.Module):
         attention_mask: torch.Tensor,
         layer_head_mask: torch.Tensor,
         output_attentions: bool = False,
-    ):
+    ) -> torch.Tensor:
         """
         Args:
             hidden_states (`torch.FloatTensor`): input to the layer of shape *(seq_len, batch, embed_dim)*
@@ -390,7 +390,7 @@ class MBartDecoderLayer(nn.Module):
         past_key_value: Optional[Tuple[torch.Tensor]] = None,
         output_attentions: Optional[bool] = False,
         use_cache: Optional[bool] = True,
-    ):
+    ) -> torch.Tensor:
         """
         Args:
             hidden_states (`torch.FloatTensor`): input to the layer of shape *(seq_len, batch, embed_dim)*
@@ -722,14 +722,14 @@ class MBartEncoder(MBartPreTrainedModel):
     def forward(
         self,
-        input_ids=None,
-        attention_mask=None,
-        head_mask=None,
-        inputs_embeds=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-    ):
+        input_ids: torch.LongTensor = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        head_mask: Optional[torch.Tensor] = None,
+        inputs_embeds: Optional[torch.FloatTensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, BaseModelOutput]:
         r"""
         Args:
             input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
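The new `Union[Tuple, BaseModelOutput]` return hint on `MBartEncoder.forward` mirrors the library's usual `return_dict` switch. A hedged caller-side sketch (the checkpoint name is the standard public mBART checkpoint, used only for illustration):

```python
# Caller-side sketch (assumed usage, not part of the diff): the Union hint
# corresponds to the return_dict flag.
from transformers import MBartModel, MBartTokenizer

tokenizer = MBartTokenizer.from_pretrained("facebook/mbart-large-cc25")
model = MBartModel.from_pretrained("facebook/mbart-large-cc25")
inputs = tokenizer("UN Chief Says There Is No Plan to Stop Violence", return_tensors="pt")

# return_dict=True -> a BaseModelOutput with named fields
encoder_output = model.get_encoder()(**inputs, return_dict=True)
print(type(encoder_output).__name__, encoder_output.last_hidden_state.shape)

# return_dict=False -> a plain tuple, the other half of the Union hint
encoder_tuple = model.get_encoder()(**inputs, return_dict=False)
print(isinstance(encoder_tuple, tuple))
```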
@@ -913,19 +913,19 @@ class MBartDecoder(MBartPreTrainedModel):
     def forward(
         self,
-        input_ids=None,
-        attention_mask=None,
-        encoder_hidden_states=None,
-        encoder_attention_mask=None,
-        head_mask=None,
-        cross_attn_head_mask=None,
-        past_key_values=None,
-        inputs_embeds=None,
-        use_cache=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-    ):
+        input_ids: torch.LongTensor = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        encoder_hidden_states: Optional[torch.FloatTensor] = None,
+        encoder_attention_mask: Optional[torch.LongTensor] = None,
+        head_mask: Optional[torch.Tensor] = None,
+        cross_attn_head_mask: Optional[torch.Tensor] = None,
+        past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
+        inputs_embeds: Optional[torch.FloatTensor] = None,
+        use_cache: Optional[bool] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, BaseModelOutputWithPastAndCrossAttentions]:
         r"""
         Args:
             input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
@@ -1168,22 +1168,22 @@ class MBartModel(MBartPreTrainedModel):
     )
     def forward(
         self,
-        input_ids=None,
-        attention_mask=None,
-        decoder_input_ids=None,
-        decoder_attention_mask=None,
-        head_mask=None,
-        decoder_head_mask=None,
-        cross_attn_head_mask=None,
-        encoder_outputs=None,
-        past_key_values=None,
-        inputs_embeds=None,
-        decoder_inputs_embeds=None,
-        use_cache=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-    ):
+        input_ids: torch.LongTensor = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        decoder_input_ids: Optional[torch.LongTensor] = None,
+        decoder_attention_mask: Optional[torch.LongTensor] = None,
+        head_mask: Optional[torch.Tensor] = None,
+        decoder_head_mask: Optional[torch.Tensor] = None,
+        cross_attn_head_mask: Optional[torch.Tensor] = None,
+        encoder_outputs: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
+        past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
+        inputs_embeds: Optional[torch.FloatTensor] = None,
+        decoder_inputs_embeds: Optional[torch.FloatTensor] = None,
+        use_cache: Optional[bool] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Seq2SeqModelOutput, Tuple[torch.FloatTensor]]:
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@@ -1297,23 +1297,23 @@ class MBartForConditionalGeneration(MBartPreTrainedModel):
     @add_end_docstrings(MBART_GENERATION_EXAMPLE)
     def forward(
         self,
-        input_ids=None,
-        attention_mask=None,
-        decoder_input_ids=None,
-        decoder_attention_mask=None,
-        head_mask=None,
-        decoder_head_mask=None,
-        cross_attn_head_mask=None,
-        encoder_outputs=None,
-        past_key_values=None,
-        inputs_embeds=None,
-        decoder_inputs_embeds=None,
-        labels=None,
-        use_cache=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-    ):
+        input_ids: torch.LongTensor = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        decoder_input_ids: Optional[torch.LongTensor] = None,
+        decoder_attention_mask: Optional[torch.LongTensor] = None,
+        head_mask: Optional[torch.Tensor] = None,
+        decoder_head_mask: Optional[torch.Tensor] = None,
+        cross_attn_head_mask: Optional[torch.Tensor] = None,
+        encoder_outputs: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
+        past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
+        inputs_embeds: Optional[torch.FloatTensor] = None,
+        decoder_inputs_embeds: Optional[torch.FloatTensor] = None,
+        labels: Optional[torch.LongTensor] = None,
+        use_cache: Optional[bool] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Seq2SeqLMOutput, Tuple[torch.FloatTensor]]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
             Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
......
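Similarly, the `Union[Seq2SeqLMOutput, Tuple[torch.FloatTensor]]` hint on `MBartForConditionalGeneration.forward` tells callers and static checkers what each branch of `return_dict` yields. A minimal, hedged usage sketch (checkpoint name again illustrative):

```python
# Hedged sketch (assumed usage): with return_dict=True the forward pass returns
# a Seq2SeqLMOutput, so .loss and .logits are available as named attributes.
from transformers import MBartForConditionalGeneration, MBartTokenizer

tokenizer = MBartTokenizer.from_pretrained("facebook/mbart-large-cc25")
model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-cc25")

inputs = tokenizer("Hello world", return_tensors="pt")
labels = tokenizer("Hello world", return_tensors="pt").input_ids

outputs = model(**inputs, labels=labels, return_dict=True)
print(outputs.loss.item(), outputs.logits.shape)  # Seq2SeqLMOutput branch of the Union
```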
@@ -309,7 +309,7 @@ class PegasusEncoderLayer(nn.Module):
         attention_mask: torch.Tensor,
         layer_head_mask: torch.Tensor,
         output_attentions: bool = False,
-    ):
+    ) -> torch.Tensor:
         """
         Args:
             hidden_states (`torch.FloatTensor`): input to the layer of shape *(seq_len, batch, embed_dim)*
@@ -393,7 +393,7 @@ class PegasusDecoderLayer(nn.Module):
         past_key_value: Optional[Tuple[torch.Tensor]] = None,
         output_attentions: Optional[bool] = False,
         use_cache: Optional[bool] = True,
-    ):
+    ) -> torch.Tensor:
         """
         Args:
             hidden_states (`torch.FloatTensor`): input to the layer of shape *(seq_len, batch, embed_dim)*
......
@@ -423,7 +423,7 @@ class XGLMDecoderLayer(nn.Module):
         past_key_value: Optional[Tuple[torch.Tensor]] = None,
         output_attentions: Optional[bool] = False,
         use_cache: Optional[bool] = True,
-    ):
+    ) -> torch.Tensor:
         """
         Args:
             hidden_states (`torch.FloatTensor`): input to the layer of shape *(seq_len, batch, embed_dim)*
......