"docs/source/ko/quicktour.md" did not exist on "d59034ff6fb787d48c5dafcb09a5cea561ac3273"
Unverified commit 7b262b96, authored by Adam Montgomerie, committed by GitHub

Funnel type hints (#16323)

* add pt funnel type hints

* add tf funnel type hints
parent deb61e5f
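Note for skimmers: the change is mechanical throughout. Untyped signatures gain standard `typing` annotations, with tensors hinted as `Optional[torch.Tensor]` on the PyTorch side and `Optional[Union[np.ndarray, tf.Tensor]]` on the TensorFlow side. A minimal sketch of the pattern on a hypothetical module (not code from this diff):

```python
from typing import Optional

import torch
from torch import nn


class Toy(nn.Module):
    # Mirrors the annotation style applied in this commit:
    # __init__ returns None, tensor arguments become Optional[torch.Tensor].
    def __init__(self, hidden: int) -> None:
        super().__init__()
        self.proj = nn.Linear(hidden, hidden)

    def forward(self, x: Optional[torch.Tensor] = None) -> torch.Tensor:
        return self.proj(x)
```

Runtime behavior is unchanged; the payoff is that static checkers (mypy, pyright) and IDEs can now verify call sites.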
src/transformers/models/funnel/modeling_funnel.py

@@ -16,7 +16,7 @@
 import os
 from dataclasses import dataclass
-from typing import Optional, Tuple
+from typing import List, Optional, Tuple, Union

 import numpy as np
 import torch
@@ -157,13 +157,15 @@ def load_tf_weights_in_funnel(model, config, tf_checkpoint_path):
 class FunnelEmbeddings(nn.Module):
-    def __init__(self, config):
+    def __init__(self, config: FunnelConfig) -> None:
         super().__init__()
         self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=config.pad_token_id)
         self.layer_norm = nn.LayerNorm(config.d_model, eps=config.layer_norm_eps)
         self.dropout = nn.Dropout(config.hidden_dropout)

-    def forward(self, input_ids=None, inputs_embeds=None):
+    def forward(
+        self, input_ids: Optional[torch.Tensor] = None, inputs_embeds: Optional[torch.Tensor] = None
+    ) -> torch.Tensor:
         if inputs_embeds is None:
             inputs_embeds = self.word_embeddings(input_ids)
         embeddings = self.layer_norm(inputs_embeds)
@@ -178,7 +180,7 @@ class FunnelAttentionStructure(nn.Module):
     cls_token_type_id: int = 2

-    def __init__(self, config):
+    def __init__(self, config: FunnelConfig) -> None:
         super().__init__()
         self.config = config
         self.sin_dropout = nn.Dropout(config.hidden_dropout)
@@ -187,7 +189,12 @@ class FunnelAttentionStructure(nn.Module):
         # divided.
         self.pooling_mult = None

-    def init_attention_inputs(self, inputs_embeds, attention_mask=None, token_type_ids=None):
+    def init_attention_inputs(
+        self,
+        inputs_embeds: torch.Tensor,
+        attention_mask: Optional[torch.Tensor] = None,
+        token_type_ids: Optional[torch.Tensor] = None,
+    ) -> Tuple[torch.Tensor]:
         """Returns the attention inputs associated to the inputs of the model."""
         # inputs_embeds has shape batch_size x seq_len x d_model
         # attention_mask and token_type_ids have shape batch_size x seq_len
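The shapes named in those comments are the whole contract of `init_attention_inputs`; a standalone sketch with assumed sizes (batch 2, sequence length 8, `d_model` 4), not code from the diff:

```python
import torch

# inputs_embeds: batch_size x seq_len x d_model
inputs_embeds = torch.randn(2, 8, 4)
# attention_mask and token_type_ids: batch_size x seq_len
attention_mask = torch.ones(2, 8)
token_type_ids = torch.zeros(2, 8, dtype=torch.long)
# init_attention_inputs packs these into the 4-tuple
# (position_embeds, token_type_mat, attention_mask, cls_mask),
# which is what the Tuple[torch.Tensor] annotation above refers to.
```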
@@ -202,7 +209,7 @@ class FunnelAttentionStructure(nn.Module):
         )
         return (position_embeds, token_type_mat, attention_mask, cls_mask)

-    def token_type_ids_to_mat(self, token_type_ids):
+    def token_type_ids_to_mat(self, token_type_ids: torch.Tensor) -> torch.Tensor:
         """Convert `token_type_ids` to `token_type_mat`."""
         token_type_mat = token_type_ids[:, :, None] == token_type_ids[:, None]
         # Treat <cls> as in the same segment as both A & B
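The broadcasting trick in `token_type_ids_to_mat` is easiest to see on concrete data; a minimal standalone sketch:

```python
import torch

token_type_ids = torch.tensor([[0, 0, 1, 1]])  # batch_size=1, seq_len=4
# [:, :, None] has shape (1, 4, 1); [:, None] has shape (1, 1, 4).
# Broadcasting compares every pair (i, j), giving a (1, 4, 4) boolean matrix
# that is True where tokens i and j share a segment id.
token_type_mat = token_type_ids[:, :, None] == token_type_ids[:, None]
print(token_type_mat[0].int())
# tensor([[1, 1, 0, 0],
#         [1, 1, 0, 0],
#         [0, 0, 1, 1],
#         [0, 0, 1, 1]])
```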
@@ -210,7 +217,9 @@ class FunnelAttentionStructure(nn.Module):
         cls_mat = cls_ids[:, :, None] | cls_ids[:, None]
         return cls_mat | token_type_mat

-    def get_position_embeds(self, seq_len, dtype, device):
+    def get_position_embeds(
+        self, seq_len: int, dtype: torch.dtype, device: torch.device
+    ) -> Union[Tuple[torch.Tensor], List[List[torch.Tensor]]]:
         """
         Create and cache inputs related to relative position encoding. Those are very different depending on whether we
         are using the factorized or the relative shift attention:
@@ -288,7 +297,7 @@ class FunnelAttentionStructure(nn.Module):
             position_embeds_list.append([position_embeds_no_pooling, position_embeds_pooling])
         return position_embeds_list

-    def stride_pool_pos(self, pos_id, block_index):
+    def stride_pool_pos(self, pos_id: torch.Tensor, block_index: int):
         """
         Pool `pos_id` while keeping the cls token separate (if `config.separate_cls=True`).
         """
@@ -303,7 +312,7 @@ class FunnelAttentionStructure(nn.Module):
         else:
             return pos_id[::2]

-    def relative_pos(self, pos, stride, pooled_pos=None, shift=1):
+    def relative_pos(self, pos: torch.Tensor, stride: int, pooled_pos=None, shift: int = 1) -> torch.Tensor:
         """
         Build the relative positional vector between `pos` and `pooled_pos`.
         """
@@ -317,7 +326,11 @@ class FunnelAttentionStructure(nn.Module):
         return torch.arange(max_dist, min_dist - 1, -stride, dtype=torch.long, device=pos.device)

-    def stride_pool(self, tensor, axis):
+    def stride_pool(
+        self,
+        tensor: Union[torch.Tensor, Tuple[torch.Tensor], List[torch.Tensor]],
+        axis: Union[int, Tuple[int], List[int]],
+    ) -> torch.Tensor:
         """
         Perform pooling by stride slicing the tensor along the given axis.
         """
@@ -346,7 +359,9 @@ class FunnelAttentionStructure(nn.Module):
             tensor = torch.cat([tensor[cls_slice], tensor], axis=axis)
         return tensor[enc_slice]

-    def pool_tensor(self, tensor, mode="mean", stride=2):
+    def pool_tensor(
+        self, tensor: Union[torch.Tensor, Tuple[torch.Tensor], List[torch.Tensor]], mode: str = "mean", stride: int = 2
+    ) -> torch.Tensor:
         """Apply 1D pooling to a tensor of size [B x T (x H)]."""
         if tensor is None:
             return None
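A rough illustration of what `pool_tensor` computes for the default `mode="mean", stride=2` (a simplified sketch that ignores the `separate_cls` handling the real method layers on top):

```python
import torch
import torch.nn.functional as F

x = torch.arange(8, dtype=torch.float).reshape(1, 8, 1)  # B x T x H
# Mean-pool pairs of adjacent tokens along the sequence dimension;
# avg_pool1d expects B x H x T, so transpose around the call.
pooled = F.avg_pool1d(x.transpose(1, 2), kernel_size=2, stride=2).transpose(1, 2)
print(pooled.squeeze())  # tensor([0.5000, 2.5000, 4.5000, 6.5000])
```

Each Funnel block halves the sequence this way, which is where the model's compute savings come from.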
@@ -382,7 +397,9 @@ class FunnelAttentionStructure(nn.Module):
             return tensor[:, 0]
         return tensor

-    def pre_attention_pooling(self, output, attention_inputs):
+    def pre_attention_pooling(
+        self, output, attention_inputs: Tuple[torch.Tensor]
+    ) -> Tuple[torch.Tensor, Tuple[torch.Tensor]]:
         """Pool `output` and the proper parts of `attention_inputs` before the attention layer."""
         position_embeds, token_type_mat, attention_mask, cls_mask = attention_inputs
         if self.config.pool_q_only:
@@ -402,7 +419,7 @@ class FunnelAttentionStructure(nn.Module):
         attention_inputs = (position_embeds, token_type_mat, attention_mask, cls_mask)
         return output, attention_inputs

-    def post_attention_pooling(self, attention_inputs):
+    def post_attention_pooling(self, attention_inputs: Tuple[torch.Tensor]) -> Tuple[torch.Tensor]:
         """Pool the proper parts of `attention_inputs` after the attention layer."""
         position_embeds, token_type_mat, attention_mask, cls_mask = attention_inputs
         if self.config.pool_q_only:
@@ -416,7 +433,7 @@ class FunnelAttentionStructure(nn.Module):
         return attention_inputs

-def _relative_shift_gather(positional_attn, context_len, shift):
+def _relative_shift_gather(positional_attn: torch.Tensor, context_len: int, shift: int) -> torch.Tensor:
     batch_size, n_head, seq_len, max_rel_len = positional_attn.shape
     # max_rel_len = 2 * context_len + shift -1 is the numbers of possible relative positions i-j
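`_relative_shift_gather` is Funnel's variant of the relative-shift trick; for intuition, here is the classic Transformer-XL form of that trick as a standalone sketch (not the exact function body from this file):

```python
import torch
import torch.nn.functional as F

def rel_shift(x: torch.Tensor) -> torch.Tensor:
    # x: (..., q_len, k_len) scores laid out against relative positions.
    # Pad one column, reshape so each row shifts by one, then trim:
    # every query row ends up aligned with its own window of relative offsets.
    *lead, q_len, k_len = x.shape
    x = F.pad(x, (1, 0))                      # (..., q_len, k_len + 1)
    x = x.reshape(*lead, k_len + 1, q_len)
    return x[..., 1:, :].reshape(*lead, q_len, k_len)

scores = torch.arange(12.0).reshape(1, 3, 4)
print(rel_shift(scores).shape)  # torch.Size([1, 3, 4])
```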
@@ -433,7 +450,7 @@ def _relative_shift_gather(positional_attn, context_len, shift):
 class FunnelRelMultiheadAttention(nn.Module):
-    def __init__(self, config, block_index):
+    def __init__(self, config: FunnelConfig, block_index: int) -> None:
         super().__init__()
         self.config = config
         self.block_index = block_index
@@ -522,7 +539,14 @@ class FunnelRelMultiheadAttention(nn.Module):
             token_type_attn *= cls_mask
         return token_type_attn

-    def forward(self, query, key, value, attention_inputs, output_attentions=False):
+    def forward(
+        self,
+        query: torch.Tensor,
+        key: torch.Tensor,
+        value: torch.Tensor,
+        attention_inputs: Tuple[torch.Tensor],
+        output_attentions: bool = False,
+    ) -> Tuple[torch.Tensor, ...]:
         # query has shape batch_size x seq_len x d_model
         # key and value have shapes batch_size x context_len x d_model
         position_embeds, token_type_mat, attention_mask, cls_mask = attention_inputs
@@ -570,7 +594,7 @@ class FunnelRelMultiheadAttention(nn.Module):
 class FunnelPositionwiseFFN(nn.Module):
-    def __init__(self, config):
+    def __init__(self, config: FunnelConfig) -> None:
         super().__init__()
         self.linear_1 = nn.Linear(config.d_model, config.d_inner)
         self.activation_function = ACT2FN[config.hidden_act]
@@ -579,7 +603,7 @@ class FunnelPositionwiseFFN(nn.Module):
         self.dropout = nn.Dropout(config.hidden_dropout)
         self.layer_norm = nn.LayerNorm(config.d_model, config.layer_norm_eps)

-    def forward(self, hidden):
+    def forward(self, hidden: torch.Tensor) -> torch.Tensor:
         h = self.linear_1(hidden)
         h = self.activation_function(h)
         h = self.activation_dropout(h)
@@ -589,19 +613,26 @@ class FunnelPositionwiseFFN(nn.Module):
 class FunnelLayer(nn.Module):
-    def __init__(self, config, block_index):
+    def __init__(self, config: FunnelConfig, block_index: int) -> None:
         super().__init__()
         self.attention = FunnelRelMultiheadAttention(config, block_index)
         self.ffn = FunnelPositionwiseFFN(config)

-    def forward(self, query, key, value, attention_inputs, output_attentions=False):
+    def forward(
+        self,
+        query: torch.Tensor,
+        key: torch.Tensor,
+        value: torch.Tensor,
+        attention_inputs,
+        output_attentions: bool = False,
+    ) -> Tuple:
         attn = self.attention(query, key, value, attention_inputs, output_attentions=output_attentions)
         output = self.ffn(attn[0])
         return (output, attn[1]) if output_attentions else (output,)


 class FunnelEncoder(nn.Module):
-    def __init__(self, config):
+    def __init__(self, config: FunnelConfig) -> None:
         super().__init__()
         self.config = config
         self.attention_structure = FunnelAttentionStructure(config)
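A usage sketch for the tuple that `FunnelLayer.forward` returns, which the bare `Tuple` hint above covers (default config values assumed; untested against any specific transformers version):

```python
import torch
from transformers import FunnelConfig
from transformers.models.funnel.modeling_funnel import FunnelAttentionStructure, FunnelLayer

config = FunnelConfig()
layer = FunnelLayer(config, block_index=0)
structure = FunnelAttentionStructure(config)

hidden = torch.randn(2, 16, config.d_model)
attention_inputs = structure.init_attention_inputs(
    hidden,
    attention_mask=torch.ones(2, 16),
    token_type_ids=torch.zeros(2, 16, dtype=torch.long),
)
out = layer(hidden, hidden, hidden, attention_inputs, output_attentions=True)
# 2-tuple (hidden_states, attention_probs); a 1-tuple without output_attentions.
print(len(out))
```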
@@ -614,13 +645,13 @@ class FunnelEncoder(nn.Module):
     def forward(
         self,
-        inputs_embeds,
-        attention_mask=None,
-        token_type_ids=None,
-        output_attentions=False,
-        output_hidden_states=False,
-        return_dict=True,
-    ):
+        inputs_embeds: torch.Tensor,
+        attention_mask: Optional[torch.Tensor] = None,
+        token_type_ids: Optional[torch.Tensor] = None,
+        output_attentions: bool = False,
+        output_hidden_states: bool = False,
+        return_dict: bool = True,
+    ) -> Union[Tuple, BaseModelOutput]:
         # The pooling is not implemented on long tensors, so we convert this mask.
         attention_mask = attention_mask.type_as(inputs_embeds)
         attention_inputs = self.attention_structure.init_attention_inputs(
@@ -663,7 +694,9 @@ class FunnelEncoder(nn.Module):
         return BaseModelOutput(last_hidden_state=hidden, hidden_states=all_hidden_states, attentions=all_attentions)

-def upsample(x, stride, target_len, separate_cls=True, truncate_seq=False):
+def upsample(
+    x: torch.Tensor, stride: int, target_len: int, separate_cls: bool = True, truncate_seq: bool = False
+) -> torch.Tensor:
     """
     Upsample tensor `x` to match `target_len` by repeating the tokens `stride` time on the sequence length dimension.
     """
@@ -684,7 +717,7 @@ def upsample(x, stride, target_len, separate_cls=True, truncate_seq=False):
 class FunnelDecoder(nn.Module):
-    def __init__(self, config):
+    def __init__(self, config: FunnelConfig) -> None:
         super().__init__()
         self.config = config
         self.attention_structure = FunnelAttentionStructure(config)
@@ -692,14 +725,14 @@ class FunnelDecoder(nn.Module):
     def forward(
         self,
-        final_hidden,
-        first_block_hidden,
-        attention_mask=None,
-        token_type_ids=None,
-        output_attentions=False,
-        output_hidden_states=False,
-        return_dict=True,
-    ):
+        final_hidden: torch.Tensor,
+        first_block_hidden: torch.Tensor,
+        attention_mask: Optional[torch.Tensor] = None,
+        token_type_ids: Optional[torch.Tensor] = None,
+        output_attentions: bool = False,
+        output_hidden_states: bool = False,
+        return_dict: bool = True,
+    ) -> Union[Tuple, BaseModelOutput]:
         upsampled_hidden = upsample(
             final_hidden,
             stride=2 ** (len(self.config.block_sizes) - 1),
@@ -735,13 +768,13 @@ class FunnelDecoder(nn.Module):
 class FunnelDiscriminatorPredictions(nn.Module):
     """Prediction module for the discriminator, made up of two dense layers."""

-    def __init__(self, config):
+    def __init__(self, config: FunnelConfig) -> None:
         super().__init__()
         self.config = config
         self.dense = nn.Linear(config.d_model, config.d_model)
         self.dense_prediction = nn.Linear(config.d_model, 1)

-    def forward(self, discriminator_hidden_states):
+    def forward(self, discriminator_hidden_states: torch.Tensor) -> torch.Tensor:
         hidden_states = self.dense(discriminator_hidden_states)
         hidden_states = ACT2FN[self.config.hidden_act](hidden_states)
         logits = self.dense_prediction(hidden_states).squeeze()
@@ -784,13 +817,13 @@ class FunnelPreTrainedModel(PreTrainedModel):
 class FunnelClassificationHead(nn.Module):
-    def __init__(self, config, n_labels):
+    def __init__(self, config: FunnelConfig, n_labels: int) -> None:
         super().__init__()
         self.linear_hidden = nn.Linear(config.d_model, config.d_model)
         self.dropout = nn.Dropout(config.hidden_dropout)
         self.linear_out = nn.Linear(config.d_model, n_labels)

-    def forward(self, hidden):
+    def forward(self, hidden: torch.Tensor) -> torch.Tensor:
         hidden = self.linear_hidden(hidden)
         hidden = torch.tanh(hidden)
         hidden = self.dropout(hidden)
@@ -892,7 +925,7 @@ FUNNEL_INPUTS_DOCSTRING = r"""
     FUNNEL_START_DOCSTRING,
 )
 class FunnelBaseModel(FunnelPreTrainedModel):
-    def __init__(self, config):
+    def __init__(self, config: FunnelConfig) -> None:
         super().__init__(config)

         self.embeddings = FunnelEmbeddings(config)
@@ -901,10 +934,10 @@ class FunnelBaseModel(FunnelPreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()

-    def get_input_embeddings(self):
+    def get_input_embeddings(self) -> nn.Embedding:
         return self.embeddings.word_embeddings

-    def set_input_embeddings(self, new_embeddings):
+    def set_input_embeddings(self, new_embeddings: nn.Embedding) -> None:
         self.embeddings.word_embeddings = new_embeddings

     @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@@ -916,16 +949,16 @@ class FunnelBaseModel(FunnelPreTrainedModel):
     )
     def forward(
         self,
-        input_ids=None,
-        attention_mask=None,
-        token_type_ids=None,
-        position_ids=None,
-        head_mask=None,
-        inputs_embeds=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-    ):
+        input_ids: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        token_type_ids: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.Tensor] = None,
+        head_mask: Optional[torch.Tensor] = None,
+        inputs_embeds: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, BaseModelOutput]:
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@@ -969,7 +1002,7 @@ class FunnelBaseModel(FunnelPreTrainedModel):
     FUNNEL_START_DOCSTRING,
 )
 class FunnelModel(FunnelPreTrainedModel):
-    def __init__(self, config):
+    def __init__(self, config: FunnelConfig) -> None:
         super().__init__(config)
         self.config = config
         self.embeddings = FunnelEmbeddings(config)
@@ -979,10 +1012,10 @@ class FunnelModel(FunnelPreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()

-    def get_input_embeddings(self):
+    def get_input_embeddings(self) -> nn.Embedding:
         return self.embeddings.word_embeddings

-    def set_input_embeddings(self, new_embeddings):
+    def set_input_embeddings(self, new_embeddings: nn.Embedding) -> None:
         self.embeddings.word_embeddings = new_embeddings

     @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@@ -994,14 +1027,14 @@ class FunnelModel(FunnelPreTrainedModel):
     )
     def forward(
         self,
-        input_ids=None,
-        attention_mask=None,
-        token_type_ids=None,
-        inputs_embeds=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-    ):
+        input_ids: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        token_type_ids: Optional[torch.Tensor] = None,
+        inputs_embeds: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, BaseModelOutput]:
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
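With these annotations in place, a standard usage of `FunnelModel` type-checks cleanly; a short sketch (assumes the public `funnel-transformer/small` checkpoint can be downloaded):

```python
import torch
from transformers import FunnelModel, FunnelTokenizer

tokenizer = FunnelTokenizer.from_pretrained("funnel-transformer/small")
model = FunnelModel.from_pretrained("funnel-transformer/small")

inputs = tokenizer("Hello, funnel!", return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)  # BaseModelOutput when return_dict=True (the default)
print(outputs.last_hidden_state.shape)  # (1, seq_len, d_model)
```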
@@ -1077,7 +1110,7 @@ add_start_docstrings(
 class FunnelForPreTraining(FunnelPreTrainedModel):
-    def __init__(self, config):
+    def __init__(self, config: FunnelConfig) -> None:
         super().__init__(config)

         self.funnel = FunnelModel(config)
@@ -1089,15 +1122,15 @@ class FunnelForPreTraining(FunnelPreTrainedModel):
     @replace_return_docstrings(output_type=FunnelForPreTrainingOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
-        input_ids=None,
-        attention_mask=None,
-        token_type_ids=None,
-        inputs_embeds=None,
-        labels=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-    ):
+        input_ids: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        token_type_ids: Optional[torch.Tensor] = None,
+        inputs_embeds: Optional[torch.Tensor] = None,
+        labels: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, FunnelForPreTrainingOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
             Labels for computing the ELECTRA-style loss. Input should be a sequence of tokens (see `input_ids`
@@ -1160,7 +1193,7 @@ class FunnelForPreTraining(FunnelPreTrainedModel):
 @add_start_docstrings("""Funnel Transformer Model with a `language modeling` head on top.""", FUNNEL_START_DOCSTRING)
 class FunnelForMaskedLM(FunnelPreTrainedModel):
-    def __init__(self, config):
+    def __init__(self, config: FunnelConfig) -> None:
         super().__init__(config)

         self.funnel = FunnelModel(config)
@@ -1169,10 +1202,10 @@ class FunnelForMaskedLM(FunnelPreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()

-    def get_output_embeddings(self):
+    def get_output_embeddings(self) -> nn.Linear:
         return self.lm_head

-    def set_output_embeddings(self, new_embeddings):
+    def set_output_embeddings(self, new_embeddings: nn.Embedding) -> None:
         self.lm_head = new_embeddings

     @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@@ -1185,15 +1218,15 @@ class FunnelForMaskedLM(FunnelPreTrainedModel):
     )
     def forward(
         self,
-        input_ids=None,
-        attention_mask=None,
-        token_type_ids=None,
-        inputs_embeds=None,
-        labels=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-    ):
+        input_ids: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        token_type_ids: Optional[torch.Tensor] = None,
+        inputs_embeds: Optional[torch.Tensor] = None,
+        labels: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, MaskedLMOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
             Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
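The `labels` convention that docstring describes (ignore index `-100`, loss computed only on masked positions) in a compact sketch; the masked position chosen here is arbitrary, for illustration only:

```python
import torch
from transformers import FunnelForMaskedLM, FunnelTokenizer

tokenizer = FunnelTokenizer.from_pretrained("funnel-transformer/small")
model = FunnelForMaskedLM.from_pretrained("funnel-transformer/small")

inputs = tokenizer("Paris is the capital of France.", return_tensors="pt")
labels = inputs.input_ids.clone()

# Mask one in-sequence token; labels keep the original id there and -100
# elsewhere, so only the masked position contributes to the loss.
masked = inputs.input_ids.clone()
position = 6  # hypothetical position, for illustration
masked[0, position] = tokenizer.mask_token_id
labels[masked != tokenizer.mask_token_id] = -100

outputs = model(input_ids=masked, attention_mask=inputs.attention_mask, labels=labels)
print(outputs.loss)  # scalar masked-LM loss on a MaskedLMOutput
```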
@@ -1240,7 +1273,7 @@ class FunnelForMaskedLM(FunnelPreTrainedModel):
     FUNNEL_START_DOCSTRING,
 )
 class FunnelForSequenceClassification(FunnelPreTrainedModel):
-    def __init__(self, config):
+    def __init__(self, config: FunnelConfig) -> None:
         super().__init__(config)
         self.num_labels = config.num_labels
         self.config = config
@@ -1259,15 +1292,15 @@ class FunnelForSequenceClassification(FunnelPreTrainedModel):
     )
     def forward(
         self,
-        input_ids=None,
-        attention_mask=None,
-        token_type_ids=None,
-        inputs_embeds=None,
-        labels=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-    ):
+        input_ids: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        token_type_ids: Optional[torch.Tensor] = None,
+        inputs_embeds: Optional[torch.Tensor] = None,
+        labels: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, SequenceClassifierOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
             Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@@ -1333,7 +1366,7 @@ class FunnelForSequenceClassification(FunnelPreTrainedModel):
     FUNNEL_START_DOCSTRING,
 )
 class FunnelForMultipleChoice(FunnelPreTrainedModel):
-    def __init__(self, config):
+    def __init__(self, config: FunnelConfig) -> None:
         super().__init__(config)

         self.funnel = FunnelBaseModel(config)
@@ -1350,15 +1383,15 @@ class FunnelForMultipleChoice(FunnelPreTrainedModel):
     )
     def forward(
         self,
-        input_ids=None,
-        attention_mask=None,
-        token_type_ids=None,
-        inputs_embeds=None,
-        labels=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-    ):
+        input_ids: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        token_type_ids: Optional[torch.Tensor] = None,
+        inputs_embeds: Optional[torch.Tensor] = None,
+        labels: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, MultipleChoiceModelOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
             Labels for computing the multiple choice classification loss. Indices should be in `[0, ...,
@@ -1417,7 +1450,7 @@ class FunnelForMultipleChoice(FunnelPreTrainedModel):
     FUNNEL_START_DOCSTRING,
 )
 class FunnelForTokenClassification(FunnelPreTrainedModel):
-    def __init__(self, config):
+    def __init__(self, config: FunnelConfig) -> None:
         super().__init__(config)
         self.num_labels = config.num_labels
@@ -1437,15 +1470,15 @@ class FunnelForTokenClassification(FunnelPreTrainedModel):
     )
     def forward(
         self,
-        input_ids=None,
-        attention_mask=None,
-        token_type_ids=None,
-        inputs_embeds=None,
-        labels=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-    ):
+        input_ids: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        token_type_ids: Optional[torch.Tensor] = None,
+        inputs_embeds: Optional[torch.Tensor] = None,
+        labels: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, TokenClassifierOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
             Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@@ -1491,7 +1524,7 @@ class FunnelForTokenClassification(FunnelPreTrainedModel):
     FUNNEL_START_DOCSTRING,
 )
 class FunnelForQuestionAnswering(FunnelPreTrainedModel):
-    def __init__(self, config):
+    def __init__(self, config: FunnelConfig) -> None:
         super().__init__(config)
         self.num_labels = config.num_labels
@@ -1510,16 +1543,16 @@ class FunnelForQuestionAnswering(FunnelPreTrainedModel):
     )
     def forward(
         self,
-        input_ids=None,
-        attention_mask=None,
-        token_type_ids=None,
-        inputs_embeds=None,
-        start_positions=None,
-        end_positions=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-    ):
+        input_ids: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        token_type_ids: Optional[torch.Tensor] = None,
+        inputs_embeds: Optional[torch.Tensor] = None,
+        start_positions: Optional[torch.Tensor] = None,
+        end_positions: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, QuestionAnsweringModelOutput]:
         r"""
         start_positions (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
             Labels for position (index) of the start of the labelled span for computing the token classification loss.
...
src/transformers/models/funnel/modeling_tf_funnel.py

@@ -16,8 +16,9 @@
 import warnings
 from dataclasses import dataclass
-from typing import Dict, Optional, Tuple
+from typing import Dict, Optional, Tuple, Union

+import numpy as np
 import tensorflow as tf

 from ...activations_tf import get_tf_activation
@@ -39,6 +40,7 @@ from ...modeling_tf_outputs import (
 )
 from ...modeling_tf_utils import (
     TFMaskedLanguageModelingLoss,
+    TFModelInputType,
     TFMultipleChoiceLoss,
     TFPreTrainedModel,
     TFQuestionAnsweringLoss,
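The new `numpy` and `TFModelInputType` imports exist to widen the accepted input types: per the hints below, the TF models take `np.ndarray` as readily as `tf.Tensor`. A usage sketch under that contract (assumes the `funnel-transformer/small` checkpoint):

```python
import numpy as np
from transformers import FunnelTokenizer, TFFunnelModel

tokenizer = FunnelTokenizer.from_pretrained("funnel-transformer/small")
model = TFFunnelModel.from_pretrained("funnel-transformer/small")

# Plain NumPy arrays are valid inputs alongside tf.Tensor.
encoded = tokenizer("Hello, funnel!", return_tensors="np")
outputs = model(input_ids=np.asarray(encoded["input_ids"]))
print(outputs.last_hidden_state.shape)
```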
@@ -1093,7 +1095,7 @@ FUNNEL_INPUTS_DOCSTRING = r"""
     FUNNEL_START_DOCSTRING,
 )
 class TFFunnelBaseModel(TFFunnelPreTrainedModel):
-    def __init__(self, config, *inputs, **kwargs):
+    def __init__(self, config: FunnelConfig, *inputs, **kwargs) -> None:
         super().__init__(config, *inputs, **kwargs)
         self.funnel = TFFunnelBaseLayer(config, name="funnel")
@@ -1107,16 +1109,16 @@ class TFFunnelBaseModel(TFFunnelPreTrainedModel):
     @unpack_inputs
     def call(
         self,
-        input_ids=None,
-        attention_mask=None,
-        token_type_ids=None,
-        inputs_embeds=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-        training=False,
+        input_ids: Optional[TFModelInputType] = None,
+        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        training: bool = False,
         **kwargs,
-    ):
+    ) -> Union[Tuple[tf.Tensor], TFBaseModelOutput]:
         return self.funnel(
             input_ids=input_ids,
             attention_mask=attention_mask,
@@ -1141,7 +1143,7 @@ class TFFunnelBaseModel(TFFunnelPreTrainedModel):
     FUNNEL_START_DOCSTRING,
 )
 class TFFunnelModel(TFFunnelPreTrainedModel):
-    def __init__(self, config, *inputs, **kwargs):
+    def __init__(self, config: FunnelConfig, *inputs, **kwargs) -> None:
         super().__init__(config, *inputs, **kwargs)
         self.funnel = TFFunnelMainLayer(config, name="funnel")
@@ -1155,16 +1157,16 @@ class TFFunnelModel(TFFunnelPreTrainedModel):
     )
     def call(
         self,
-        input_ids=None,
-        attention_mask=None,
-        token_type_ids=None,
-        inputs_embeds=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-        training=False,
+        input_ids: Optional[TFModelInputType] = None,
+        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        training: bool = False,
         **kwargs,
-    ):
+    ) -> Union[Tuple[tf.Tensor], TFBaseModelOutput]:
         return self.funnel(
             input_ids=input_ids,
@@ -1192,7 +1194,7 @@ class TFFunnelModel(TFFunnelPreTrainedModel):
     FUNNEL_START_DOCSTRING,
 )
 class TFFunnelForPreTraining(TFFunnelPreTrainedModel):
-    def __init__(self, config, **kwargs):
+    def __init__(self, config: FunnelConfig, **kwargs) -> None:
         super().__init__(config, **kwargs)
         self.funnel = TFFunnelMainLayer(config, name="funnel")
@@ -1203,16 +1205,16 @@ class TFFunnelForPreTraining(TFFunnelPreTrainedModel):
     @replace_return_docstrings(output_type=TFFunnelForPreTrainingOutput, config_class=_CONFIG_FOR_DOC)
     def call(
         self,
-        input_ids=None,
-        attention_mask=None,
-        token_type_ids=None,
-        inputs_embeds=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-        training=False,
+        input_ids: Optional[TFModelInputType] = None,
+        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        training: bool = False,
         **kwargs
-    ):
+    ) -> Union[Tuple[tf.Tensor], TFFunnelForPreTrainingOutput]:
         r"""
         Returns:
@@ -1259,16 +1261,16 @@ class TFFunnelForPreTraining(TFFunnelPreTrainedModel):
 @add_start_docstrings("""Funnel Model with a `language modeling` head on top.""", FUNNEL_START_DOCSTRING)
 class TFFunnelForMaskedLM(TFFunnelPreTrainedModel, TFMaskedLanguageModelingLoss):
-    def __init__(self, config, *inputs, **kwargs):
+    def __init__(self, config: FunnelConfig, *inputs, **kwargs) -> None:
         super().__init__(config, *inputs, **kwargs)

         self.funnel = TFFunnelMainLayer(config, name="funnel")
         self.lm_head = TFFunnelMaskedLMHead(config, self.funnel.embeddings, name="lm_head")

-    def get_lm_head(self):
+    def get_lm_head(self) -> TFFunnelMaskedLMHead:
         return self.lm_head

-    def get_prefix_bias_name(self):
+    def get_prefix_bias_name(self) -> str:
         warnings.warn("The method get_prefix_bias_name is deprecated. Please use `get_bias` instead.", FutureWarning)
         return self.name + "/" + self.lm_head.name
@@ -1282,17 +1284,17 @@ class TFFunnelForMaskedLM(TFFunnelPreTrainedModel, TFMaskedLanguageModelingLoss)
     )
     def call(
         self,
-        input_ids=None,
-        attention_mask=None,
-        token_type_ids=None,
-        inputs_embeds=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-        labels=None,
-        training=False,
+        input_ids: Optional[TFModelInputType] = None,
+        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        training: bool = False,
         **kwargs,
-    ):
+    ) -> Union[Tuple[tf.Tensor], TFMaskedLMOutput]:
         r"""
         labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
             Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@@ -1341,7 +1343,7 @@ class TFFunnelForMaskedLM(TFFunnelPreTrainedModel, TFMaskedLanguageModelingLoss)
     FUNNEL_START_DOCSTRING,
 )
 class TFFunnelForSequenceClassification(TFFunnelPreTrainedModel, TFSequenceClassificationLoss):
-    def __init__(self, config, *inputs, **kwargs):
+    def __init__(self, config: FunnelConfig, *inputs, **kwargs) -> None:
         super().__init__(config, *inputs, **kwargs)
         self.num_labels = config.num_labels
@@ -1358,17 +1360,17 @@ class TFFunnelForSequenceClassification(TFFunnelPreTrainedModel, TFSequenceClass
     )
     def call(
         self,
-        input_ids=None,
-        attention_mask=None,
-        token_type_ids=None,
-        inputs_embeds=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-        labels=None,
-        training=False,
+        input_ids: Optional[TFModelInputType] = None,
+        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        training: bool = False,
         **kwargs,
-    ):
+    ) -> Union[Tuple[tf.Tensor], TFSequenceClassifierOutput]:
         r"""
         labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
             Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@@ -1418,7 +1420,7 @@ class TFFunnelForSequenceClassification(TFFunnelPreTrainedModel, TFSequenceClass
     FUNNEL_START_DOCSTRING,
 )
 class TFFunnelForMultipleChoice(TFFunnelPreTrainedModel, TFMultipleChoiceLoss):
-    def __init__(self, config, *inputs, **kwargs):
+    def __init__(self, config: FunnelConfig, *inputs, **kwargs) -> None:
         super().__init__(config, *inputs, **kwargs)

         self.funnel = TFFunnelBaseLayer(config, name="funnel")
@@ -1444,17 +1446,17 @@ class TFFunnelForMultipleChoice(TFFunnelPreTrainedModel, TFMultipleChoiceLoss):
     )
     def call(
         self,
-        input_ids=None,
-        attention_mask=None,
-        token_type_ids=None,
-        inputs_embeds=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-        labels=None,
-        training=False,
+        input_ids: Optional[TFModelInputType] = None,
+        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        training: bool = False,
         **kwargs,
-    ):
+    ) -> Union[Tuple[tf.Tensor], TFMultipleChoiceModelOutput]:
         r"""
         labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
             Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`
@@ -1514,7 +1516,7 @@ class TFFunnelForMultipleChoice(TFFunnelPreTrainedModel, TFMultipleChoiceLoss):
             }
         ]
     )
-    def serving(self, inputs: Dict[str, tf.Tensor]):
+    def serving(self, inputs: Dict[str, tf.Tensor]) -> TFMultipleChoiceModelOutput:
         output = self.call(input_ids=inputs)

         return self.serving_output(output=output)
@@ -1535,7 +1537,7 @@ class TFFunnelForMultipleChoice(TFFunnelPreTrainedModel, TFMultipleChoiceLoss):
     FUNNEL_START_DOCSTRING,
 )
 class TFFunnelForTokenClassification(TFFunnelPreTrainedModel, TFTokenClassificationLoss):
-    def __init__(self, config, *inputs, **kwargs):
+    def __init__(self, config: FunnelConfig, *inputs, **kwargs) -> None:
         super().__init__(config, *inputs, **kwargs)
         self.num_labels = config.num_labels
@@ -1555,17 +1557,17 @@ class TFFunnelForTokenClassification(TFFunnelPreTrainedModel, TFTokenClassificat
     )
     def call(
         self,
-        input_ids=None,
-        attention_mask=None,
-        token_type_ids=None,
-        inputs_embeds=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-        labels=None,
-        training=False,
+        input_ids: Optional[TFModelInputType] = None,
+        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        training: bool = False,
         **kwargs,
-    ):
+    ) -> Union[Tuple[tf.Tensor], TFTokenClassifierOutput]:
         r"""
         labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
             Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@@ -1614,7 +1616,7 @@ class TFFunnelForTokenClassification(TFFunnelPreTrainedModel, TFTokenClassificat
     FUNNEL_START_DOCSTRING,
 )
 class TFFunnelForQuestionAnswering(TFFunnelPreTrainedModel, TFQuestionAnsweringLoss):
-    def __init__(self, config, *inputs, **kwargs):
+    def __init__(self, config: FunnelConfig, *inputs, **kwargs) -> None:
         super().__init__(config, *inputs, **kwargs)
         self.num_labels = config.num_labels
@@ -1633,18 +1635,18 @@ class TFFunnelForQuestionAnswering(TFFunnelPreTrainedModel, TFQuestionAnsweringL
     )
     def call(
         self,
-        input_ids=None,
-        attention_mask=None,
-        token_type_ids=None,
-        inputs_embeds=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-        start_positions=None,
-        end_positions=None,
-        training=False,
+        input_ids: Optional[TFModelInputType] = None,
+        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        training: bool = False,
         **kwargs,
-    ):
+    ) -> Union[Tuple[tf.Tensor], TFQuestionAnsweringModelOutput]:
         r"""
         start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
             Labels for position (index) of the start of the labelled span for computing the token classification loss.
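Closing out the TF file, the `start_positions`/`end_positions` contract in a short sketch (checkpoint and span indices are illustrative assumptions):

```python
import tensorflow as tf
from transformers import FunnelTokenizer, TFFunnelForQuestionAnswering

tokenizer = FunnelTokenizer.from_pretrained("funnel-transformer/small")
model = TFFunnelForQuestionAnswering.from_pretrained("funnel-transformer/small")

inputs = tokenizer("Who wrote it?", "It was written by Jane.", return_tensors="tf")
# Span labels are per-example token indices into the sequence; passing them
# makes the model return a loss alongside the start/end logits.
outputs = model(**inputs, start_positions=tf.constant([5]), end_positions=tf.constant([6]))
print(outputs.loss, outputs.start_logits.shape)
```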
...