Unverified Commit 378142af authored by Sylvain Gugger, committed by GitHub

Rename add_start_docstrings_to_callable (#8120)

parent 6241c873
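The diff below is a pure rename: every call site of add_start_docstrings_to_callable switches to add_start_docstrings_to_model_forward, and only the long DistilBert multiple-choice decorator line and the DPR import are additionally re-wrapped across lines. As a rough usage sketch only (the MyBertStyleModel class and MY_INPUTS_DOCSTRING constant are invented for illustration and assume a transformers version that includes this commit; they are not part of the diff), a decorated forward method looks like this after the rename:

from transformers.file_utils import add_start_docstrings_to_model_forward

# Hypothetical docstring template; real models use constants such as BERT_INPUTS_DOCSTRING.
MY_INPUTS_DOCSTRING = r"""
    Args:
        input_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`):
            Indices of input sequence tokens in the vocabulary.
"""

class MyBertStyleModel:
    # Before this commit the decorator was named add_start_docstrings_to_callable.
    @add_start_docstrings_to_model_forward(MY_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
    def forward(self, input_ids=None):
        return input_ids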
@@ -20,7 +20,7 @@ import torch
 import torch.nn as nn
 from torch.nn import CrossEntropyLoss, MSELoss
-from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_callable
+from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_model_forward
 from transformers.modeling_albert import (
     ALBERT_INPUTS_DOCSTRING,
     ALBERT_START_DOCSTRING,
@@ -87,7 +87,7 @@ class AlbertModelWithPabee(AlbertModel):
         message = f"*** Patience = {self.patience} Avg. Inference Layers = {avg_inf_layers:.2f} Speed Up = {1 - avg_inf_layers / self.config.num_hidden_layers:.2f} ***"
         print(message)
-    @add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING)
     def forward(
         self,
         input_ids=None,
@@ -230,7 +230,7 @@ class AlbertForSequenceClassificationWithPabee(AlbertPreTrainedModel):
         self.init_weights()
-    @add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING)
     def forward(
         self,
         input_ids=None,
...
@@ -22,7 +22,7 @@ import torch
 from torch import nn
 from torch.nn import CrossEntropyLoss, MSELoss
-from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_callable
+from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_model_forward
 from transformers.modeling_bert import (
     BERT_INPUTS_DOCSTRING,
     BERT_START_DOCSTRING,
@@ -92,7 +92,7 @@ class BertModelWithPabee(BertModel):
         message = f"*** Patience = {self.patience} Avg. Inference Layers = {avg_inf_layers:.2f} Speed Up = {1 - avg_inf_layers / self.config.num_hidden_layers:.2f} ***"
         print(message)
-    @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING)
     def forward(
         self,
         input_ids=None,
@@ -254,7 +254,7 @@ class BertForSequenceClassificationWithPabee(BertPreTrainedModel):
         self.init_weights()
-    @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING)
     def forward(
         self,
         input_ids=None,
...
@@ -2,7 +2,7 @@ import torch
 from torch import nn
 from torch.nn import CrossEntropyLoss, MSELoss
-from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_callable
+from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_model_forward
 from transformers.modeling_bert import (
     BERT_INPUTS_DOCSTRING,
     BERT_START_DOCSTRING,
@@ -134,7 +134,7 @@ class DeeBertModel(BertPreTrainedModel):
         for layer, heads in heads_to_prune.items():
             self.encoder.layer[layer].attention.prune_heads(heads)
-    @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING)
     def forward(
         self,
         input_ids=None,
@@ -288,7 +288,7 @@ class DeeBertForSequenceClassification(BertPreTrainedModel):
         self.init_weights()
-    @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING)
     def forward(
         self,
         input_ids=None,
...
@@ -4,7 +4,7 @@ import torch.nn as nn
 from torch.nn import CrossEntropyLoss, MSELoss
 from transformers.configuration_roberta import RobertaConfig
-from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_callable
+from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_model_forward
 from transformers.modeling_roberta import ROBERTA_INPUTS_DOCSTRING, ROBERTA_START_DOCSTRING, RobertaEmbeddings
 from .modeling_highway_bert import BertPreTrainedModel, DeeBertModel, HighwayException, entropy
@@ -45,7 +45,7 @@ class DeeRobertaForSequenceClassification(BertPreTrainedModel):
         self.dropout = nn.Dropout(config.hidden_dropout_prob)
         self.classifier = nn.Linear(config.hidden_size, self.config.num_labels)
-    @add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING)
     def forward(
         self,
         input_ids=None,
...
@@ -28,7 +28,7 @@ from torch.nn import CrossEntropyLoss, MSELoss
 from emmental import MaskedBertConfig
 from emmental.modules import MaskedLinear
-from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_callable
+from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_model_forward
 from transformers.modeling_bert import ACT2FN, BertLayerNorm, load_tf_weights_in_bert
 from transformers.modeling_utils import PreTrainedModel, prune_linear_layer
@@ -498,7 +498,7 @@ class MaskedBertModel(MaskedBertPreTrainedModel):
         for layer, heads in heads_to_prune.items():
             self.encoder.layer[layer].attention.prune_heads(heads)
-    @add_start_docstrings_to_callable(MASKED_BERT_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(MASKED_BERT_INPUTS_DOCSTRING)
     def forward(
         self,
         input_ids=None,
@@ -671,7 +671,7 @@ class MaskedBertForSequenceClassification(MaskedBertPreTrainedModel):
         self.init_weights()
-    @add_start_docstrings_to_callable(MASKED_BERT_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(MASKED_BERT_INPUTS_DOCSTRING)
     def forward(
         self,
         input_ids=None,
@@ -756,7 +756,7 @@ class MaskedBertForMultipleChoice(MaskedBertPreTrainedModel):
         self.init_weights()
-    @add_start_docstrings_to_callable(MASKED_BERT_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(MASKED_BERT_INPUTS_DOCSTRING)
     def forward(
         self,
         input_ids=None,
@@ -846,7 +846,7 @@ class MaskedBertForTokenClassification(MaskedBertPreTrainedModel):
         self.init_weights()
-    @add_start_docstrings_to_callable(MASKED_BERT_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(MASKED_BERT_INPUTS_DOCSTRING)
     def forward(
         self,
         input_ids=None,
@@ -932,7 +932,7 @@ class MaskedBertForQuestionAnswering(MaskedBertPreTrainedModel):
         self.init_weights()
-    @add_start_docstrings_to_callable(MASKED_BERT_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(MASKED_BERT_INPUTS_DOCSTRING)
     def forward(
         self,
         input_ids=None,
...
@@ -425,7 +425,7 @@ def add_start_docstrings(*docstr):
     return docstring_decorator
-def add_start_docstrings_to_callable(*docstr):
+def add_start_docstrings_to_model_forward(*docstr):
     def docstring_decorator(fn):
         class_name = ":class:`~transformers.{}`".format(fn.__qualname__.split(".")[0])
         intro = " The {} forward method, overrides the :func:`__call__` special method.".format(class_name)
...
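The hunk above shows only the renamed signature and the first two lines of the decorator body. For orientation, here is a minimal sketch of how a docstring-prepending decorator of this shape typically works; the class_name and intro lines mirror the hunk, while everything after them (how *docstr is attached to fn.__doc__) is an assumed detail for illustration and is not shown in this diff:

def add_start_docstrings_to_model_forward(*docstr):
    def docstring_decorator(fn):
        class_name = ":class:`~transformers.{}`".format(fn.__qualname__.split(".")[0])
        intro = " The {} forward method, overrides the :func:`__call__` special method.".format(class_name)
        # Assumed: prepend the intro and the supplied docstring fragments to fn's own docstring.
        fn.__doc__ = intro + "".join(docstr) + (fn.__doc__ if fn.__doc__ is not None else "")
        return fn

    return docstring_decorator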
@@ -30,7 +30,7 @@ from .file_utils import (
     ModelOutput,
     add_code_sample_docstrings,
     add_start_docstrings,
-    add_start_docstrings_to_callable,
+    add_start_docstrings_to_model_forward,
     replace_return_docstrings,
 )
 from .modeling_outputs import (
@@ -631,7 +631,7 @@ class AlbertModel(AlbertPreTrainedModel):
             inner_group_idx = int(layer - group_idx * self.config.inner_group_num)
             self.encoder.albert_layer_groups[group_idx].albert_layers[inner_group_idx].attention.prune_heads(heads)
-    @add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="albert-base-v2",
@@ -727,7 +727,7 @@ class AlbertForPreTraining(AlbertPreTrainedModel):
     def get_input_embeddings(self):
         return self.albert.embeddings.word_embeddings
-    @add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @replace_return_docstrings(output_type=AlbertForPreTrainingOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
@@ -879,7 +879,7 @@ class AlbertForMaskedLM(AlbertPreTrainedModel):
     def get_input_embeddings(self):
         return self.albert.embeddings.word_embeddings
-    @add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="albert-base-v2",
@@ -967,7 +967,7 @@ class AlbertForSequenceClassification(AlbertPreTrainedModel):
         self.init_weights()
-    @add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="albert-base-v2",
@@ -1055,7 +1055,7 @@ class AlbertForTokenClassification(AlbertPreTrainedModel):
         self.init_weights()
-    @add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="albert-base-v2",
@@ -1143,7 +1143,7 @@ class AlbertForQuestionAnswering(AlbertPreTrainedModel):
         self.init_weights()
-    @add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="albert-base-v2",
@@ -1242,7 +1242,7 @@ class AlbertForMultipleChoice(AlbertPreTrainedModel):
         self.init_weights()
-    @add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
+    @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="albert-base-v2",
...
@@ -30,7 +30,7 @@ from .file_utils import (
     add_code_sample_docstrings,
     add_end_docstrings,
     add_start_docstrings,
-    add_start_docstrings_to_callable,
+    add_start_docstrings_to_model_forward,
     replace_return_docstrings,
 )
 from .modeling_outputs import (
@@ -846,7 +846,7 @@ class BartModel(PretrainedBartModel):
         self.init_weights()
-    @add_start_docstrings_to_callable(BART_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(BART_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="facebook/bart-large",
@@ -981,7 +981,7 @@ class BartForConditionalGeneration(PretrainedBartModel):
         new_bias = torch.cat([self.final_logits_bias, extra_bias], dim=1)
         self.register_buffer("final_logits_bias", new_bias)
-    @add_start_docstrings_to_callable(BART_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(BART_INPUTS_DOCSTRING)
     @replace_return_docstrings(output_type=Seq2SeqLMOutput, config_class=_CONFIG_FOR_DOC)
     @add_end_docstrings(BART_GENERATION_EXAMPLE)
     def forward(
@@ -1147,7 +1147,7 @@ class BartForSequenceClassification(PretrainedBartModel):
         self.model._init_weights(self.classification_head.dense)
         self.model._init_weights(self.classification_head.out_proj)
-    @add_start_docstrings_to_callable(BART_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(BART_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="facebook/bart-large",
@@ -1234,7 +1234,7 @@ class BartForQuestionAnswering(PretrainedBartModel):
         self.model._init_weights(self.qa_outputs)
-    @add_start_docstrings_to_callable(BART_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(BART_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="facebook/bart-large",
...
@@ -33,7 +33,7 @@ from .file_utils import (
     ModelOutput,
     add_code_sample_docstrings,
     add_start_docstrings,
-    add_start_docstrings_to_callable,
+    add_start_docstrings_to_model_forward,
     replace_return_docstrings,
 )
 from .modeling_outputs import (
@@ -748,7 +748,7 @@ class BertModel(BertPreTrainedModel):
         for layer, heads in heads_to_prune.items():
             self.encoder.layer[layer].attention.prune_heads(heads)
-    @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="bert-base-uncased",
@@ -870,7 +870,7 @@ class BertForPreTraining(BertPreTrainedModel):
     def get_output_embeddings(self):
         return self.cls.predictions.decoder
-    @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @replace_return_docstrings(output_type=BertForPreTrainingOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
@@ -983,7 +983,7 @@ class BertLMHeadModel(BertPreTrainedModel):
     def get_output_embeddings(self):
         return self.cls.predictions.decoder
-    @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @replace_return_docstrings(output_type=CausalLMOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
@@ -1103,7 +1103,7 @@ class BertForMaskedLM(BertPreTrainedModel):
     def get_output_embeddings(self):
         return self.cls.predictions.decoder
-    @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="bert-base-uncased",
@@ -1206,7 +1206,7 @@ class BertForNextSentencePrediction(BertPreTrainedModel):
         self.init_weights()
-    @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @replace_return_docstrings(output_type=NextSentencePredictorOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
@@ -1300,7 +1300,7 @@ class BertForSequenceClassification(BertPreTrainedModel):
         self.init_weights()
-    @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="bert-base-uncased",
@@ -1384,7 +1384,7 @@ class BertForMultipleChoice(BertPreTrainedModel):
         self.init_weights()
-    @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
+    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="bert-base-uncased",
@@ -1479,7 +1479,7 @@ class BertForTokenClassification(BertPreTrainedModel):
         self.init_weights()
-    @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="bert-base-uncased",
@@ -1569,7 +1569,7 @@ class BertForQuestionAnswering(BertPreTrainedModel):
         self.init_weights()
-    @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="bert-base-uncased",
...
@@ -24,7 +24,7 @@ from .configuration_bert_generation import BertGenerationConfig
 from .file_utils import (
     add_code_sample_docstrings,
     add_start_docstrings,
-    add_start_docstrings_to_callable,
+    add_start_docstrings_to_model_forward,
     replace_return_docstrings,
 )
 from .modeling_bert import BertEncoder
@@ -293,7 +293,7 @@ class BertGenerationEncoder(BertGenerationPreTrainedModel):
         for layer, heads in heads_to_prune.items():
             self.encoder.layer[layer].attention.prune_heads(heads)
-    @add_start_docstrings_to_callable(BERT_GENERATION_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(BERT_GENERATION_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="google/bert_for_seq_generation_L-24_bbc_encoder",
@@ -421,7 +421,7 @@ class BertGenerationDecoder(BertGenerationPreTrainedModel):
     def get_output_embeddings(self):
         return self.lm_head.decoder
-    @add_start_docstrings_to_callable(BERT_GENERATION_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(BERT_GENERATION_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @replace_return_docstrings(output_type=CausalLMOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
...
@@ -24,7 +24,7 @@ import torch.nn as nn
 from torch.nn import CrossEntropyLoss
 from .configuration_ctrl import CTRLConfig
-from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable
+from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward
 from .modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast
 from .modeling_utils import Conv1D, PreTrainedModel, find_pruneable_heads_and_indices, prune_linear_layer
 from .utils import logging
@@ -349,7 +349,7 @@ class CTRLModel(CTRLPreTrainedModel):
         for layer, heads in heads_to_prune.items():
             self.h[layer].multi_head_attention.prune_heads(heads)
-    @add_start_docstrings_to_callable(CTRL_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(CTRL_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="ctrl",
@@ -521,7 +521,7 @@ class CTRLLMHeadModel(CTRLPreTrainedModel):
         return {"input_ids": input_ids, "past_key_values": past, "use_cache": kwargs["use_cache"]}
-    @add_start_docstrings_to_callable(CTRL_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(CTRL_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="ctrl",
...
@@ -24,7 +24,7 @@ from torch.nn import CrossEntropyLoss
 from .activations import ACT2FN
 from .configuration_deberta import DebertaConfig
-from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable
+from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward
 from .modeling_outputs import BaseModelOutput, SequenceClassifierOutput
 from .modeling_utils import PreTrainedModel
 from .utils import logging
@@ -858,7 +858,7 @@ class DebertaModel(DebertaPreTrainedModel):
         """
         raise NotImplementedError("The prune function is not implemented in DeBERTa model.")
-    @add_start_docstrings_to_callable(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="microsoft/deberta-base",
@@ -976,7 +976,7 @@ class DebertaForSequenceClassification(DebertaPreTrainedModel):
     def set_input_embeddings(self, new_embeddings):
         self.deberta.set_input_embeddings(new_embeddings)
-    @add_start_docstrings_to_callable(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="microsoft/deberta-base",
...
@@ -32,7 +32,7 @@ from .configuration_distilbert import DistilBertConfig
 from .file_utils import (
     add_code_sample_docstrings,
     add_start_docstrings,
-    add_start_docstrings_to_callable,
+    add_start_docstrings_to_model_forward,
     replace_return_docstrings,
 )
 from .modeling_outputs import (
@@ -436,7 +436,7 @@ class DistilBertModel(DistilBertPreTrainedModel):
         for layer, heads in heads_to_prune.items():
             self.transformer.layer[layer].attention.prune_heads(heads)
-    @add_start_docstrings_to_callable(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, num_choices"))
+    @add_start_docstrings_to_model_forward(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, num_choices"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="distilbert-base-uncased",
@@ -509,7 +509,7 @@ class DistilBertForMaskedLM(DistilBertPreTrainedModel):
     def get_output_embeddings(self):
         return self.vocab_projector
-    @add_start_docstrings_to_callable(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, num_choices"))
+    @add_start_docstrings_to_model_forward(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, num_choices"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="distilbert-base-uncased",
@@ -595,7 +595,7 @@ class DistilBertForSequenceClassification(DistilBertPreTrainedModel):
         self.init_weights()
-    @add_start_docstrings_to_callable(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, num_choices"))
+    @add_start_docstrings_to_model_forward(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, num_choices"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="distilbert-base-uncased",
@@ -676,7 +676,7 @@ class DistilBertForQuestionAnswering(DistilBertPreTrainedModel):
         self.init_weights()
-    @add_start_docstrings_to_callable(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, num_choices"))
+    @add_start_docstrings_to_model_forward(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, num_choices"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="distilbert-base-uncased",
@@ -772,7 +772,7 @@ class DistilBertForTokenClassification(DistilBertPreTrainedModel):
         self.init_weights()
-    @add_start_docstrings_to_callable(DISTILBERT_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(DISTILBERT_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="distilbert-base-uncased",
@@ -856,7 +856,9 @@ class DistilBertForMultipleChoice(DistilBertPreTrainedModel):
         self.init_weights()
-    @add_start_docstrings_to_callable(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
+    @add_start_docstrings_to_model_forward(
+        DISTILBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")
+    )
     @replace_return_docstrings(output_type=MultipleChoiceModelOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
...
@@ -22,7 +22,12 @@ import torch
 from torch import Tensor, nn
 from .configuration_dpr import DPRConfig
-from .file_utils import ModelOutput, add_start_docstrings, add_start_docstrings_to_callable, replace_return_docstrings
+from .file_utils import (
+    ModelOutput,
+    add_start_docstrings,
+    add_start_docstrings_to_model_forward,
+    replace_return_docstrings,
+)
 from .modeling_bert import BertModel
 from .modeling_outputs import BaseModelOutputWithPooling
 from .modeling_utils import PreTrainedModel
@@ -431,7 +436,7 @@ class DPRContextEncoder(DPRPretrainedContextEncoder):
         self.ctx_encoder = DPREncoder(config)
         self.init_weights()
-    @add_start_docstrings_to_callable(DPR_ENCODERS_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(DPR_ENCODERS_INPUTS_DOCSTRING)
     @replace_return_docstrings(output_type=DPRContextEncoderOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
@@ -509,7 +514,7 @@ class DPRQuestionEncoder(DPRPretrainedQuestionEncoder):
         self.question_encoder = DPREncoder(config)
         self.init_weights()
-    @add_start_docstrings_to_callable(DPR_ENCODERS_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(DPR_ENCODERS_INPUTS_DOCSTRING)
     @replace_return_docstrings(output_type=DPRQuestionEncoderOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
@@ -586,7 +591,7 @@ class DPRReader(DPRPretrainedReader):
         self.span_predictor = DPRSpanPredictor(config)
         self.init_weights()
-    @add_start_docstrings_to_callable(DPR_READER_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(DPR_READER_INPUTS_DOCSTRING)
     @replace_return_docstrings(output_type=DPRReaderOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
...
@@ -30,7 +30,7 @@ from .file_utils import (
     ModelOutput,
     add_code_sample_docstrings,
     add_start_docstrings,
-    add_start_docstrings_to_callable,
+    add_start_docstrings_to_model_forward,
     replace_return_docstrings,
 )
 from .modeling_outputs import (
@@ -693,7 +693,7 @@ class ElectraModel(ElectraPreTrainedModel):
         for layer, heads in heads_to_prune.items():
             self.encoder.layer[layer].attention.prune_heads(heads)
-    @add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="google/electra-small-discriminator",
@@ -791,7 +791,7 @@ class ElectraForSequenceClassification(ElectraPreTrainedModel):
         self.init_weights()
-    @add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="google/electra-small-discriminator",
@@ -873,7 +873,7 @@ class ElectraForPreTraining(ElectraPreTrainedModel):
         self.discriminator_predictions = ElectraDiscriminatorPredictions(config)
         self.init_weights()
-    @add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @replace_return_docstrings(output_type=ElectraForPreTrainingOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
@@ -971,7 +971,7 @@ class ElectraForMaskedLM(ElectraPreTrainedModel):
     def get_output_embeddings(self):
         return self.generator_lm_head
-    @add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="google/electra-small-discriminator",
@@ -1060,7 +1060,7 @@ class ElectraForTokenClassification(ElectraPreTrainedModel):
         self.classifier = nn.Linear(config.hidden_size, config.num_labels)
         self.init_weights()
-    @add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="google/electra-small-discriminator",
@@ -1147,7 +1147,7 @@ class ElectraForQuestionAnswering(ElectraPreTrainedModel):
         self.init_weights()
-    @add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="google/electra-small-discriminator",
@@ -1248,7 +1248,7 @@ class ElectraForMultipleChoice(ElectraPreTrainedModel):
         self.init_weights()
-    @add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
+    @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="google/electra-small-discriminator",
...
@@ -19,7 +19,7 @@ from typing import Optional
 from .configuration_encoder_decoder import EncoderDecoderConfig
 from .configuration_utils import PretrainedConfig
-from .file_utils import add_start_docstrings, add_start_docstrings_to_callable, replace_return_docstrings
+from .file_utils import add_start_docstrings, add_start_docstrings_to_model_forward, replace_return_docstrings
 from .modeling_outputs import Seq2SeqLMOutput
 from .modeling_utils import PreTrainedModel
 from .utils import logging
@@ -335,7 +335,7 @@ class EncoderDecoderModel(PreTrainedModel):
         config = EncoderDecoderConfig.from_encoder_decoder_configs(encoder.config, decoder.config, **kwargs)
         return cls(encoder=encoder, decoder=decoder, config=config)
-    @add_start_docstrings_to_callable(ENCODER_DECODER_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(ENCODER_DECODER_INPUTS_DOCSTRING)
     @replace_return_docstrings(output_type=Seq2SeqLMOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
...
@@ -21,7 +21,7 @@ import torch
 from torch.nn import functional as F
 from .configuration_flaubert import FlaubertConfig
-from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable
+from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward
 from .modeling_outputs import BaseModelOutput
 from .modeling_xlm import (
     XLMForMultipleChoice,
@@ -140,7 +140,7 @@ class FlaubertModel(XLMModel):
         self.layerdrop = getattr(config, "layerdrop", 0.0)
         self.pre_norm = getattr(config, "pre_norm", False)
-    @add_start_docstrings_to_callable(FLAUBERT_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(FLAUBERT_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="flaubert/flaubert_base_cased",
...
@@ -43,7 +43,7 @@ from .file_utils import (
     add_code_sample_docstrings,
     add_end_docstrings,
     add_start_docstrings,
-    add_start_docstrings_to_callable,
+    add_start_docstrings_to_model_forward,
     replace_return_docstrings,
 )
 from .modeling_outputs import BaseModelOutput, BaseModelOutputWithPast, Seq2SeqLMOutput, Seq2SeqModelOutput
@@ -899,7 +899,7 @@ class FSMTModel(PretrainedFSMTModel):
         self.init_weights()
-    @add_start_docstrings_to_callable(FSMT_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(FSMT_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="facebook/wmt19-ru-en",
@@ -1039,7 +1039,7 @@ class FSMTForConditionalGeneration(PretrainedFSMTModel):
         return new_embeddings
-    @add_start_docstrings_to_callable(FSMT_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(FSMT_INPUTS_DOCSTRING)
     @replace_return_docstrings(output_type=Seq2SeqLMOutput, config_class=_CONFIG_FOR_DOC)
     @add_end_docstrings(FSMT_GENERATION_EXAMPLE)
     def forward(
...
@@ -30,7 +30,7 @@ from .file_utils import (
     ModelOutput,
     add_code_sample_docstrings,
     add_start_docstrings,
-    add_start_docstrings_to_callable,
+    add_start_docstrings_to_model_forward,
     replace_return_docstrings,
 )
 from .modeling_outputs import (
@@ -906,7 +906,7 @@ class FunnelBaseModel(FunnelPreTrainedModel):
     def set_input_embeddings(self, new_embeddings):
         self.embeddings.word_embeddings = new_embeddings
-    @add_start_docstrings_to_callable(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="funnel-transformer/small-base",
@@ -983,7 +983,7 @@ class FunnelModel(FunnelPreTrainedModel):
     def set_input_embeddings(self, new_embeddings):
         self.embeddings.word_embeddings = new_embeddings
-    @add_start_docstrings_to_callable(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="funnel-transformer/small",
@@ -1082,7 +1082,7 @@ class FunnelForPreTraining(FunnelPreTrainedModel):
         self.discriminator_predictions = FunnelDiscriminatorPredictions(config)
         self.init_weights()
-    @add_start_docstrings_to_callable(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @replace_return_docstrings(output_type=FunnelForPreTrainingOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
@@ -1167,7 +1167,7 @@ class FunnelForMaskedLM(FunnelPreTrainedModel):
     def get_output_embeddings(self):
         return self.lm_head
-    @add_start_docstrings_to_callable(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="funnel-transformer/small",
@@ -1240,7 +1240,7 @@ class FunnelForSequenceClassification(FunnelPreTrainedModel):
         self.classifier = FunnelClassificationHead(config, config.num_labels)
         self.init_weights()
-    @add_start_docstrings_to_callable(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="funnel-transformer/small-base",
@@ -1317,7 +1317,7 @@ class FunnelForMultipleChoice(FunnelPreTrainedModel):
         self.classifier = FunnelClassificationHead(config, 1)
         self.init_weights()
-    @add_start_docstrings_to_callable(FUNNEL_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
+    @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="funnel-transformer/small-base",
@@ -1403,7 +1403,7 @@ class FunnelForTokenClassification(FunnelPreTrainedModel):
         self.init_weights()
-    @add_start_docstrings_to_callable(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="funnel-transformer/small",
@@ -1485,7 +1485,7 @@ class FunnelForQuestionAnswering(FunnelPreTrainedModel):
         self.init_weights()
-    @add_start_docstrings_to_callable(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="funnel-transformer/small",
...
@@ -30,7 +30,7 @@ from .file_utils import (
     ModelOutput,
     add_code_sample_docstrings,
     add_start_docstrings,
-    add_start_docstrings_to_callable,
+    add_start_docstrings_to_model_forward,
     replace_return_docstrings,
 )
 from .modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast, SequenceClassifierOutputWithPast
@@ -502,7 +502,7 @@ class GPT2Model(GPT2PreTrainedModel):
         for layer, heads in heads_to_prune.items():
             self.h[layer].attn.prune_heads(heads)
-    @add_start_docstrings_to_callable(GPT2_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(GPT2_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="gpt2",
@@ -723,7 +723,7 @@ class GPT2LMHeadModel(GPT2PreTrainedModel):
             "attention_mask": attention_mask,
         }
-    @add_start_docstrings_to_callable(GPT2_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(GPT2_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="gpt2",
@@ -837,7 +837,7 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
             "use_cache": kwargs.get("use_cache"),
         }
-    @add_start_docstrings_to_callable(GPT2_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(GPT2_INPUTS_DOCSTRING)
     @replace_return_docstrings(output_type=GPT2DoubleHeadsModelOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
@@ -987,7 +987,7 @@ class GPT2ForSequenceClassification(GPT2PreTrainedModel):
         self.init_weights()
-    @add_start_docstrings_to_callable(GPT2_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(GPT2_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="microsoft/dialogrpt",
...