@@ -1042,12 +1042,12 @@ class RobertaPreLayerNormForCausalLM(RobertaPreLayerNormPreTrainedModel):
@add_start_docstrings(
"""RoBERTa-PreLayerNorm Model with a `language modeling` head on top.""",ROBERTA_PRELAYERNORM_START_DOCSTRING
)
    # Copied from transformers.models.roberta.modeling_roberta.RobertaForMaskedLM with ROBERTA->ROBERTA_PRELAYERNORM,Roberta->RobertaPreLayerNorm,roberta->roberta_prelayernorm
    # Copied from transformers.models.roberta.modeling_roberta.RobertaForMaskedLM.__init__ with ROBERTA->ROBERTA_PRELAYERNORM,Roberta->RobertaPreLayerNorm,roberta->roberta_prelayernorm
    def __init__(self, config):
        super().__init__(config)
...
...
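The `@add_start_docstrings(...)` decorator kept as context in this hunk is what stitches the short per-class description together with the shared `ROBERTA_PRELAYERNORM_START_DOCSTRING` boilerplate. A minimal sketch of that behavior, simplified from what the decorator in `transformers.utils` does (the `DemoModel` class and `START_DOCSTRING` text below are made up for illustration):

```python
# Simplified sketch of add_start_docstrings: it prepends the given strings to the
# decorated object's docstring. DemoModel and START_DOCSTRING are illustrative only.
def add_start_docstrings(*docstr):
    def docstring_decorator(obj):
        obj.__doc__ = "".join(docstr) + (obj.__doc__ or "")
        return obj

    return docstring_decorator


START_DOCSTRING = "\nShared intro describing the config, inputs, and usage caveats.\n"


@add_start_docstrings("""Model with a `language modeling` head on top.""", START_DOCSTRING)
class DemoModel:
    """Class-specific details."""


print(DemoModel.__doc__)  # the shared boilerplate appears before the class-specific text
```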
@@ -1078,10 +1078,8 @@ class RobertaPreLayerNormForMaskedLM(RobertaPreLayerNormPreTrainedModel):
        checkpoint=_CHECKPOINT_FOR_DOC,
        output_type=MaskedLMOutput,
        config_class=_CONFIG_FOR_DOC,
        mask="<mask>",
        expected_output="' Paris'",
        expected_loss=0.1,
    )
    # Copied from transformers.models.roberta.modeling_roberta.RobertaForMaskedLM.forward with ROBERTA->ROBERTA_PRELAYERNORM,Roberta->RobertaPreLayerNorm,roberta->roberta_prelayernorm
    def forward(
        self,
        input_ids: Optional[torch.LongTensor] = None,
...
...
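For reference, the `expected_output="' Paris'"` / `expected_loss=0.1` arguments touched in this hunk encode the fill-mask doctest that `add_code_sample_docstrings` generates for `RobertaPreLayerNormForMaskedLM`. A rough, hand-written equivalent of that check is sketched below; the checkpoint id is an assumption (whatever `_CHECKPOINT_FOR_DOC` resolves to), not something shown in the hunk:

```python
# Hand-written sketch of the fill-mask doctest behind expected_output="' Paris'".
# The checkpoint id is assumed; substitute whatever _CHECKPOINT_FOR_DOC points to.
import torch
from transformers import AutoTokenizer, RobertaPreLayerNormForMaskedLM

checkpoint = "andreasmadsen/efficient_mlm_m0.40"  # assumed doc checkpoint
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = RobertaPreLayerNormForMaskedLM.from_pretrained(checkpoint)

inputs = tokenizer("The capital of France is <mask>.", return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits

# Take the highest-scoring token at the <mask> position.
mask_pos = (inputs.input_ids == tokenizer.mask_token_id)[0].nonzero(as_tuple=True)[0]
predicted_id = logits[0, mask_pos].argmax(dim=-1)
print(tokenizer.decode(predicted_id))  # the doctest asserted this to be " Paris"
```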
@@ -1199,8 +1197,6 @@ class RobertaPreLayerNormForSequenceClassification(RobertaPreLayerNormPreTrainedModel):
        checkpoint=_CHECKPOINT_FOR_DOC,
        output_type=SequenceClassifierOutput,
        config_class=_CONFIG_FOR_DOC,
        expected_output="'optimism'",
        expected_loss=0.08,
    )
    # Copied from transformers.models.roberta.modeling_roberta.RobertaForSequenceClassification.forward with roberta->roberta_prelayernorm
    def forward(
...
...
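Similarly, the `expected_output="'optimism'"` / `expected_loss=0.08` pair in the sequence-classification hunk comes straight from the RoBERTa code sample, which targets an emotion-classification fine-tune. A hedged sketch of the corresponding check is below; the checkpoint id is a placeholder, since no fine-tuned RobertaPreLayerNorm classifier is named in this hunk:

```python
# Sketch of the sequence-classification doctest behind expected_output="'optimism'".
# "your-org/roberta-prelayernorm-emotion" is a placeholder, not a real model id;
# the label string comes from the checkpoint's id2label mapping.
import torch
from transformers import AutoTokenizer, RobertaPreLayerNormForSequenceClassification

checkpoint = "your-org/roberta-prelayernorm-emotion"  # placeholder fine-tuned checkpoint
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = RobertaPreLayerNormForSequenceClassification.from_pretrained(checkpoint)

inputs = tokenizer("Feeling great about the weekend!", return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits

predicted_class = logits.argmax(dim=-1).item()
print(model.config.id2label[predicted_class])  # e.g. "optimism" for an emotion fine-tune
```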
@@ -1400,8 +1396,6 @@ class RobertaPreLayerNormForTokenClassification(RobertaPreLayerNormPreTrainedModel):