Unverified Commit c662c78c authored by Jeremy Fowers, committed by GitHub

Fix the documentation checkpoint for xlm-roberta-xl (#28567)

* Fix the documentation checkpoint for xlm-roberta-xl

* Improve docstring consistency
parent 0754217c
@@ -47,7 +47,7 @@ from .configuration_xlm_roberta_xl import XLMRobertaXLConfig
 logger = logging.get_logger(__name__)
-_CHECKPOINT_FOR_DOC = "xlm-roberta-xlarge"
+_CHECKPOINT_FOR_DOC = "facebook/xlm-roberta-xl"
 _CONFIG_FOR_DOC = "XLMRobertaXLConfig"
 XLM_ROBERTA_XL_PRETRAINED_MODEL_ARCHIVE_LIST = [
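_CHECKPOINT_FOR_DOC is the Hub repository id that the auto-generated docstring examples load, so it has to name a repository that actually exists; "xlm-roberta-xlarge" does not resolve on the Hub, while "facebook/xlm-roberta-xl" does. A minimal sketch of the kind of snippet the corrected id lets readers of the docs run (the exact example text is produced by the doc utilities, not written here):

    from transformers import AutoModel, AutoTokenizer

    # The docstring examples now point at the public Hub repo.
    # The previous id, "xlm-roberta-xlarge", would fail to resolve.
    tokenizer = AutoTokenizer.from_pretrained("facebook/xlm-roberta-xl")
    model = AutoModel.from_pretrained("facebook/xlm-roberta-xl")

    inputs = tokenizer("Hello world", return_tensors="pt")
    outputs = model(**inputs)
    print(outputs.last_hidden_state.shape)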
@@ -653,7 +653,7 @@ XLM_ROBERTA_XL_INPUTS_DOCSTRING = r"""
 @add_start_docstrings(
-    "The bare XLM-RoBERTa-xlarge Model transformer outputting raw hidden-states without any specific head on top.",
+    "The bare XLM-RoBERTa-XL Model transformer outputting raw hidden-states without any specific head on top.",
     XLM_ROBERTA_XL_START_DOCSTRING,
 )
 class XLMRobertaXLModel(XLMRobertaXLPreTrainedModel):
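For context on where these strings end up: add_start_docstrings prepends its arguments to the decorated class's docstring, which is why the wording fixed here appears verbatim in the rendered documentation. A rough sketch of the idea, assuming a simplified decorator rather than the actual transformers utility (which also handles formatting placeholders):

    def add_start_docstrings(*docstr):
        # Prepend the given strings to the decorated object's docstring.
        def decorator(obj):
            obj.__doc__ = "".join(docstr) + (obj.__doc__ or "")
            return obj
        return decorator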
@@ -833,7 +833,7 @@ class XLMRobertaXLModel(XLMRobertaXLPreTrainedModel):
 @add_start_docstrings(
-    """XLM-RoBERTa-xlarge Model with a `language modeling` head on top for CLM fine-tuning.""",
+    """XLM-RoBERTa-XL Model with a `language modeling` head on top for CLM fine-tuning.""",
     XLM_ROBERTA_XL_START_DOCSTRING,
 )
 class XLMRobertaXLForCausalLM(XLMRobertaXLPreTrainedModel):
@@ -990,7 +990,7 @@ class XLMRobertaXLForCausalLM(XLMRobertaXLPreTrainedModel):
 @add_start_docstrings(
-    """XLM-RoBERTa-xlarge Model with a `language modeling` head on top.""", XLM_ROBERTA_XL_START_DOCSTRING
+    """XLM-RoBERTa-XL Model with a `language modeling` head on top.""", XLM_ROBERTA_XL_START_DOCSTRING
 )
 class XLMRobertaXLForMaskedLM(XLMRobertaXLPreTrainedModel):
     _tied_weights_keys = ["lm_head.decoder.weight", "lm_head.decoder.bias"]
@@ -1081,7 +1081,7 @@ class XLMRobertaXLForMaskedLM(XLMRobertaXLPreTrainedModel):
 class XLMRobertaXLLMHead(nn.Module):
-    """XLM-Roberta-xlarge Head for masked language modeling."""
+    """XLM-RoBERTa-XL Head for masked language modeling."""
     def __init__(self, config):
         super().__init__()
@@ -1109,7 +1109,7 @@ class XLMRobertaXLLMHead(nn.Module):
 @add_start_docstrings(
     """
-    XLM-RoBERTa-xlarge Model transformer with a sequence classification/regression head on top (a linear layer on top
+    XLM-RoBERTa-XL Model transformer with a sequence classification/regression head on top (a linear layer on top
     of the pooled output) e.g. for GLUE tasks.
     """,
     XLM_ROBERTA_XL_START_DOCSTRING,
@@ -1203,7 +1203,7 @@ class XLMRobertaXLForSequenceClassification(XLMRobertaXLPreTrainedModel):
 @add_start_docstrings(
     """
-    XLM-Roberta-xlarge Model with a multiple choice classification head on top (a linear layer on top of the pooled
+    XLM-RoBERTa-XL Model with a multiple choice classification head on top (a linear layer on top of the pooled
     output and a softmax) e.g. for RocStories/SWAG tasks.
     """,
     XLM_ROBERTA_XL_START_DOCSTRING,
@@ -1294,7 +1294,7 @@ class XLMRobertaXLForMultipleChoice(XLMRobertaXLPreTrainedModel):
 @add_start_docstrings(
     """
-    XLM-Roberta-xlarge Model with a token classification head on top (a linear layer on top of the hidden-states
+    XLM-RoBERTa-XL Model with a token classification head on top (a linear layer on top of the hidden-states
     output) e.g. for Named-Entity-Recognition (NER) tasks.
     """,
     XLM_ROBERTA_XL_START_DOCSTRING,
@@ -1405,7 +1405,7 @@ class XLMRobertaXLClassificationHead(nn.Module):
 @add_start_docstrings(
     """
-    XLM-Roberta-xlarge Model with a span classification head on top for extractive question-answering tasks like SQuAD
+    XLM-RoBERTa-XL Model with a span classification head on top for extractive question-answering tasks like SQuAD
     (a linear layers on top of the hidden-states output to compute `span start logits` and `span end logits`).
     """,
     XLM_ROBERTA_XL_START_DOCSTRING,
...