Unverified commit 90ecc296 authored by Daniel Hug, committed by GitHub

Refactoring checkpoint names for multiple models (#10527)

* Refactor checkpoint name in ALBERT and ALBERT_tf

* Refactor checkpoint name in BART and BART_tf

* Refactor checkpoint name in BERT generation

* Refactor checkpoint name in Blenderbot_tf

* Refactor checkpoint name in Blenderbot_small_tf

* Refactor checkpoint name in ConvBERT and ConvBERT_tf

* Refactor checkpoint name in CTRL and CTRL_tf

* Refactor checkpoint name in DistilBERT and DistilBERT_tf

* Refactor checkpoint name in DistilBERT redo

* Refactor checkpoint name in Electra and Electra_tf

* Refactor checkpoint name in FlauBERT and FlauBERT_tf

* Refactor checkpoint name in FSMT

* Refactor checkpoint name in GPT2 and GPT2_tf

* Refactor checkpoint name in IBERT

* Refactor checkpoint name in LED and LED_tf

* Refactor checkpoint name in Longformer and Longformer_tf

* Refactor checkpoint name in Lxmert and Lxmert_tf

* Refactor checkpoint name in Marian_tf

* Refactor checkpoint name in MBART and MBART_tf

* Refactor checkpoint name in MobileBERT and MobileBERT_tf

* Refactor checkpoint name in mpnet and mpnet_tf

* Refactor checkpoint name in openai and openai_tf

* Refactor checkpoint name in pegasus_tf

* Refactor checkpoint name in reformer

* Refactor checkpoint name in Roberta and Roberta_tf

* Refactor checkpoint name in SqueezeBert

* Refactor checkpoint name in Transformer_xl and Transformer_xl_tf

* Refactor checkpoint name in XLM and XLM_tf

* Refactor checkpoint name in XLNET and XLNET_tf

* Refactor checkpoint name in BERT_tf

* Run make tests, style, quality, fixup
parent defe9e20
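The change is the same in every file touched: each modeling file gains a module-level `_CHECKPOINT_FOR_DOC` constant next to the existing `_CONFIG_FOR_DOC` and `_TOKENIZER_FOR_DOC`, and every `@add_code_sample_docstrings(...)` decorator references the constant instead of repeating a hard-coded checkpoint string. A minimal, self-contained sketch of the pattern follows; the toy decorator body is invented for illustration and is not the real transformers implementation.

```python
# Toy stand-in for transformers' add_code_sample_docstrings decorator,
# showing why the checkpoint string is hoisted into one constant. Only the
# constant name mirrors the real files; the decorator body is invented.

_CHECKPOINT_FOR_DOC = "squeezebert/squeezebert-uncased"  # defined once per file


def add_code_sample_docstrings(*, checkpoint, **_ignored):
    """Append a usage example that cites `checkpoint` to the wrapped docstring."""

    def decorator(fn):
        example = f'\n    Example:\n        Model.from_pretrained("{checkpoint}")\n'
        fn.__doc__ = (fn.__doc__ or "") + example
        return fn

    return decorator


class SqueezeBertModel:
    # Before this commit the string "squeezebert/squeezebert-mnli-headless"
    # was hard-coded here; now every decorator in the file shares the constant.
    @add_code_sample_docstrings(checkpoint=_CHECKPOINT_FOR_DOC)
    def forward(self, input_ids):
        """Run the encoder."""
        return input_ids


print(SqueezeBertModel.forward.__doc__)  # the generated example cites the constant
```

One visible side effect: where a file previously documented different checkpoints per head (for example `squeezebert/squeezebert-mnli-headless` for most SqueezeBERT heads), all heads in that file now document the single constant's value.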
modeling_squeezebert.py
@@ -39,6 +39,7 @@ from .configuration_squeezebert import SqueezeBertConfig
 logger = logging.get_logger(__name__)
+_CHECKPOINT_FOR_DOC = "squeezebert/squeezebert-uncased"
 _CONFIG_FOR_DOC = "SqueezeBertConfig"
 _TOKENIZER_FOR_DOC = "SqueezeBertTokenizer"
@@ -572,7 +573,7 @@ class SqueezeBertModel(SqueezeBertPreTrainedModel):
     @add_start_docstrings_to_model_forward(SQUEEZEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
-        checkpoint="squeezebert/squeezebert-mnli-headless",
+        checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=BaseModelOutputWithPooling,
         config_class=_CONFIG_FOR_DOC,
     )
@@ -665,7 +666,7 @@ class SqueezeBertForMaskedLM(SqueezeBertPreTrainedModel):
     @add_start_docstrings_to_model_forward(SQUEEZEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
-        checkpoint="squeezebert/squeezebert-uncased",
+        checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=MaskedLMOutput,
         config_class=_CONFIG_FOR_DOC,
     )
@@ -743,7 +744,7 @@ class SqueezeBertForSequenceClassification(SqueezeBertPreTrainedModel):
     @add_start_docstrings_to_model_forward(SQUEEZEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
-        checkpoint="squeezebert/squeezebert-mnli-headless",
+        checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=SequenceClassifierOutput,
         config_class=_CONFIG_FOR_DOC,
     )
@@ -829,7 +830,7 @@ class SqueezeBertForMultipleChoice(SqueezeBertPreTrainedModel):
     )
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
-        checkpoint="squeezebert/squeezebert-mnli-headless",
+        checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=MultipleChoiceModelOutput,
         config_class=_CONFIG_FOR_DOC,
     )
@@ -921,7 +922,7 @@ class SqueezeBertForTokenClassification(SqueezeBertPreTrainedModel):
     @add_start_docstrings_to_model_forward(SQUEEZEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
-        checkpoint="squeezebert/squeezebert-mnli-headless",
+        checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=TokenClassifierOutput,
         config_class=_CONFIG_FOR_DOC,
     )
@@ -1008,7 +1009,7 @@ class SqueezeBertForQuestionAnswering(SqueezeBertPreTrainedModel):
     @add_start_docstrings_to_model_forward(SQUEEZEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
-        checkpoint="squeezebert/squeezebert-mnli-headless",
+        checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=QuestionAnsweringModelOutput,
         config_class=_CONFIG_FOR_DOC,
     )
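For reference, the code sample that the decorator generates for `SqueezeBertModel` amounts to loading the documented checkpoint. A hand-written sketch of the equivalent usage with the public transformers API (the example sentence is arbitrary):

```python
# Equivalent of the generated doc sample for SqueezeBertModel, using the
# checkpoint value now held in _CHECKPOINT_FOR_DOC.
from transformers import SqueezeBertModel, SqueezeBertTokenizer

tokenizer = SqueezeBertTokenizer.from_pretrained("squeezebert/squeezebert-uncased")
model = SqueezeBertModel.from_pretrained("squeezebert/squeezebert-uncased")

inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
outputs = model(**inputs)
last_hidden_state = outputs.last_hidden_state  # (batch_size, seq_len, hidden_size)
```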
modeling_tf_transfo_xl.py
@@ -43,6 +43,7 @@ from .modeling_tf_transfo_xl_utilities import TFAdaptiveSoftmaxMask
 logger = logging.get_logger(__name__)
+_CHECKPOINT_FOR_DOC = "transfo-xl-wt103"
 _CONFIG_FOR_DOC = "TransfoXLConfig"
 _TOKENIZER_FOR_DOC = "TransfoXLTokenizer"
@@ -883,7 +884,7 @@ class TFTransfoXLModel(TFTransfoXLPreTrainedModel):
     @add_start_docstrings_to_model_forward(TRANSFO_XL_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
-        checkpoint="transfo-xl-wt103",
+        checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=TFTransfoXLModelOutput,
         config_class=_CONFIG_FOR_DOC,
     )
@@ -975,7 +976,7 @@ class TFTransfoXLLMHeadModel(TFTransfoXLPreTrainedModel):
     @add_start_docstrings_to_model_forward(TRANSFO_XL_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
-        checkpoint="transfo-xl-wt103",
+        checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=TFTransfoXLLMHeadModelOutput,
         config_class=_CONFIG_FOR_DOC,
     )
@@ -1091,7 +1092,7 @@ class TFTransfoXLForSequenceClassification(TFTransfoXLPreTrainedModel, TFSequenc
     @add_start_docstrings_to_model_forward(TRANSFO_XL_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
-        checkpoint="transfo-xl-wt103",
+        checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=TFTransfoXLSequenceClassifierOutputWithPast,
         config_class=_CONFIG_FOR_DOC,
     )
modeling_transfo_xl.py
@@ -39,6 +39,7 @@ from .modeling_transfo_xl_utilities import ProjectedAdaptiveLogSoftmax
 logger = logging.get_logger(__name__)
+_CHECKPOINT_FOR_DOC = "transfo-xl-wt103"
 _CONFIG_FOR_DOC = "TransfoXLConfig"
 _TOKENIZER_FOR_DOC = "TransfoXLTokenizer"
@@ -872,7 +873,7 @@ class TransfoXLModel(TransfoXLPreTrainedModel):
     @add_start_docstrings_to_model_forward(TRANSFO_XL_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
-        checkpoint="transfo-xl-wt103",
+        checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=TransfoXLModelOutput,
         config_class=_CONFIG_FOR_DOC,
     )
@@ -1053,7 +1054,7 @@ class TransfoXLLMHeadModel(TransfoXLPreTrainedModel):
     @add_start_docstrings_to_model_forward(TRANSFO_XL_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
-        checkpoint="transfo-xl-wt103",
+        checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=TransfoXLLMHeadModelOutput,
         config_class=_CONFIG_FOR_DOC,
     )
@@ -1175,7 +1176,7 @@ class TransfoXLForSequenceClassification(TransfoXLPreTrainedModel):
     @add_start_docstrings_to_model_forward(TRANSFO_XL_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
-        checkpoint="transfo-xl-wt103",
+        checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=TransfoXLSequenceClassifierOutputWithPast,
         config_class=_CONFIG_FOR_DOC,
     )
modeling_tf_xlm.py
@@ -58,6 +58,7 @@ from .configuration_xlm import XLMConfig
 logger = logging.get_logger(__name__)
+_CHECKPOINT_FOR_DOC = "xlm-mlm-en-2048"
 _CONFIG_FOR_DOC = "XLMConfig"
 _TOKENIZER_FOR_DOC = "XLMTokenizer"
@@ -703,7 +704,7 @@ class TFXLMModel(TFXLMPreTrainedModel):
     @add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
-        checkpoint="xlm-mlm-en-2048",
+        checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=TFBaseModelOutput,
         config_class=_CONFIG_FOR_DOC,
     )
@@ -856,7 +857,7 @@ class TFXLMWithLMHeadModel(TFXLMPreTrainedModel):
     @add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
-        checkpoint="xlm-mlm-en-2048",
+        checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=TFXLMWithLMHeadModelOutput,
         config_class=_CONFIG_FOR_DOC,
     )
@@ -946,7 +947,7 @@ class TFXLMForSequenceClassification(TFXLMPreTrainedModel, TFSequenceClassificat
     @add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
-        checkpoint="xlm-mlm-en-2048",
+        checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=TFSequenceClassifierOutput,
         config_class=_CONFIG_FOR_DOC,
     )
@@ -1072,7 +1073,7 @@ class TFXLMForMultipleChoice(TFXLMPreTrainedModel, TFMultipleChoiceLoss):
     @add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
-        checkpoint="xlm-mlm-en-2048",
+        checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=TFMultipleChoiceModelOutput,
         config_class=_CONFIG_FOR_DOC,
     )
@@ -1222,7 +1223,7 @@ class TFXLMForTokenClassification(TFXLMPreTrainedModel, TFTokenClassificationLos
     @add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
-        checkpoint="xlm-mlm-en-2048",
+        checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=TFTokenClassifierOutput,
         config_class=_CONFIG_FOR_DOC,
     )
@@ -1327,7 +1328,7 @@ class TFXLMForQuestionAnsweringSimple(TFXLMPreTrainedModel, TFQuestionAnsweringL
     @add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
-        checkpoint="xlm-mlm-en-2048",
+        checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=TFQuestionAnsweringModelOutput,
         config_class=_CONFIG_FOR_DOC,
     )
modeling_xlm.py
@@ -57,6 +57,7 @@ from .configuration_xlm import XLMConfig
 logger = logging.get_logger(__name__)
+_CHECKPOINT_FOR_DOC = "xlm-mlm-en-2048"
 _CONFIG_FOR_DOC = "XLMConfig"
 _TOKENIZER_FOR_DOC = "XLMTokenizer"
@@ -489,7 +490,7 @@ class XLMModel(XLMPreTrainedModel):
     @add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
-        checkpoint="xlm-mlm-en-2048",
+        checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=BaseModelOutput,
         config_class=_CONFIG_FOR_DOC,
     )
@@ -709,7 +710,7 @@ class XLMWithLMHeadModel(XLMPreTrainedModel):
     @add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
-        checkpoint="xlm-mlm-en-2048",
+        checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=MaskedLMOutput,
         config_class=_CONFIG_FOR_DOC,
         mask="<special1>",
@@ -787,7 +788,7 @@ class XLMForSequenceClassification(XLMPreTrainedModel):
     @add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
-        checkpoint="xlm-mlm-en-2048",
+        checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=SequenceClassifierOutput,
         config_class=_CONFIG_FOR_DOC,
     )
@@ -874,7 +875,7 @@ class XLMForQuestionAnsweringSimple(XLMPreTrainedModel):
     @add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
-        checkpoint="xlm-mlm-en-2048",
+        checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=QuestionAnsweringModelOutput,
         config_class=_CONFIG_FOR_DOC,
     )
@@ -1097,7 +1098,7 @@ class XLMForTokenClassification(XLMPreTrainedModel):
     @add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
-        checkpoint="xlm-mlm-en-2048",
+        checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=TokenClassifierOutput,
         config_class=_CONFIG_FOR_DOC,
     )
@@ -1190,7 +1191,7 @@ class XLMForMultipleChoice(XLMPreTrainedModel):
     @add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, num_choicec, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
-        checkpoint="xlm-mlm-en-2048",
+        checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=MultipleChoiceModelOutput,
         config_class=_CONFIG_FOR_DOC,
     )
modeling_tf_xlnet.py
@@ -52,6 +52,7 @@ from .configuration_xlnet import XLNetConfig
 logger = logging.get_logger(__name__)
+_CHECKPOINT_FOR_DOC = "xlnet-base-cased"
 _CONFIG_FOR_DOC = "XLNetConfig"
 _TOKENIZER_FOR_DOC = "XLNetTokenizer"
@@ -1154,7 +1155,7 @@ class TFXLNetModel(TFXLNetPreTrainedModel):
     @add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
-        checkpoint="xlnet-base-cased",
+        checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=TFXLNetModelOutput,
         config_class=_CONFIG_FOR_DOC,
     )
@@ -1423,7 +1424,7 @@ class TFXLNetForSequenceClassification(TFXLNetPreTrainedModel, TFSequenceClassif
     @add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
-        checkpoint="xlnet-base-cased",
+        checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=TFXLNetForSequenceClassificationOutput,
         config_class=_CONFIG_FOR_DOC,
     )
@@ -1549,7 +1550,7 @@ class TFXLNetForMultipleChoice(TFXLNetPreTrainedModel, TFMultipleChoiceLoss):
     @add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
-        checkpoint="xlnet-base-cased",
+        checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=TFXLNetForMultipleChoiceOutput,
         config_class=_CONFIG_FOR_DOC,
     )
@@ -1698,7 +1699,7 @@ class TFXLNetForTokenClassification(TFXLNetPreTrainedModel, TFTokenClassificatio
     @add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
-        checkpoint="xlnet-base-cased",
+        checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=TFXLNetForTokenClassificationOutput,
         config_class=_CONFIG_FOR_DOC,
     )
@@ -1805,7 +1806,7 @@ class TFXLNetForQuestionAnsweringSimple(TFXLNetPreTrainedModel, TFQuestionAnswer
     @add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
-        checkpoint="xlnet-base-cased",
+        checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=TFXLNetForQuestionAnsweringSimpleOutput,
         config_class=_CONFIG_FOR_DOC,
     )
modeling_xlnet.py
@@ -47,6 +47,7 @@ from .configuration_xlnet import XLNetConfig
 logger = logging.get_logger(__name__)
+_CHECKPOINT_FOR_DOC = "xlnet-base-cased"
 _CONFIG_FOR_DOC = "XLNetConfig"
 _TOKENIZER_FOR_DOC = "XLNetTokenizer"
@@ -1070,7 +1071,7 @@ class XLNetModel(XLNetPreTrainedModel):
     @add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
-        checkpoint="xlnet-base-cased",
+        checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=XLNetModelOutput,
         config_class=_CONFIG_FOR_DOC,
     )
@@ -1497,7 +1498,7 @@ class XLNetForSequenceClassification(XLNetPreTrainedModel):
     @add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
-        checkpoint="xlnet-base-cased",
+        checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=XLNetForSequenceClassificationOutput,
         config_class=_CONFIG_FOR_DOC,
     )
@@ -1591,7 +1592,7 @@ class XLNetForTokenClassification(XLNetPreTrainedModel):
     @add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
-        checkpoint="xlnet-base-cased",
+        checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=XLNetForTokenClassificationOutput,
         config_class=_CONFIG_FOR_DOC,
     )
@@ -1688,7 +1689,7 @@ class XLNetForMultipleChoice(XLNetPreTrainedModel):
     @add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
-        checkpoint="xlnet-base-cased",
+        checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=XLNetForMultipleChoiceOutput,
         config_class=_CONFIG_FOR_DOC,
     )
@@ -1791,7 +1792,7 @@ class XLNetForQuestionAnsweringSimple(XLNetPreTrainedModel):
     @add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
-        checkpoint="xlnet-base-cased",
+        checkpoint=_CHECKPOINT_FOR_DOC,
        output_type=XLNetForQuestionAnsweringSimpleOutput,
         config_class=_CONFIG_FOR_DOC,
     )
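The *_tf files follow the identical pattern with the TF model classes. A sketch of the corresponding usage for the XLNet pair above (requires TensorFlow; the example sentence is arbitrary):

```python
# TF counterpart for the XLNet files: "xlnet-base-cased" is the value assigned
# to _CHECKPOINT_FOR_DOC in both modeling_xlnet.py and modeling_tf_xlnet.py.
from transformers import TFXLNetModel, XLNetTokenizer

tokenizer = XLNetTokenizer.from_pretrained("xlnet-base-cased")
model = TFXLNetModel.from_pretrained("xlnet-base-cased")

inputs = tokenizer("Hello, my dog is cute", return_tensors="tf")
outputs = model(inputs)
last_hidden_state = outputs.last_hidden_state  # (batch_size, seq_len, hidden_size)
```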