"...git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "cdddfbffa1206958c0fabf60a87561b5704727a8"
Unverified Commit 862888a3 authored by Sylvain Gugger, committed by GitHub

Add disclaimer for necessary fake models (#21178)

* Add disclaimer for necessary fake models

* Address review comments

* Use for GPT-NeoX as well
parent 87208a05
src/transformers/models/gpt_neox/modeling_gpt_neox.py

@@ -36,9 +36,9 @@ from .configuration_gpt_neox import GPTNeoXConfig
 
 logger = logging.get_logger(__name__)
 
-_CHECKPOINT_FOR_DOC = "gpt-neox-20b"
+_CHECKPOINT_FOR_DOC = "trl-internal-testing/tiny-random-GPTNeoXForCausalLM"
+_REAL_CHECKPOINT_FOR_DOC = "EleutherAI/gpt-neox-20b"
 _CONFIG_FOR_DOC = "GPTNeoXConfig"
-_TOKENIZER_FOR_DOC = "GPTNeoXTokenizerFast"
 
 GPT_NEOX_PRETRAINED_MODEL_ARCHIVE_LIST = [
     "EleutherAI/gpt-neox-20b",

@@ -435,8 +435,8 @@ class GPTNeoXModel(GPTNeoXPreTrainedModel):
     @add_start_docstrings_to_model_forward(GPT_NEOX_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
-        processor_class=_TOKENIZER_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
+        real_checkpoint=_REAL_CHECKPOINT_FOR_DOC,
         output_type=BaseModelOutputWithPast,
         config_class=_CONFIG_FOR_DOC,
     )
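For context, a minimal sketch (my illustration, not part of this diff) of what the swap buys: the tiny placeholder checkpoint loads in seconds, so the rendered code samples and doctests stay cheap, while the real 20B checkpoint is only referenced through the new `real_checkpoint` disclaimer.

```python
# Hedged sketch, not from the PR: the doc placeholder has randomly initialized weights,
# so its outputs are meaningless; it exists purely so the generated examples can run quickly.
from transformers import GPTNeoXForCausalLM

doc_model = GPTNeoXForCausalLM.from_pretrained("trl-internal-testing/tiny-random-GPTNeoXForCausalLM")
# Orders of magnitude smaller than the real EleutherAI/gpt-neox-20b checkpoint.
print(f"{sum(p.numel() for p in doc_model.parameters()):,} parameters")
```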
src/transformers/models/gptj/modeling_gptj.py

@@ -37,16 +37,8 @@ from .configuration_gptj import GPTJConfig
 
 logger = logging.get_logger(__name__)
 
 _CHECKPOINT_FOR_DOC = "hf-internal-testing/tiny-random-gptj"
+_REAL_CHECKPOINT_FOR_DOC = "EleutherAI/gpt-j-6B"
 _CONFIG_FOR_DOC = "GPTJConfig"
-_TOKENIZER_FOR_DOC = "GPT2Tokenizer"
-
-_CHECKPOINT_FOR_QA = "ydshieh/tiny-random-gptj-for-question-answering"
-_QA_EXPECTED_OUTPUT = "' was Jim Henson?Jim Henson was a n'"
-_QA_EXPECTED_LOSS = 3.13
-
-_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION = "ydshieh/tiny-random-gptj-for-sequence-classification"
-_SEQ_CLASS_EXPECTED_OUTPUT = "'LABEL_0'"
-_SEQ_CLASS_EXPECTED_LOSS = 0.76
 
 GPTJ_PRETRAINED_MODEL_ARCHIVE_LIST = [

@@ -536,10 +528,10 @@ class GPTJModel(GPTJPreTrainedModel):
     @add_start_docstrings_to_model_forward(GPTJ_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
-        processor_class=_TOKENIZER_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=BaseModelOutputWithPast,
         config_class=_CONFIG_FOR_DOC,
+        real_checkpoint=_REAL_CHECKPOINT_FOR_DOC,
     )
     def forward(
         self,

@@ -790,10 +782,10 @@ class GPTJForCausalLM(GPTJPreTrainedModel):
     @add_start_docstrings_to_model_forward(GPTJ_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
-        processor_class=_TOKENIZER_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=CausalLMOutputWithPast,
         config_class=_CONFIG_FOR_DOC,
+        real_checkpoint=_REAL_CHECKPOINT_FOR_DOC,
     )
     def forward(
         self,

@@ -912,12 +904,10 @@ class GPTJForSequenceClassification(GPTJPreTrainedModel):
     @add_start_docstrings_to_model_forward(GPTJ_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
-        processor_class=_TOKENIZER_FOR_DOC,
-        checkpoint=_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION,
+        checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=SequenceClassifierOutputWithPast,
         config_class=_CONFIG_FOR_DOC,
-        expected_output=_SEQ_CLASS_EXPECTED_OUTPUT,
-        expected_loss=_SEQ_CLASS_EXPECTED_LOSS,
+        real_checkpoint=_REAL_CHECKPOINT_FOR_DOC,
     )
     def forward(
         self,

@@ -1039,12 +1029,10 @@ class GPTJForQuestionAnswering(GPTJPreTrainedModel):
     @add_start_docstrings_to_model_forward(GPTJ_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
-        processor_class=_TOKENIZER_FOR_DOC,
-        checkpoint=_CHECKPOINT_FOR_QA,
+        checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=QuestionAnsweringModelOutput,
         config_class=_CONFIG_FOR_DOC,
-        expected_output=_QA_EXPECTED_OUTPUT,
-        expected_loss=_QA_EXPECTED_LOSS,
+        real_checkpoint=_REAL_CHECKPOINT_FOR_DOC,
     )
     def forward(
         self,
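With the per-task tiny checkpoints and their pinned `expected_output`/`expected_loss` values dropped, every GPT-J head's generated example now runs against the single `hf-internal-testing/tiny-random-gptj` checkpoint. A hedged sketch of what such a rendered sample boils down to, assuming that checkpoint ships tokenizer files:

```python
# Hedged sketch of a doc sample against the shared tiny GPT-J checkpoint.
# The classification head is freshly initialized, so the predicted label is random;
# the sample only demonstrates the API, which is exactly what the new disclaimer warns about.
import torch
from transformers import AutoTokenizer, GPTJForSequenceClassification

checkpoint = "hf-internal-testing/tiny-random-gptj"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = GPTJForSequenceClassification.from_pretrained(checkpoint)

inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits
print(model.config.id2label[logits.argmax(-1).item()])  # e.g. "LABEL_0", not meaningful
```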
src/transformers/utils/doc.py

@@ -146,6 +146,17 @@ def _prepare_output_docstrings(output_type, config_class, min_indent=None):
     return result
 
+
+FAKE_MODEL_DISCLAIMER = """
+<Tip warning={true}>
+
+This example uses a random model as the real ones are all very big. To get proper results, you should use
+{real_checkpoint} instead of {fake_checkpoint}. If you get out-of-memory when loading that checkpoint, you can try
+adding `device_map="auto"` in the `from_pretrained` call.
+
+</Tip>
+"""
+
 PT_TOKEN_CLASSIFICATION_SAMPLE = r"""
     Example:
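The new tip tells readers how to get real results instead of the fake-model output. A minimal sketch of following that advice (my example, not from the diff), assuming `accelerate` is installed so `device_map="auto"` can shard the weights, and enough memory for the 6B checkpoint:

```python
# Hedged sketch of the workflow the disclaimer recommends: swap the fake checkpoint
# for the real one and let device_map="auto" place the shards across available devices.
from transformers import AutoModelForCausalLM, AutoTokenizer

real_checkpoint = "EleutherAI/gpt-j-6B"  # the {real_checkpoint} named in the tip
tokenizer = AutoTokenizer.from_pretrained(real_checkpoint)
model = AutoModelForCausalLM.from_pretrained(real_checkpoint, device_map="auto")

inputs = tokenizer("Hello, my dog is cute", return_tensors="pt").to(model.device)
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=20)[0]))
```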
@@ -1058,6 +1069,7 @@ def add_code_sample_docstrings(
     modality=None,
     expected_output=None,
     expected_loss=None,
+    real_checkpoint=None,
 ):
     def docstring_decorator(fn):
         # model_class defaults to function's class if not specified otherwise

@@ -1082,6 +1094,9 @@ def add_code_sample_docstrings(
             qa_target_end_index=qa_target_end_index,
             expected_output=expected_output,
             expected_loss=expected_loss,
+            real_checkpoint=real_checkpoint,
+            fake_checkpoint=checkpoint,
+            true="{true}",  # For <Tip warning={true}> syntax that conflicts with formatting.
         )
 
         if ("SequenceClassification" in model_class or "AudioClassification" in model_class) and modality == "audio":

@@ -1118,6 +1133,8 @@ def add_code_sample_docstrings(
             code_sample = filter_outputs_from_example(
                 code_sample, expected_output=expected_output, expected_loss=expected_loss
             )
+        if real_checkpoint is not None:
+            code_sample = FAKE_MODEL_DISCLAIMER + code_sample
         func_doc = (fn.__doc__ or "") + "".join(docstr)
         output_doc = "" if output_type is None else _prepare_output_docstrings(output_type, config_class)
         built_doc = code_sample.format(**doc_kwargs)
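Putting the doc.py pieces together: the disclaimer is prepended whenever `real_checkpoint` is set, and because the whole sample is then passed through `str.format`, the literal `{true}` in `<Tip warning={true}>` has to be mapped back onto itself so it survives formatting. A hedged, simplified standalone reproduction (not the actual decorator code):

```python
# Simplified sketch of the mechanism in add_code_sample_docstrings.
FAKE_MODEL_DISCLAIMER = """
<Tip warning={true}>

This example uses a random model as the real ones are all very big. To get proper results, you should use
{real_checkpoint} instead of {fake_checkpoint}. If you get out-of-memory when loading that checkpoint, you can try
adding `device_map="auto"` in the `from_pretrained` call.

</Tip>
"""

# Stand-in for one of the PT_*_SAMPLE templates.
code_sample = '    >>> model = {model_class}.from_pretrained("{checkpoint}")\n'

doc_kwargs = dict(
    model_class="GPTJForCausalLM",
    checkpoint="hf-internal-testing/tiny-random-gptj",
    real_checkpoint="EleutherAI/gpt-j-6B",
    fake_checkpoint="hf-internal-testing/tiny-random-gptj",
    true="{true}",  # maps {true} onto itself so the MDX <Tip warning={true}> survives .format()
)

if doc_kwargs["real_checkpoint"] is not None:
    code_sample = FAKE_MODEL_DISCLAIMER + code_sample  # disclaimer goes in front of the sample

built_doc = code_sample.format(**doc_kwargs)
print(built_doc)  # tip naming the real checkpoint, followed by the sample using the tiny one
```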