Unverified Commit 83d38c9f authored by Suraj Patil, committed by GitHub

GPT Neo few fixes (#10968)

* fix checkpoint names

* auto model

* fix doc
parent 7772ddb4
@@ -31,8 +31,8 @@ The :obj:`generate()` method can be used to generate text using GPT Neo model.
 .. code-block::
     >>> from transformers import GPTNeoForCausalLM, GPT2Tokenizer
-    >>> model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt_neo_xl")
-    >>> tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt_neo_xl")
+    >>> model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
+    >>> tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")
     >>> prompt = "In a shocking finding, scientists discovered a herd of unicorns living in a remote, " \
     ...          "previously unexplored valley, in the Andes Mountains. Even more surprising to the " \
...
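The doc snippet is cut off above; for reference, a self-contained sketch of the same generation flow. The sampling parameters (do_sample, temperature, max_length) are illustrative choices, not taken from this diff:

    from transformers import GPTNeoForCausalLM, GPT2Tokenizer

    model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
    tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")

    prompt = (
        "In a shocking finding, scientists discovered a herd of unicorns living in a remote, "
        "previously unexplored valley, in the Andes Mountains."
    )
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids

    # Sampling settings here are illustrative, not part of the diff.
    gen_tokens = model.generate(input_ids, do_sample=True, temperature=0.9, max_length=100)
    print(tokenizer.batch_decode(gen_tokens)[0])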
@@ -139,10 +139,10 @@ For the full list, refer to `https://huggingface.co/models <https://huggingface.co/models>`__.
 |                    | ``gpt2-xl``                 | | 48-layer, 1600-hidden, 25-heads, 1558M parameters. |
 |                    |                             | | OpenAI's XL-sized GPT-2 English model              |
 +--------------------+-----------------------------+------------------------------------------------------+
-| GPTNeo             | ``EleutherAI/gpt_neo_xl``   | | 24-layer, 2048-hidden, 16-heads, 1.3B parameters.  |
+| GPTNeo             | ``EleutherAI/gpt-neo-1.3B`` | | 24-layer, 2048-hidden, 16-heads, 1.3B parameters.  |
 |                    |                             | | EleutherAI's GPT-3 like language model.            |
 |                    +-----------------------------+------------------------------------------------------+
-|                    | ``EleutherAI/gpt_neo_2.7B`` | | 32-layer, 2560-hidden, 20-heads, 2.7B parameters.  |
+|                    | ``EleutherAI/gpt-neo-2.7B`` | | 32-layer, 2560-hidden, 20-heads, 2.7B parameters.  |
 |                    |                             | | EleutherAI's GPT-3 like language model.            |
 +--------------------+-----------------------------+------------------------------------------------------+
 | Transformer-XL     | ``transfo-xl-wt103``        | | 18-layer, 1024-hidden, 16-heads, 257M parameters.  |
...
@@ -418,6 +418,7 @@ MODEL_FOR_PRETRAINING_MAPPING = OrderedDict(
 MODEL_WITH_LM_HEAD_MAPPING = OrderedDict(
     [
         # Model with LM heads mapping
+        (GPTNeoConfig, GPTNeoForCausalLM),
         (BigBirdConfig, BigBirdForMaskedLM),
         (Speech2TextConfig, Speech2TextForConditionalGeneration),
         (Wav2Vec2Config, Wav2Vec2ForMaskedLM),
...
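Registering GPTNeoConfig in MODEL_WITH_LM_HEAD_MAPPING is what lets the auto classes resolve a GPT Neo checkpoint to GPTNeoForCausalLM. A minimal sketch of what this enables (assuming the EleutherAI/gpt-neo-1.3B weights are reachable on the Hub):

    from transformers import AutoModelWithLMHead, AutoTokenizer

    # The new mapping entry is how the auto class picks GPTNeoForCausalLM
    # once it has loaded a GPTNeoConfig for this checkpoint.
    tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")
    model = AutoModelWithLMHead.from_pretrained("EleutherAI/gpt-neo-1.3B")

    inputs = tokenizer("The dog", return_tensors="pt")
    outputs = model.generate(inputs["input_ids"], max_length=20)
    print(tokenizer.decode(outputs[0]))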
@@ -21,7 +21,7 @@ from ...utils import logging
 logger = logging.get_logger(__name__)
 GPT_NEO_PRETRAINED_CONFIG_ARCHIVE_MAP = {
-    "EleutherAI/gpt_neo_xl": "https://huggingface.co/EleutherAI/gpt_neo_xl/resolve/main/config.json",
+    "EleutherAI/gpt-neo-1.3B": "https://huggingface.co/EleutherAI/gpt-neo-1.3B/resolve/main/config.json",
     # See all GPTNeo models at https://huggingface.co/models?filter=gpt_neo
 }
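With the archive map pointing at the renamed repo, the config resolves under the new id. A quick sketch (assumes network access to the Hub):

    from transformers import GPTNeoConfig

    # Fetches the config.json URL registered in
    # GPT_NEO_PRETRAINED_CONFIG_ARCHIVE_MAP above.
    config = GPTNeoConfig.from_pretrained("EleutherAI/gpt-neo-1.3B")
    print(config.num_layers, config.hidden_size)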
@@ -30,8 +30,8 @@ class GPTNeoConfig(PretrainedConfig):
     r"""
     This is the configuration class to store the configuration of a :class:`~transformers.GPTNeoModel`. It is used to
     instantiate a GPT Neo model according to the specified arguments, defining the model architecture. Instantiating a
-    configuration with the defaults will yield a similar configuration to that of the GPTNeo `gpt_neo_xl
-    <https://huggingface.co/EleutherAI/gpt_neo_xl>`__ architecture.
+    configuration with the defaults will yield a similar configuration to that of the GPTNeo `gpt-neo-1.3B
+    <https://huggingface.co/EleutherAI/gpt-neo-1.3B>`__ architecture.
     Configuration objects inherit from :class:`~transformers.PretrainedConfig` and can be used to control the model
     outputs. Read the documentation from :class:`~transformers.PretrainedConfig` for more information.
@@ -81,10 +81,10 @@ class GPTNeoConfig(PretrainedConfig):
     >>> from transformers import GPTNeoModel, GPTNeoConfig
-    >>> # Initializing a GPTNeo EleutherAI/gpt_neo_xl style configuration
+    >>> # Initializing a GPTNeo EleutherAI/gpt-neo-1.3B style configuration
     >>> configuration = GPTNeoConfig()
-    >>> # Initializing a model from the EleutherAI/gpt_neo_xl style configuration
+    >>> # Initializing a model from the EleutherAI/gpt-neo-1.3B style configuration
     >>> model = GPTNeoModel(configuration)
     >>> # Accessing the model configuration
...
@@ -43,11 +43,11 @@ _CONFIG_FOR_DOC = "GPTNeoConfig"
 _TOKENIZER_FOR_DOC = "GPT2Tokenizer"
 GPT_NEO_PRETRAINED_MODEL_ARCHIVE_LIST = [
-    "EleutherAI/gpt_neo_xl",
+    "EleutherAI/gpt-neo-1.3B",
     # See all GPTNeo models at https://huggingface.co/models?filter=gpt_neo
 ]
-_CHECKPOINT_FOR_DOC = "EleutherAI/gpt_neo_xl"
+_CHECKPOINT_FOR_DOC = "EleutherAI/gpt-neo-1.3B"
 def load_tf_weights_in_gpt_neo(model, config, gpt_neo_checkpoint_path):
...
@@ -35,6 +35,7 @@ class TextGenerationPipeline(Pipeline):
     "TransfoXLLMHeadModel",
     "ReformerModelWithLMHead",
     "GPT2LMHeadModel",
+    "GPTNeoForCausalLM",
     "OpenAIGPTLMHeadModel",
     "CTRLLMHeadModel",
     "TFXLNetLMHeadModel",
...
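Adding GPTNeoForCausalLM to this allow-list means the text-generation pipeline accepts GPT Neo models instead of rejecting them as unsupported. A minimal sketch (assumes the 1.3B checkpoint can be downloaded):

    from transformers import pipeline

    # The pipeline now recognizes GPTNeoForCausalLM as a supported LM head.
    generator = pipeline("text-generation", model="EleutherAI/gpt-neo-1.3B")
    print(generator("The dog", max_length=20)[0]["generated_text"])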
@@ -432,7 +432,7 @@ class GPTNeoModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase
     @slow
     def test_batch_generation(self):
-        model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt_neo_xl")
+        model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
         model.to(torch_device)
         tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
@@ -486,7 +486,7 @@ class GPTNeoModelLanguageGenerationTest(unittest.TestCase):
     @slow
     def test_lm_generate_gpt_neo(self):
         for checkpointing in [True, False]:
-            model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt_neo_xl", gradient_checkpointing=checkpointing)
+            model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B", gradient_checkpointing=checkpointing)
             model.to(torch_device)
             input_ids = torch.tensor([[464, 3290]], dtype=torch.long, device=torch_device)  # The dog
             # fmt: off
@@ -497,8 +497,8 @@ class GPTNeoModelLanguageGenerationTest(unittest.TestCase):
     @slow
     def test_gpt_neo_sample(self):
-        tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt_neo_xl")
-        model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt_neo_xl")
+        tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")
+        model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
         model.to(torch_device)
         torch.manual_seed(0)
...
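Outside the test suite, the greedy check from test_lm_generate_gpt_neo can be reproduced by hand. A sketch of the same setup (the diff's own comment notes that token ids [464, 3290] decode to "The dog" under the GPT-2 BPE; running on CPU here rather than torch_device):

    import torch
    from transformers import GPT2Tokenizer, GPTNeoForCausalLM

    model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

    # Same prompt the slow test uses.
    input_ids = torch.tensor([[464, 3290]], dtype=torch.long)
    output_ids = model.generate(input_ids, do_sample=False)
    print(tokenizer.decode(output_ids[0]))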