Unverified Commit 83d38c9f authored by Suraj Patil, committed by GitHub

GPT Neo few fixes (#10968)

* fix checkpoint names

* auto model

* fix doc
parent 7772ddb4
@@ -31,8 +31,8 @@ The :obj:`generate()` method can be used to generate text using GPT Neo model.
 .. code-block::
     >>> from transformers import GPTNeoForCausalLM, GPT2Tokenizer
-    >>> model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt_neo_xl")
-    >>> tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt_neo_xl")
+    >>> model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
+    >>> tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")
     >>> prompt = "In a shocking finding, scientists discovered a herd of unicorns living in a remote, " \
     ...          "previously unexplored valley, in the Andes Mountains. Even more surprising to the " \
...
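The doc snippet is cut off above; for reference, a self-contained sketch of the same generation flow. The sampling parameters (do_sample, temperature, max_length) are illustrative choices, not taken from this diff:

    from transformers import GPTNeoForCausalLM, GPT2Tokenizer

    model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
    tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")

    prompt = (
        "In a shocking finding, scientists discovered a herd of unicorns living in a remote, "
        "previously unexplored valley, in the Andes Mountains."
    )
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids

    # Sampling settings here are illustrative, not part of the diff.
    gen_tokens = model.generate(input_ids, do_sample=True, temperature=0.9, max_length=100)
    print(tokenizer.batch_decode(gen_tokens)[0])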
@@ -139,10 +139,10 @@ For the full list, refer to `https://huggingface.co/models <https://huggingface.co/models>`__.
 |                    | ``gpt2-xl``                 | | 48-layer, 1600-hidden, 25-heads, 1558M parameters. |
 |                    |                             | | OpenAI's XL-sized GPT-2 English model              |
 +--------------------+-----------------------------+------------------------------------------------------+
-| GPTNeo             | ``EleutherAI/gpt_neo_xl``   | | 24-layer, 2048-hidden, 16-heads, 1.3B parameters.  |
+| GPTNeo             | ``EleutherAI/gpt-neo-1.3B`` | | 24-layer, 2048-hidden, 16-heads, 1.3B parameters.  |
 |                    |                             | | EleutherAI's GPT-3 like language model.            |
 |                    +-----------------------------+------------------------------------------------------+
-|                    | ``EleutherAI/gpt_neo_2.7B`` | | 32-layer, 2560-hidden, 20-heads, 2.7B parameters.  |
+|                    | ``EleutherAI/gpt-neo-2.7B`` | | 32-layer, 2560-hidden, 20-heads, 2.7B parameters.  |
 |                    |                             | | EleutherAI's GPT-3 like language model.            |
 +--------------------+-----------------------------+------------------------------------------------------+
 | Transformer-XL     | ``transfo-xl-wt103``        | | 18-layer, 1024-hidden, 16-heads, 257M parameters.  |
...
@@ -418,6 +418,7 @@ MODEL_FOR_PRETRAINING_MAPPING = OrderedDict(
 MODEL_WITH_LM_HEAD_MAPPING = OrderedDict(
     [
         # Model with LM heads mapping
+        (GPTNeoConfig, GPTNeoForCausalLM),
         (BigBirdConfig, BigBirdForMaskedLM),
         (Speech2TextConfig, Speech2TextForConditionalGeneration),
         (Wav2Vec2Config, Wav2Vec2ForMaskedLM),
...
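Registering GPTNeoConfig in MODEL_WITH_LM_HEAD_MAPPING is what lets the auto classes resolve a GPT Neo checkpoint to GPTNeoForCausalLM. A minimal sketch of what this enables (assuming the EleutherAI/gpt-neo-1.3B weights are reachable on the Hub):

    from transformers import AutoModelWithLMHead, AutoTokenizer

    # The new mapping entry is how the auto class picks GPTNeoForCausalLM
    # once it has loaded a GPTNeoConfig for this checkpoint.
    tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")
    model = AutoModelWithLMHead.from_pretrained("EleutherAI/gpt-neo-1.3B")

    inputs = tokenizer("The dog", return_tensors="pt")
    outputs = model.generate(inputs["input_ids"], max_length=20)
    print(tokenizer.decode(outputs[0]))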
@@ -21,7 +21,7 @@ from ...utils import logging
 logger = logging.get_logger(__name__)
 GPT_NEO_PRETRAINED_CONFIG_ARCHIVE_MAP = {
-    "EleutherAI/gpt_neo_xl": "https://huggingface.co/EleutherAI/gpt_neo_xl/resolve/main/config.json",
+    "EleutherAI/gpt-neo-1.3B": "https://huggingface.co/EleutherAI/gpt-neo-1.3B/resolve/main/config.json",
     # See all GPTNeo models at https://huggingface.co/models?filter=gpt_neo
 }
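With the archive map pointing at the renamed repo, the config resolves under the new id. A quick sketch (assumes network access to the Hub):

    from transformers import GPTNeoConfig

    # Fetches the config.json URL registered in
    # GPT_NEO_PRETRAINED_CONFIG_ARCHIVE_MAP above.
    config = GPTNeoConfig.from_pretrained("EleutherAI/gpt-neo-1.3B")
    print(config.num_layers, config.hidden_size)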
@@ -30,8 +30,8 @@ class GPTNeoConfig(PretrainedConfig):
     r"""
     This is the configuration class to store the configuration of a :class:`~transformers.GPTNeoModel`. It is used to
     instantiate a GPT Neo model according to the specified arguments, defining the model architecture. Instantiating a
-    configuration with the defaults will yield a similar configuration to that of the GPTNeo `gpt_neo_xl
-    <https://huggingface.co/EleutherAI/gpt_neo_xl>`__ architecture.
+    configuration with the defaults will yield a similar configuration to that of the GPTNeo `gpt-neo-1.3B
+    <https://huggingface.co/EleutherAI/gpt-neo-1.3B>`__ architecture.
     Configuration objects inherit from :class:`~transformers.PretrainedConfig` and can be used to control the model
     outputs. Read the documentation from :class:`~transformers.PretrainedConfig` for more information.
@@ -81,10 +81,10 @@ class GPTNeoConfig(PretrainedConfig):
     >>> from transformers import GPTNeoModel, GPTNeoConfig
-    >>> # Initializing a GPTNeo EleutherAI/gpt_neo_xl style configuration
+    >>> # Initializing a GPTNeo EleutherAI/gpt-neo-1.3B style configuration
     >>> configuration = GPTNeoConfig()
-    >>> # Initializing a model from the EleutherAI/gpt_neo_xl style configuration
+    >>> # Initializing a model from the EleutherAI/gpt-neo-1.3B style configuration
     >>> model = GPTNeoModel(configuration)
     >>> # Accessing the model configuration
...
@@ -43,11 +43,11 @@ _CONFIG_FOR_DOC = "GPTNeoConfig"
 _TOKENIZER_FOR_DOC = "GPT2Tokenizer"
 GPT_NEO_PRETRAINED_MODEL_ARCHIVE_LIST = [
-    "EleutherAI/gpt_neo_xl",
+    "EleutherAI/gpt-neo-1.3B",
     # See all GPTNeo models at https://huggingface.co/models?filter=gpt_neo
 ]
-_CHECKPOINT_FOR_DOC = "EleutherAI/gpt_neo_xl"
+_CHECKPOINT_FOR_DOC = "EleutherAI/gpt-neo-1.3B"
 def load_tf_weights_in_gpt_neo(model, config, gpt_neo_checkpoint_path):
...
@@ -35,6 +35,7 @@ class TextGenerationPipeline(Pipeline):
     "TransfoXLLMHeadModel",
     "ReformerModelWithLMHead",
     "GPT2LMHeadModel",
+    "GPTNeoForCausalLM",
     "OpenAIGPTLMHeadModel",
     "CTRLLMHeadModel",
     "TFXLNetLMHeadModel",
...
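Adding GPTNeoForCausalLM to this allow-list means the text-generation pipeline accepts GPT Neo models instead of rejecting them as unsupported. A minimal sketch (assumes the 1.3B checkpoint can be downloaded):

    from transformers import pipeline

    # The pipeline now recognizes GPTNeoForCausalLM as a supported LM head.
    generator = pipeline("text-generation", model="EleutherAI/gpt-neo-1.3B")
    print(generator("The dog", max_length=20)[0]["generated_text"])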
@@ -432,7 +432,7 @@ class GPTNeoModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase
     @slow
     def test_batch_generation(self):
-        model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt_neo_xl")
+        model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
         model.to(torch_device)
         tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
@@ -486,7 +486,7 @@ class GPTNeoModelLanguageGenerationTest(unittest.TestCase):
     @slow
     def test_lm_generate_gpt_neo(self):
         for checkpointing in [True, False]:
-            model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt_neo_xl", gradient_checkpointing=checkpointing)
+            model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B", gradient_checkpointing=checkpointing)
             model.to(torch_device)
             input_ids = torch.tensor([[464, 3290]], dtype=torch.long, device=torch_device)  # The dog
             # fmt: off
@@ -497,8 +497,8 @@ class GPTNeoModelLanguageGenerationTest(unittest.TestCase):
     @slow
     def test_gpt_neo_sample(self):
-        tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt_neo_xl")
-        model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt_neo_xl")
+        tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")
+        model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
         model.to(torch_device)
         torch.manual_seed(0)
...
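Outside the test suite, the greedy check from test_lm_generate_gpt_neo can be reproduced by hand. A sketch of the same setup (the diff's own comment notes that token ids [464, 3290] decode to "The dog" under the GPT-2 BPE; running on CPU here rather than torch_device):

    import torch
    from transformers import GPT2Tokenizer, GPTNeoForCausalLM

    model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

    # Same prompt the slow test uses.
    input_ids = torch.tensor([[464, 3290]], dtype=torch.long)
    output_ids = model.generate(input_ids, do_sample=False)
    print(tokenizer.decode(output_ids[0]))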