Unverified Commit 53496ac5 authored by Patrick von Platen, committed by GitHub

[LongT5] Rename checkpoints (#17700)

parent 3b29c9fd
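
This commit renames the four LongT5 checkpoints on the Hub from the CamelCase form (`google/LongT5-Local-Base`, etc.) to the lowercase, hyphenated form (`google/long-t5-local-base`, etc.) and updates every reference in the configuration file, the PyTorch and Flax modeling files, the model tests, and the ONNX export tests. After the rename, the models load under the new identifiers; a minimal sketch, assuming `transformers` is installed and the renamed weights are available on the Hub:

```python
from transformers import AutoTokenizer, LongT5ForConditionalGeneration

# The renamed checkpoint identifiers introduced by this PR:
# google/long-t5-local-base, google/long-t5-local-large,
# google/long-t5-tglobal-base, google/long-t5-tglobal-large
tokenizer = AutoTokenizer.from_pretrained("google/long-t5-local-base")
model = LongT5ForConditionalGeneration.from_pretrained("google/long-t5-local-base")

# LongT5 targets long inputs, so feed it a long (here, repeated) document.
text = "summarize: " + 100 * "Studies have shown that owning a dog is good for you. "
inputs = tokenizer(text, return_tensors="pt")
summary_ids = model.generate(inputs.input_ids, max_length=32)
print(tokenizer.decode(summary_ids[0], skip_special_tokens=True))
```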
src/transformers/models/longt5/configuration_longt5.py

@@ -23,10 +23,10 @@ from ...utils import logging

 logger = logging.get_logger(__name__)

 LONGT5_PRETRAINED_CONFIG_ARCHIVE_MAP = {
-    "google/LongT5-Local-Base": "https://huggingface.co/google/LongT5-Local-Base/blob/main/config.json",
-    "google/LongT5-Local-Large": "https://huggingface.co/google/LongT5-Local-Large/blob/main/config.json",
-    "google/LongT5-TGlobal-Base": "https://huggingface.co/google/LongT5-TGlobal-Base/blob/main/config.json",
-    "google/LongT5-TGlobal-Large": "https://huggingface.co/google/LongT5-TGlobal-Large/blob/main/config.json",
+    "google/long-t5-local-base": "https://huggingface.co/google/long-t5-local-base/blob/main/config.json",
+    "google/long-t5-local-large": "https://huggingface.co/google/long-t5-local-large/blob/main/config.json",
+    "google/long-t5-tglobal-base": "https://huggingface.co/google/long-t5-tglobal-base/blob/main/config.json",
+    "google/long-t5-tglobal-large": "https://huggingface.co/google/long-t5-tglobal-large/blob/main/config.json",
 }
@@ -35,7 +35,7 @@ class LongT5Config(PretrainedConfig):
     This is the configuration class to store the configuration of a [`LongT5Model`] or a [`FlaxLongT5Model`]. It is
     used to instantiate a LongT5 model according to the specified arguments, defining the model architecture.
     Instantiating a configuration with the defaults will yield a similar configuration to that of the LongT5
-    [google/LongT5-Local-Base](https://huggingface.co/google/LongT5-Local-Base) architecture.
+    [google/long-t5-local-base](https://huggingface.co/google/long-t5-local-base) architecture.

     Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
     documentation from [`PretrainedConfig`] for more information.
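
As the updated docstring states, a default-constructed `LongT5Config` approximates the `google/long-t5-local-base` architecture. A short illustrative sketch of that behavior:

```python
from transformers import LongT5Config, LongT5Model

# Default arguments approximate google/long-t5-local-base (local attention,
# base size); pass encoder_attention_type="transient-global" for the
# TGlobal variants, as the tests below do.
config = LongT5Config()
model = LongT5Model(config)  # randomly initialized weights
print(config.encoder_attention_type)  # "local" by default
```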
src/transformers/models/longt5/modeling_flax_longt5.py

@@ -49,7 +49,7 @@ from .configuration_longt5 import LongT5Config

 logger = logging.get_logger(__name__)

-_CHECKPOINT_FOR_DOC = "google/LongT5-Local-Base"
+_CHECKPOINT_FOR_DOC = "google/long-t5-local-base"
 _CONFIG_FOR_DOC = "LongT5Config"
 _TOKENIZER_FOR_DOC = "T5Tokenizer"
@@ -1799,7 +1799,7 @@ class FlaxLongT5PreTrainedModel(FlaxPreTrainedModel):
         >>> from transformers import T5Tokenizer, FlaxLongT5ForConditionalGeneration

         >>> tokenizer = T5Tokenizer.from_pretrained("t5-base")
-        >>> model = FlaxLongT5ForConditionalGeneration.from_pretrained("google/LongT5-Local-Base")
+        >>> model = FlaxLongT5ForConditionalGeneration.from_pretrained("google/long-t5-local-base")

         >>> text = "My friends are cool but they eat too many carbs."
         >>> inputs = tokenizer(text, return_tensors="np")
@@ -1861,7 +1861,7 @@ class FlaxLongT5PreTrainedModel(FlaxPreTrainedModel):
         >>> import jax.numpy as jnp

         >>> tokenizer = T5Tokenizer.from_pretrained("t5-base")
-        >>> model = FlaxLongT5ForConditionalGeneration.from_pretrained("google/LongT5-Local-Base")
+        >>> model = FlaxLongT5ForConditionalGeneration.from_pretrained("google/long-t5-local-base")

         >>> text = "My friends are cool but they eat too many carbs."
         >>> inputs = tokenizer(text, return_tensors="np")
@@ -2080,7 +2080,7 @@ FLAX_LONGT5_MODEL_DOCSTRING = """
     >>> from transformers import T5Tokenizer, FlaxLongT5Model

     >>> tokenizer = T5Tokenizer.from_pretrained("t5-base")
-    >>> model = FlaxLongT5Model.from_pretrained("google/LongT5-Local-Base")
+    >>> model = FlaxLongT5Model.from_pretrained("google/long-t5-local-base")

     >>> input_ids = tokenizer(
     ...     "Studies have been shown that owning a dog is good for you", return_tensors="np"
@@ -2233,7 +2233,7 @@ class FlaxLongT5ForConditionalGeneration(FlaxLongT5PreTrainedModel):
         >>> import jax.numpy as jnp

         >>> tokenizer = T5Tokenizer.from_pretrained("t5-base")
-        >>> model = FlaxLongT5ForConditionalGeneration.from_pretrained("google/LongT5-Local-Base")
+        >>> model = FlaxLongT5ForConditionalGeneration.from_pretrained("google/long-t5-local-base")

         >>> text = "summarize: My friends are cool but they eat too many carbs."
         >>> inputs = tokenizer(text, return_tensors="np")
@@ -2381,7 +2381,7 @@ FLAX_LONGT5_CONDITIONAL_GENERATION_DOCSTRING = """
     >>> from transformers import T5Tokenizer, FlaxLongT5ForConditionalGeneration

     >>> tokenizer = T5Tokenizer.from_pretrained("t5-base")
-    >>> model = FlaxLongT5ForConditionalGeneration.from_pretrained("google/LongT5-Local-Base")
+    >>> model = FlaxLongT5ForConditionalGeneration.from_pretrained("google/long-t5-local-base")

     >>> ARTICLE_TO_SUMMARIZE = "summarize: My friends are cool but they eat too many carbs."
     >>> inputs = tokenizer([ARTICLE_TO_SUMMARIZE], return_tensors="np")
src/transformers/models/longt5/modeling_longt5.py

@@ -49,14 +49,14 @@ logger = logging.get_logger(__name__)

 _CONFIG_FOR_DOC = "LongT5Config"
 _TOKENIZER_FOR_DOC = "T5Tokenizer"
-_CHECKPOINT_FOR_DOC = "google/LongT5-Local-Base"
+_CHECKPOINT_FOR_DOC = "google/long-t5-local-base"

 # TODO: Update before the merge
 LONGT5_PRETRAINED_MODEL_ARCHIVE_LIST = [
-    "google/LongT5-Local-Base",
-    "google/LongT5-Local-Large",
-    "google/LongT5-TGlobal-Base",
-    "google/LongT5-TGlobal-Large",
+    "google/long-t5-local-base",
+    "google/long-t5-local-large",
+    "google/long-t5-tglobal-base",
+    "google/long-t5-tglobal-large",
 ]
@@ -1797,8 +1797,8 @@ class LongT5Model(LongT5PreTrainedModel):
         ```python
         >>> from transformers import T5Tokenizer, LongT5Model

-        >>> tokenizer = T5Tokenizer.from_pretrained("google/LongT5-Local-Base")
-        >>> model = LongT5Model.from_pretrained("google/LongT5-Local-Base")
+        >>> tokenizer = T5Tokenizer.from_pretrained("google/long-t5-local-base")
+        >>> model = LongT5Model.from_pretrained("google/long-t5-local-base")

         >>> # Let's try a very long encoder input.
         >>> input_ids = tokenizer(
@@ -2169,8 +2169,8 @@ class LongT5EncoderModel(LongT5PreTrainedModel):
         ```python
         >>> from transformers import AutoTokenizer, LongT5ForConditionalGeneration

-        >>> tokenizer = AutoTokenizer.from_pretrained("google/LongT5-Local-Base")
-        >>> model = LongT5EncoderModel.from_pretrained("google/LongT5-Local-Base")
+        >>> tokenizer = AutoTokenizer.from_pretrained("google/long-t5-local-base")
+        >>> model = LongT5EncoderModel.from_pretrained("google/long-t5-local-base")

         >>> input_ids = tokenizer(
         ...     100 * "Studies have been shown that owning a dog is good for you ", return_tensors="pt"
         ... ).input_ids  # Batch size 1
tests/models/longt5/test_modeling_longt5.py

@@ -68,7 +68,7 @@ class LongT5ModelTester:
         decoder_start_token_id=0,
         scope=None,
         decoder_layers=None,
-        large_model_config_path="google/LongT5-Local-Large",
+        large_model_config_path="google/long-t5-local-large",
     ):
         self.parent = parent
@@ -755,7 +755,7 @@ class LongT5ModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase
 class LongT5TGlobalModelTest(LongT5ModelTest):
     def setUp(self):
         self.model_tester = LongT5ModelTester(
-            self, encoder_attention_type="transient-global", large_model_config_path="google/LongT5-TGlobal-Large"
+            self, encoder_attention_type="transient-global", large_model_config_path="google/long-t5-tglobal-large"
         )
         self.config_tester = ConfigTester(self, config_class=LongT5Config, d_model=37)
@@ -912,7 +912,7 @@ class LongT5EncoderOnlyModelTester:
         eos_token_id=1,
         pad_token_id=0,
         scope=None,
-        large_model_config_path="google/LongT5-Local-Large",
+        large_model_config_path="google/long-t5-local-large",
     ):
         self.parent = parent
@@ -1095,7 +1095,7 @@ class LongT5EncoderOnlyModelTest(ModelTesterMixin, unittest.TestCase):
 class LongT5EncoderOnlyTGlobalModelTest(LongT5EncoderOnlyModelTest):
     def setUp(self):
         self.model_tester = LongT5EncoderOnlyModelTester(
-            self, encoder_attention_type="transient-global", large_model_config_path="google/LongT5-TGlobal-Large"
+            self, encoder_attention_type="transient-global", large_model_config_path="google/long-t5-tglobal-large"
         )
         self.config_tester = ConfigTester(self, config_class=LongT5Config, d_model=37)
tests/onnx/test_onnx_v2.py

@@ -213,8 +213,8 @@ PYTORCH_EXPORT_SEQ2SEQ_WITH_PAST_MODELS = {
     ("blenderbot-small", "facebook/blenderbot_small-90M"),
     ("blenderbot", "facebook/blenderbot-400M-distill"),
     ("bigbird-pegasus", "google/bigbird-pegasus-large-arxiv"),
-    ("longt5", "google/LongT5-Local-Base"),
-    ("longt5", "google/LongT5-TGlobal-Base"),
+    ("longt5", "google/long-t5-local-base"),
+    ("longt5", "google/long-t5-tglobal-base"),
 }

 # TODO(lewtun): Include the same model types in `PYTORCH_EXPORT_MODELS` once TensorFlow has parity with the PyTorch model implementations.
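
The last hunk keeps the ONNX regression tests pointed at the renamed seq2seq checkpoints. For reference, an export against one of the new identifiers could look like the sketch below, assuming the `transformers.onnx` export API of the release this PR targets; the feature name and output path here are illustrative:

```python
from pathlib import Path

from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from transformers.onnx import FeaturesManager, export

# One of the renamed checkpoints; any of the four would work the same way.
ckpt = "google/long-t5-local-base"
tokenizer = AutoTokenizer.from_pretrained(ckpt)
model = AutoModelForSeq2SeqLM.from_pretrained(ckpt)

# Look up the ONNX config registered for this model/feature pair.
model_kind, onnx_config_ctor = FeaturesManager.check_supported_model_or_raise(
    model, feature="seq2seq-lm"
)
onnx_config = onnx_config_ctor(model.config)

# Export the model graph to an ONNX file (output name is arbitrary).
onnx_inputs, onnx_outputs = export(
    tokenizer, model, onnx_config, onnx_config.default_onnx_opset, Path("longt5.onnx")
)
```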