Unverified Commit 53496ac5 authored by Patrick von Platen, committed by GitHub

[LongT5] Rename checkpoints (#17700)

parent 3b29c9fd
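
This commit renames the four LongT5 checkpoints on the Hub from the CamelCase form (`google/LongT5-Local-Base`, etc.) to the lowercase, hyphenated form (`google/long-t5-local-base`, etc.) and updates every reference in the configuration file, the PyTorch and Flax modeling files, the model tests, and the ONNX export tests. After the rename, the models load under the new identifiers; a minimal sketch, assuming `transformers` is installed and the renamed weights are available on the Hub:

```python
from transformers import AutoTokenizer, LongT5ForConditionalGeneration

# The renamed checkpoint identifiers introduced by this PR:
# google/long-t5-local-base, google/long-t5-local-large,
# google/long-t5-tglobal-base, google/long-t5-tglobal-large
tokenizer = AutoTokenizer.from_pretrained("google/long-t5-local-base")
model = LongT5ForConditionalGeneration.from_pretrained("google/long-t5-local-base")

# LongT5 targets long inputs, so feed it a long (here, repeated) document.
text = "summarize: " + 100 * "Studies have shown that owning a dog is good for you. "
inputs = tokenizer(text, return_tensors="pt")
summary_ids = model.generate(inputs.input_ids, max_length=32)
print(tokenizer.decode(summary_ids[0], skip_special_tokens=True))
```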
src/transformers/models/longt5/configuration_longt5.py

@@ -23,10 +23,10 @@ from ...utils import logging

 logger = logging.get_logger(__name__)

 LONGT5_PRETRAINED_CONFIG_ARCHIVE_MAP = {
-    "google/LongT5-Local-Base": "https://huggingface.co/google/LongT5-Local-Base/blob/main/config.json",
-    "google/LongT5-Local-Large": "https://huggingface.co/google/LongT5-Local-Large/blob/main/config.json",
-    "google/LongT5-TGlobal-Base": "https://huggingface.co/google/LongT5-TGlobal-Base/blob/main/config.json",
-    "google/LongT5-TGlobal-Large": "https://huggingface.co/google/LongT5-TGlobal-Large/blob/main/config.json",
+    "google/long-t5-local-base": "https://huggingface.co/google/long-t5-local-base/blob/main/config.json",
+    "google/long-t5-local-large": "https://huggingface.co/google/long-t5-local-large/blob/main/config.json",
+    "google/long-t5-tglobal-base": "https://huggingface.co/google/long-t5-tglobal-base/blob/main/config.json",
+    "google/long-t5-tglobal-large": "https://huggingface.co/google/long-t5-tglobal-large/blob/main/config.json",
 }
@@ -35,7 +35,7 @@ class LongT5Config(PretrainedConfig):
     This is the configuration class to store the configuration of a [`LongT5Model`] or a [`FlaxLongT5Model`]. It is
     used to instantiate a LongT5 model according to the specified arguments, defining the model architecture.
     Instantiating a configuration with the defaults will yield a similar configuration to that of the LongT5
-    [google/LongT5-Local-Base](https://huggingface.co/google/LongT5-Local-Base) architecture.
+    [google/long-t5-local-base](https://huggingface.co/google/long-t5-local-base) architecture.

     Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
     documentation from [`PretrainedConfig`] for more information.
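
As the updated docstring states, a default-constructed `LongT5Config` approximates the `google/long-t5-local-base` architecture. A short illustrative sketch of that behavior:

```python
from transformers import LongT5Config, LongT5Model

# Default arguments approximate google/long-t5-local-base (local attention,
# base size); pass encoder_attention_type="transient-global" for the
# TGlobal variants, as the tests below do.
config = LongT5Config()
model = LongT5Model(config)  # randomly initialized weights
print(config.encoder_attention_type)  # "local" by default
```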
src/transformers/models/longt5/modeling_flax_longt5.py

@@ -49,7 +49,7 @@ from .configuration_longt5 import LongT5Config

 logger = logging.get_logger(__name__)

-_CHECKPOINT_FOR_DOC = "google/LongT5-Local-Base"
+_CHECKPOINT_FOR_DOC = "google/long-t5-local-base"
 _CONFIG_FOR_DOC = "LongT5Config"
 _TOKENIZER_FOR_DOC = "T5Tokenizer"
@@ -1799,7 +1799,7 @@ class FlaxLongT5PreTrainedModel(FlaxPreTrainedModel):
         >>> from transformers import T5Tokenizer, FlaxLongT5ForConditionalGeneration

         >>> tokenizer = T5Tokenizer.from_pretrained("t5-base")
-        >>> model = FlaxLongT5ForConditionalGeneration.from_pretrained("google/LongT5-Local-Base")
+        >>> model = FlaxLongT5ForConditionalGeneration.from_pretrained("google/long-t5-local-base")

         >>> text = "My friends are cool but they eat too many carbs."
         >>> inputs = tokenizer(text, return_tensors="np")
@@ -1861,7 +1861,7 @@ class FlaxLongT5PreTrainedModel(FlaxPreTrainedModel):
         >>> import jax.numpy as jnp

         >>> tokenizer = T5Tokenizer.from_pretrained("t5-base")
-        >>> model = FlaxLongT5ForConditionalGeneration.from_pretrained("google/LongT5-Local-Base")
+        >>> model = FlaxLongT5ForConditionalGeneration.from_pretrained("google/long-t5-local-base")

         >>> text = "My friends are cool but they eat too many carbs."
         >>> inputs = tokenizer(text, return_tensors="np")
@@ -2080,7 +2080,7 @@ FLAX_LONGT5_MODEL_DOCSTRING = """
     >>> from transformers import T5Tokenizer, FlaxLongT5Model

     >>> tokenizer = T5Tokenizer.from_pretrained("t5-base")
-    >>> model = FlaxLongT5Model.from_pretrained("google/LongT5-Local-Base")
+    >>> model = FlaxLongT5Model.from_pretrained("google/long-t5-local-base")

     >>> input_ids = tokenizer(
     ...     "Studies have been shown that owning a dog is good for you", return_tensors="np"
@@ -2233,7 +2233,7 @@ class FlaxLongT5ForConditionalGeneration(FlaxLongT5PreTrainedModel):
         >>> import jax.numpy as jnp

         >>> tokenizer = T5Tokenizer.from_pretrained("t5-base")
-        >>> model = FlaxLongT5ForConditionalGeneration.from_pretrained("google/LongT5-Local-Base")
+        >>> model = FlaxLongT5ForConditionalGeneration.from_pretrained("google/long-t5-local-base")

         >>> text = "summarize: My friends are cool but they eat too many carbs."
         >>> inputs = tokenizer(text, return_tensors="np")
@@ -2381,7 +2381,7 @@ FLAX_LONGT5_CONDITIONAL_GENERATION_DOCSTRING = """
     >>> from transformers import T5Tokenizer, FlaxLongT5ForConditionalGeneration

     >>> tokenizer = T5Tokenizer.from_pretrained("t5-base")
-    >>> model = FlaxLongT5ForConditionalGeneration.from_pretrained("google/LongT5-Local-Base")
+    >>> model = FlaxLongT5ForConditionalGeneration.from_pretrained("google/long-t5-local-base")

     >>> ARTICLE_TO_SUMMARIZE = "summarize: My friends are cool but they eat too many carbs."
     >>> inputs = tokenizer([ARTICLE_TO_SUMMARIZE], return_tensors="np")
src/transformers/models/longt5/modeling_longt5.py

@@ -49,14 +49,14 @@ logger = logging.get_logger(__name__)

 _CONFIG_FOR_DOC = "LongT5Config"
 _TOKENIZER_FOR_DOC = "T5Tokenizer"
-_CHECKPOINT_FOR_DOC = "google/LongT5-Local-Base"
+_CHECKPOINT_FOR_DOC = "google/long-t5-local-base"

 # TODO: Update before the merge
 LONGT5_PRETRAINED_MODEL_ARCHIVE_LIST = [
-    "google/LongT5-Local-Base",
-    "google/LongT5-Local-Large",
-    "google/LongT5-TGlobal-Base",
-    "google/LongT5-TGlobal-Large",
+    "google/long-t5-local-base",
+    "google/long-t5-local-large",
+    "google/long-t5-tglobal-base",
+    "google/long-t5-tglobal-large",
 ]
@@ -1797,8 +1797,8 @@ class LongT5Model(LongT5PreTrainedModel):
         ```python
         >>> from transformers import T5Tokenizer, LongT5Model

-        >>> tokenizer = T5Tokenizer.from_pretrained("google/LongT5-Local-Base")
-        >>> model = LongT5Model.from_pretrained("google/LongT5-Local-Base")
+        >>> tokenizer = T5Tokenizer.from_pretrained("google/long-t5-local-base")
+        >>> model = LongT5Model.from_pretrained("google/long-t5-local-base")

         >>> # Let's try a very long encoder input.
         >>> input_ids = tokenizer(
@@ -2169,8 +2169,8 @@ class LongT5EncoderModel(LongT5PreTrainedModel):
         ```python
         >>> from transformers import AutoTokenizer, LongT5ForConditionalGeneration

-        >>> tokenizer = AutoTokenizer.from_pretrained("google/LongT5-Local-Base")
-        >>> model = LongT5EncoderModel.from_pretrained("google/LongT5-Local-Base")
+        >>> tokenizer = AutoTokenizer.from_pretrained("google/long-t5-local-base")
+        >>> model = LongT5EncoderModel.from_pretrained("google/long-t5-local-base")

         >>> input_ids = tokenizer(
         ...     100 * "Studies have been shown that owning a dog is good for you ", return_tensors="pt"
         ... ).input_ids  # Batch size 1
tests/models/longt5/test_modeling_longt5.py

@@ -68,7 +68,7 @@ class LongT5ModelTester:
         decoder_start_token_id=0,
         scope=None,
         decoder_layers=None,
-        large_model_config_path="google/LongT5-Local-Large",
+        large_model_config_path="google/long-t5-local-large",
     ):
         self.parent = parent
@@ -755,7 +755,7 @@ class LongT5ModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase
 class LongT5TGlobalModelTest(LongT5ModelTest):
     def setUp(self):
         self.model_tester = LongT5ModelTester(
-            self, encoder_attention_type="transient-global", large_model_config_path="google/LongT5-TGlobal-Large"
+            self, encoder_attention_type="transient-global", large_model_config_path="google/long-t5-tglobal-large"
         )
         self.config_tester = ConfigTester(self, config_class=LongT5Config, d_model=37)
@@ -912,7 +912,7 @@ class LongT5EncoderOnlyModelTester:
         eos_token_id=1,
         pad_token_id=0,
         scope=None,
-        large_model_config_path="google/LongT5-Local-Large",
+        large_model_config_path="google/long-t5-local-large",
     ):
         self.parent = parent
@@ -1095,7 +1095,7 @@ class LongT5EncoderOnlyModelTest(ModelTesterMixin, unittest.TestCase):
 class LongT5EncoderOnlyTGlobalModelTest(LongT5EncoderOnlyModelTest):
     def setUp(self):
         self.model_tester = LongT5EncoderOnlyModelTester(
-            self, encoder_attention_type="transient-global", large_model_config_path="google/LongT5-TGlobal-Large"
+            self, encoder_attention_type="transient-global", large_model_config_path="google/long-t5-tglobal-large"
         )
         self.config_tester = ConfigTester(self, config_class=LongT5Config, d_model=37)
tests/onnx/test_onnx_v2.py

@@ -213,8 +213,8 @@ PYTORCH_EXPORT_SEQ2SEQ_WITH_PAST_MODELS = {
     ("blenderbot-small", "facebook/blenderbot_small-90M"),
     ("blenderbot", "facebook/blenderbot-400M-distill"),
     ("bigbird-pegasus", "google/bigbird-pegasus-large-arxiv"),
-    ("longt5", "google/LongT5-Local-Base"),
-    ("longt5", "google/LongT5-TGlobal-Base"),
+    ("longt5", "google/long-t5-local-base"),
+    ("longt5", "google/long-t5-tglobal-base"),
 }

 # TODO(lewtun): Include the same model types in `PYTORCH_EXPORT_MODELS` once TensorFlow has parity with the PyTorch model implementations.
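
The last hunk keeps the ONNX regression tests pointed at the renamed seq2seq checkpoints. For reference, an export against one of the new identifiers could look like the sketch below, assuming the `transformers.onnx` export API of the release this PR targets; the feature name and output path here are illustrative:

```python
from pathlib import Path

from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from transformers.onnx import FeaturesManager, export

# One of the renamed checkpoints; any of the four would work the same way.
ckpt = "google/long-t5-local-base"
tokenizer = AutoTokenizer.from_pretrained(ckpt)
model = AutoModelForSeq2SeqLM.from_pretrained(ckpt)

# Look up the ONNX config registered for this model/feature pair.
model_kind, onnx_config_ctor = FeaturesManager.check_supported_model_or_raise(
    model, feature="seq2seq-lm"
)
onnx_config = onnx_config_ctor(model.config)

# Export the model graph to an ONNX file (output name is arbitrary).
onnx_inputs, onnx_outputs = export(
    tokenizer, model, onnx_config, onnx_config.default_onnx_opset, Path("longt5.onnx")
)
```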