Unverified Commit 9429642e authored by Arthur, committed by GitHub

[`T5/LlamaTokenizer`] default legacy to `None` to not always warn (#25131)

default legacy to None
parent de9e3b59
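For context, a minimal sketch of how the new default plays out at call time — the checkpoint name is illustrative, and this assumes its saved tokenizer config does not already pin `legacy`:

```python
from transformers import LlamaTokenizer

# Leaving `legacy` unset (the new default, legacy=None) warns once and then
# falls back to legacy=True, so existing behaviour is unchanged.
tok_default = LlamaTokenizer.from_pretrained("huggyllama/llama-7b")

# Passing `legacy` explicitly, with either value, skips the warning.
tok_legacy = LlamaTokenizer.from_pretrained("huggyllama/llama-7b", legacy=True)
tok_fixed = LlamaTokenizer.from_pretrained("huggyllama/llama-7b", legacy=False)
```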
@@ -111,7 +111,7 @@ class LlamaTokenizer(PreTrainedTokenizer):
         add_bos_token=True,
         add_eos_token=False,
         clean_up_tokenization_spaces=False,
-        legacy=True,
+        legacy=None,
         **kwargs,
     ):
         self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
@@ -131,11 +131,13 @@ class LlamaTokenizer(PreTrainedTokenizer):
             legacy=legacy,
             **kwargs,
         )
-        if legacy:
+        if legacy is None:
             logger.warning_once(
-                f"You are using the legacy behaviour of the {self.__class__}. This means that tokens that come after special tokens will not be properly handled. We recommend you to"
-                " read the related pull request available at https://github.com/huggingface/transformers/pull/24565"
+                f"You are using the default legacy behaviour of the {self.__class__}. This means that tokens that come after special tokens will not be properly handled. We recommend you to"
+                " read the related pull request available at https://github.com/huggingface/transformers/pull/24565, and set the legacy attribute accordingly."
             )
+            legacy = True
         self.legacy = legacy
         self.vocab_file = vocab_file
         self.add_bos_token = add_bos_token
@@ -148,7 +148,7 @@ class T5Tokenizer(PreTrainedTokenizer):
         extra_ids=100,
         additional_special_tokens=None,
         sp_model_kwargs: Optional[Dict[str, Any]] = None,
-        legacy=True,
+        legacy=None,
         **kwargs,
     ) -> None:
         # Add extra_ids to the special token list
@@ -163,11 +163,12 @@ class T5Tokenizer(PreTrainedTokenizer):
                 " provided to T5Tokenizer. In this case the additional_special_tokens must include the extra_ids"
                 " tokens"
             )
-        if legacy:
+        if legacy is None:
             logger.warning_once(
-                f"You are using the legacy behaviour of the {self.__class__}. This means that tokens that come after special tokens will not be properly handled. We recommend you to"
-                " read the related pull request available at https://github.com/huggingface/transformers/pull/24565"
+                f"You are using the default legacy behaviour of the {self.__class__}. This means that tokens that come after special tokens will not be properly handled. We recommend you to"
+                " read the related pull request available at https://github.com/huggingface/transformers/pull/24565, and set the legacy attribute accordingly."
             )
+            legacy = True
         self.legacy = legacy
         self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
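The same default applies to `T5Tokenizer`; a similar sketch, again with an illustrative checkpoint name and assuming its saved config does not already set `legacy`:

```python
from transformers import T5Tokenizer

# Unset `legacy` warns once and then behaves as legacy=True.
tok = T5Tokenizer.from_pretrained("t5-small")

# Opt in to the corrected handling of tokens that follow special tokens.
tok_fixed = T5Tokenizer.from_pretrained("t5-small", legacy=False)
```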