Unverified commit 9429642e, authored by Arthur and committed by GitHub
Browse files

[`T5/LlamaTokenizer`] default legacy to `None` to not always warn (#25131)

Default `legacy` to `None` so that the warning is only emitted when the caller has not set it explicitly.
parent de9e3b59
......@@ -111,7 +111,7 @@ class LlamaTokenizer(PreTrainedTokenizer):
add_bos_token=True,
add_eos_token=False,
clean_up_tokenization_spaces=False,
legacy=True,
legacy=None,
**kwargs,
):
self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
......@@ -131,11 +131,13 @@ class LlamaTokenizer(PreTrainedTokenizer):
legacy=legacy,
**kwargs,
)
if legacy:
if legacy is None:
logger.warning_once(
f"You are using the legacy behaviour of the {self.__class__}. This means that tokens that come after special tokens will not be properly handled. We recommend you to"
" read the related pull request available at https://github.com/huggingface/transformers/pull/24565"
f"You are using the default legacy behaviour of the {self.__class__}. This means that tokens that come after special tokens will not be properly handled. We recommend you to"
" read the related pull request available at https://github.com/huggingface/transformers/pull/24565, and set the legacy attribute accordingly."
)
legacy = True
self.legacy = legacy
self.vocab_file = vocab_file
self.add_bos_token = add_bos_token
......
......@@ -148,7 +148,7 @@ class T5Tokenizer(PreTrainedTokenizer):
extra_ids=100,
additional_special_tokens=None,
sp_model_kwargs: Optional[Dict[str, Any]] = None,
legacy=True,
legacy=None,
**kwargs,
) -> None:
# Add extra_ids to the special token list
......@@ -163,11 +163,12 @@ class T5Tokenizer(PreTrainedTokenizer):
" provided to T5Tokenizer. In this case the additional_special_tokens must include the extra_ids"
" tokens"
)
if legacy:
if legacy is None:
logger.warning_once(
f"You are using the legacy behaviour of the {self.__class__}. This means that tokens that come after special tokens will not be properly handled. We recommend you to"
" read the related pull request available at https://github.com/huggingface/transformers/pull/24565"
f"You are using the default legacy behaviour of the {self.__class__}. This means that tokens that come after special tokens will not be properly handled. We recommend you to"
" read the related pull request available at https://github.com/huggingface/transformers/pull/24565, and set the legacy attribute accordingly."
)
legacy = True
self.legacy = legacy
self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment