Unverified Commit 9429642e authored by Arthur, committed by GitHub

[`T5/LlamaTokenizer`] default legacy to `None` to not always warn (#25131)

default legacy to None
parent de9e3b59
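For context, a minimal sketch of how the new default plays out at call time — the checkpoint name is illustrative, and this assumes its saved tokenizer config does not already pin `legacy`:

```python
from transformers import LlamaTokenizer

# Leaving `legacy` unset (the new default, legacy=None) warns once and then
# falls back to legacy=True, so existing behaviour is unchanged.
tok_default = LlamaTokenizer.from_pretrained("huggyllama/llama-7b")

# Passing `legacy` explicitly, with either value, skips the warning.
tok_legacy = LlamaTokenizer.from_pretrained("huggyllama/llama-7b", legacy=True)
tok_fixed = LlamaTokenizer.from_pretrained("huggyllama/llama-7b", legacy=False)
```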
@@ -111,7 +111,7 @@ class LlamaTokenizer(PreTrainedTokenizer):
         add_bos_token=True,
         add_eos_token=False,
         clean_up_tokenization_spaces=False,
-        legacy=True,
+        legacy=None,
         **kwargs,
     ):
         self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
@@ -131,11 +131,13 @@ class LlamaTokenizer(PreTrainedTokenizer):
             legacy=legacy,
             **kwargs,
         )
-        if legacy:
+        if legacy is None:
             logger.warning_once(
-                f"You are using the legacy behaviour of the {self.__class__}. This means that tokens that come after special tokens will not be properly handled. We recommend you to"
-                " read the related pull request available at https://github.com/huggingface/transformers/pull/24565"
+                f"You are using the default legacy behaviour of the {self.__class__}. This means that tokens that come after special tokens will not be properly handled. We recommend you to"
+                " read the related pull request available at https://github.com/huggingface/transformers/pull/24565, and set the legacy attribute accordingly."
             )
+            legacy = True
         self.legacy = legacy
         self.vocab_file = vocab_file
         self.add_bos_token = add_bos_token
@@ -148,7 +148,7 @@ class T5Tokenizer(PreTrainedTokenizer):
         extra_ids=100,
         additional_special_tokens=None,
         sp_model_kwargs: Optional[Dict[str, Any]] = None,
-        legacy=True,
+        legacy=None,
         **kwargs,
     ) -> None:
         # Add extra_ids to the special token list
@@ -163,11 +163,12 @@ class T5Tokenizer(PreTrainedTokenizer):
                 " provided to T5Tokenizer. In this case the additional_special_tokens must include the extra_ids"
                 " tokens"
             )
-        if legacy:
+        if legacy is None:
             logger.warning_once(
-                f"You are using the legacy behaviour of the {self.__class__}. This means that tokens that come after special tokens will not be properly handled. We recommend you to"
-                " read the related pull request available at https://github.com/huggingface/transformers/pull/24565"
+                f"You are using the default legacy behaviour of the {self.__class__}. This means that tokens that come after special tokens will not be properly handled. We recommend you to"
+                " read the related pull request available at https://github.com/huggingface/transformers/pull/24565, and set the legacy attribute accordingly."
             )
+            legacy = True
         self.legacy = legacy
         self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
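The same default applies to `T5Tokenizer`; a similar sketch, again with an illustrative checkpoint name and assuming its saved config does not already set `legacy`:

```python
from transformers import T5Tokenizer

# Unset `legacy` warns once and then behaves as legacy=True.
tok = T5Tokenizer.from_pretrained("t5-small")

# Opt in to the corrected handling of tokens that follow special tokens.
tok_fixed = T5Tokenizer.from_pretrained("t5-small", legacy=False)
```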