Unverified commit 8194df8e, authored by Lysandre Debut and committed by GitHub
Browse files

Warning on `add_special_tokens` (#2966)

Warning on `add_special_tokens` when passed to `encode`, `encode_plus` and `batch_encode_plus`
parent 38f5fe9e
...@@ -1704,6 +1704,13 @@ class PreTrainedTokenizerFast(PreTrainedTokenizer): ...@@ -1704,6 +1704,13 @@ class PreTrainedTokenizerFast(PreTrainedTokenizer):
return_offsets_mapping=False, return_offsets_mapping=False,
**kwargs **kwargs
): ):
if not add_special_tokens:
logger.warning(
"Fast tokenizers add special tokens by default. To remove special tokens, please specify"
"`add_special_tokens=False` during the initialisation rather than when calling `encode`,"
"`encode_plus` or `batch_encode_plus`."
)
# Needed if we have to return a tensor # Needed if we have to return a tensor
pad_to_max_length = pad_to_max_length or (return_tensors is not None) pad_to_max_length = pad_to_max_length or (return_tensors is not None)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment