Unverified commit 8ba4c588, authored by Thomas Wolf, committed by GitHub

Allow a more backward compatible behavior of max_len_single_sentence and max_len_sentences_pair (#3994)

* Allow a more backward compatible behavior of max_len_single_sentence and max_len_sentences_pair

* The style and quality are now top-notch
parent 847e7f33
```diff
@@ -785,6 +785,30 @@ class PreTrainedTokenizer(SpecialTokensMixin):
     def max_len_sentences_pair(self):
         return self.model_max_length - self.num_special_tokens_to_add(pair=True)
 
+    @max_len_single_sentence.setter
+    def max_len_single_sentence(self, value):
+        """ For backward compatibility, allow trying to set 'max_len_single_sentence' """
+        if value == self.model_max_length - self.num_special_tokens_to_add(pair=False):
+            logger.warning(
+                "Setting 'max_len_single_sentence' is now deprecated. This value is automatically set up."
+            )
+        else:
+            raise ValueError(
+                "Setting 'max_len_single_sentence' is now deprecated. This value is automatically set up."
+            )
+
+    @max_len_sentences_pair.setter
+    def max_len_sentences_pair(self, value):
+        """ For backward compatibility, allow trying to set 'max_len_sentences_pair' """
+        if value == self.model_max_length - self.num_special_tokens_to_add(pair=True):
+            logger.warning(
+                "Setting 'max_len_sentences_pair' is now deprecated. This value is automatically set up."
+            )
+        else:
+            raise ValueError(
+                "Setting 'max_len_sentences_pair' is now deprecated. This value is automatically set up."
+            )
+
     def get_vocab(self):
         """ Returns the vocabulary as a dict of {token: index} pairs. `tokenizer.get_vocab()[token]` is equivalent to `tokenizer.convert_tokens_to_ids(token)` when `token` is in the vocab. """
         raise NotImplementedError()
```
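For context (this is not part of the commit), here is a minimal sketch of the behavior these setters introduce, assuming a tokenizer loaded from the `bert-base-uncased` checkpoint, which is an arbitrary illustrative choice:

```python
from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Reading still works exactly as before: both values are derived from
# model_max_length minus the number of special tokens that get added.
print(tokenizer.max_len_single_sentence)
print(tokenizer.max_len_sentences_pair)

# Backward compatible path: assigning the value the property would
# compute anyway only logs a deprecation warning, so old scripts that
# did this keep running.
tokenizer.max_len_single_sentence = (
    tokenizer.model_max_length - tokenizer.num_special_tokens_to_add(pair=False)
)

# Any other value raises, because the attribute is now derived
# automatically and can no longer be overridden.
try:
    tokenizer.max_len_sentences_pair = 123
except ValueError as err:
    print(err)
```

The if/else split is what makes this "more backward compatible": code that re-assigned the derived value, a common pattern before these became read-only properties, now degrades to a warning instead of failing outright, while genuinely conflicting assignments still raise a `ValueError`.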