Unverified commit 8ba4c588, authored by Thomas Wolf, committed by GitHub

Allow a more backward compatible behavior of max_len_single_sentence and max_len_sentences_pair (#3994)

* Allow a more backward compatible behavior of max_len_single_sentence and max_len_sentences_pair

* The style and quality are now top-notch
parent 847e7f33
```diff
@@ -785,6 +785,30 @@ class PreTrainedTokenizer(SpecialTokensMixin):
     def max_len_sentences_pair(self):
         return self.model_max_length - self.num_special_tokens_to_add(pair=True)
 
+    @max_len_single_sentence.setter
+    def max_len_single_sentence(self, value):
+        """ For backward compatibility, allow trying to set 'max_len_single_sentence' """
+        if value == self.model_max_length - self.num_special_tokens_to_add(pair=False):
+            logger.warning(
+                "Setting 'max_len_single_sentence' is now deprecated. This value is automatically set up."
+            )
+        else:
+            raise ValueError(
+                "Setting 'max_len_single_sentence' is now deprecated. This value is automatically set up."
+            )
+
+    @max_len_sentences_pair.setter
+    def max_len_sentences_pair(self, value):
+        """ For backward compatibility, allow trying to set 'max_len_sentences_pair' """
+        if value == self.model_max_length - self.num_special_tokens_to_add(pair=True):
+            logger.warning(
+                "Setting 'max_len_sentences_pair' is now deprecated. This value is automatically set up."
+            )
+        else:
+            raise ValueError(
+                "Setting 'max_len_sentences_pair' is now deprecated. This value is automatically set up."
+            )
+
     def get_vocab(self):
         """ Returns the vocabulary as a dict of {token: index} pairs. `tokenizer.get_vocab()[token]` is equivalent to `tokenizer.convert_tokens_to_ids(token)` when `token` is in the vocab. """
         raise NotImplementedError()
```
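For context (this is not part of the commit), here is a minimal sketch of the behavior these setters introduce, assuming a tokenizer loaded from the `bert-base-uncased` checkpoint, which is an arbitrary illustrative choice:

```python
from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Reading still works exactly as before: both values are derived from
# model_max_length minus the number of special tokens that get added.
print(tokenizer.max_len_single_sentence)
print(tokenizer.max_len_sentences_pair)

# Backward compatible path: assigning the value the property would
# compute anyway only logs a deprecation warning, so old scripts that
# did this keep running.
tokenizer.max_len_single_sentence = (
    tokenizer.model_max_length - tokenizer.num_special_tokens_to_add(pair=False)
)

# Any other value raises, because the attribute is now derived
# automatically and can no longer be overridden.
try:
    tokenizer.max_len_sentences_pair = 123
except ValueError as err:
    print(err)
```

The if/else split is what makes this "more backward compatible": code that re-assigned the derived value, a common pattern before these became read-only properties, now degrades to a warning instead of failing outright, while genuinely conflicting assignments still raise a `ValueError`.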