Fix circular reference issue in CLIPTokenizerFast (#31075)

b72752f0 · Dhaivat Bhatt · GitHub · 464d986b · b72752f0
Unverified Commit b72752f0 authored Jun 05, 2024 by Dhaivat Bhatt Committed by GitHub Jun 05, 2024
Hide whitespace changes
Inline Side-by-side

Showing with 5 additions and 2 deletions

src/transformers/models/clip/tokenization_clip_fast.py +5 -2

No files found.
--- a/src/transformers/models/clip/tokenization_clip_fast.py
+++ b/src/transformers/models/clip/tokenization_clip_fast.py
@@ -89,16 +89,19 @@ class CLIPTokenizerFast(PreTrainedTokenizerFast):
                " to use your existing tokenizer, you will have to revert to a version prior to 4.17.0 of"
                " transformers."
            )
        self._wrap_decode_method_backend_tokenizer()
    # Very ugly hack to enable padding to have a correct decoding see https://github.com/huggingface/tokenizers/issues/872
    def _wrap_decode_method_backend_tokenizer(self):
        orig_decode_method = self.backend_tokenizer.decode
+        # Read the suffix into a local so that `new_decode_method` does not capture `self`, which would create a circular reference.
+        # See: https://github.com/huggingface/transformers/issues/30930
+        end_of_word_suffix = self.backend_tokenizer.model.end_of_word_suffix
        def new_decode_method(*args, **kwargs):
            text = orig_decode_method(*args, **kwargs)
-            text = text.replace(self.backend_tokenizer.model.end_of_word_suffix, " ").strip()
+            text = text.replace(end_of_word_suffix, " ").strip()
            return text
        self.backend_tokenizer.decode = new_decode_method