Unverified commit 203d4f82, authored by youkaichao, committed by GitHub
Browse files

[Core][Bugfix] cache len of tokenizer (#3741)

parent 991143cf
......@@ -26,6 +26,7 @@ def get_cached_tokenizer(
tokenizer_all_special_tokens_extended = (
tokenizer.all_special_tokens_extended)
tokenizer_all_special_tokens = set(tokenizer.all_special_tokens)
tokenizer_len = len(tokenizer)
class CachedTokenizer(tokenizer.__class__):
......@@ -41,6 +42,9 @@ def get_cached_tokenizer(
def all_special_tokens_extended(self):
return tokenizer_all_special_tokens_extended
# Return the tokenizer length that was computed once (`tokenizer_len = len(tokenizer)`)
# when the cached class was built, instead of recomputing it on every len() call.
# NOTE(review): the value is a snapshot — presumably tokens added to the tokenizer
# afterwards would not be reflected here; confirm callers never resize it.
def __len__(self):
return tokenizer_len
CachedTokenizer.__name__ = f"Cached{tokenizer.__class__.__name__}"
tokenizer.__class__ = CachedTokenizer
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment