Unverified commit 3dea40b8, authored by Elman Mansimov, committed by GitHub
Browse files

fixing tokenization of extra_id symbols in T5Tokenizer. Related to issue 4021 (#4353)

parent 51397336
...@@ -503,6 +503,7 @@ class SpecialTokensMixin: ...@@ -503,6 +503,7 @@ class SpecialTokensMixin:
if key in self.SPECIAL_TOKENS_ATTRIBUTES: if key in self.SPECIAL_TOKENS_ATTRIBUTES:
if key == "additional_special_tokens": if key == "additional_special_tokens":
assert isinstance(value, (list, tuple)) and all(isinstance(t, str) for t in value) assert isinstance(value, (list, tuple)) and all(isinstance(t, str) for t in value)
setattr(self, key, value)
elif isinstance(value, AddedTokenFast): elif isinstance(value, AddedTokenFast):
setattr(self, key, str(value)) setattr(self, key, str(value))
elif isinstance(value, str): elif isinstance(value, str):
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment