Commit 21734901 authored by Lysandre

Copy object instead of passing the reference

parent adb8c931
@@ -326,7 +326,7 @@ class PreTrainedTokenizer(object):
             cls.pretrained_init_configuration
             and pretrained_model_name_or_path in cls.pretrained_init_configuration
         ):
-            init_configuration = cls.pretrained_init_configuration[pretrained_model_name_or_path]
+            init_configuration = cls.pretrained_init_configuration[pretrained_model_name_or_path].copy()
         else:
             # Get the vocabulary from local files
             logger.info(
@@ -495,3 +495,16 @@ class TokenizerTesterMixin:
         assert [token_type_padding_idx] * padding_size + token_type_ids == padded_token_type_ids
         assert [0] * padding_size + attention_mask == padded_attention_mask
         assert [1] * padding_size + special_tokens_mask == padded_special_tokens_mask
+
+    def test_separate_tokenizers(self):
+        # This tests that tokenizers don't impact others. Unfortunately the case where it fails is when
+        # we're loading an S3 configuration from a pre-trained identifier, and we have no way of testing
+        # those today.
+        tokenizer = self.get_tokenizer(random_argument=True)
+        print(tokenizer.init_kwargs)
+        assert tokenizer.init_kwargs['random_argument'] is True
+        new_tokenizer = self.get_tokenizer(random_argument=False)
+        print(tokenizer.init_kwargs)
+        print(new_tokenizer.init_kwargs)
+        assert tokenizer.init_kwargs['random_argument'] is True
+        assert new_tokenizer.init_kwargs['random_argument'] is False
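
To make the motivation concrete, here is a minimal, standalone sketch of the aliasing bug the patch fixes (the `Tokenizer` class, the `"model-a"` identifier, and the `random_argument` kwarg are illustrative stand-ins, not the actual transformers code): because `pretrained_init_configuration` is a class-level dict shared by every instance, handing out the inner dict by reference lets one instance's kwargs leak into the shared defaults and into every tokenizer created afterwards.

```python
class Tokenizer:
    # Class-level defaults shared by every instance, keyed by pretrained identifier.
    pretrained_init_configuration = {"model-a": {"do_lower_case": True}}

    def __init__(self, name, **kwargs):
        # Bug: this binds a reference to the shared dict, not a private copy.
        init_configuration = Tokenizer.pretrained_init_configuration[name]
        init_configuration.update(kwargs)  # mutates the shared class-level dict
        self.init_kwargs = init_configuration


first = Tokenizer("model-a", random_argument=True)
second = Tokenizer("model-a", random_argument=False)

# Both instances (and the class defaults) now point at the same dict:
print(first.init_kwargs["random_argument"])     # False, clobbered by `second`
print(first.init_kwargs is second.init_kwargs)  # True

# Fixed version, mirroring the one-line change above: copy before mutating.
# init_configuration = Tokenizer.pretrained_init_configuration[name].copy()
```

Calling `.copy()` before mutating gives each instance its own dict, which is exactly what `test_separate_tokenizers` asserts.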