Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
21734901
Commit
21734901
authored
Jan 29, 2020
by
Lysandre
Browse files
Copy object instead of passing the reference
parent
adb8c931
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
14 additions
and
1 deletion
+14
-1
src/transformers/tokenization_utils.py
src/transformers/tokenization_utils.py
+1
-1
tests/test_tokenization_common.py
tests/test_tokenization_common.py
+13
-0
No files found.
src/transformers/tokenization_utils.py
View file @
21734901
...
...
@@ -326,7 +326,7 @@ class PreTrainedTokenizer(object):
cls
.
pretrained_init_configuration
and
pretrained_model_name_or_path
in
cls
.
pretrained_init_configuration
):
init_configuration
=
cls
.
pretrained_init_configuration
[
pretrained_model_name_or_path
]
init_configuration
=
cls
.
pretrained_init_configuration
[
pretrained_model_name_or_path
]
.
copy
()
else
:
# Get the vocabulary from local files
logger
.
info
(
...
...
tests/test_tokenization_common.py
View file @
21734901
...
...
@@ -495,3 +495,16 @@ class TokenizerTesterMixin:
assert
[
token_type_padding_idx
]
*
padding_size
+
token_type_ids
==
padded_token_type_ids
assert
[
0
]
*
padding_size
+
attention_mask
==
padded_attention_mask
assert
[
1
]
*
padding_size
+
special_tokens_mask
==
padded_special_tokens_mask
def
test_separate_tokenizers
(
self
):
# This tests that tokenizers don't impact others. Unfortunately the case where it fails is when
# we're loading an S3 configuration from a pre-trained identifier, and we have no way of testing those today.
tokenizer
=
self
.
get_tokenizer
(
random_argument
=
True
)
print
(
tokenizer
.
init_kwargs
)
assert
tokenizer
.
init_kwargs
[
'random_argument'
]
is
True
new_tokenizer
=
self
.
get_tokenizer
(
random_argument
=
False
)
print
(
tokenizer
.
init_kwargs
)
print
(
new_tokenizer
.
init_kwargs
)
assert
tokenizer
.
init_kwargs
[
'random_argument'
]
is
True
assert
new_tokenizer
.
init_kwargs
[
'random_argument'
]
is
False
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment