Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
21734901
Commit
21734901
authored
Jan 29, 2020
by
Lysandre
Browse files
Copy object instead of passing the reference
parent
adb8c931
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
14 additions
and
1 deletion
+14
-1
src/transformers/tokenization_utils.py
src/transformers/tokenization_utils.py
+1
-1
tests/test_tokenization_common.py
tests/test_tokenization_common.py
+13
-0
No files found.
src/transformers/tokenization_utils.py
View file @
21734901
...
...
@@ -326,7 +326,7 @@ class PreTrainedTokenizer(object):
cls
.
pretrained_init_configuration
and
pretrained_model_name_or_path
in
cls
.
pretrained_init_configuration
):
init_configuration
=
cls
.
pretrained_init_configuration
[
pretrained_model_name_or_path
]
init_configuration
=
cls
.
pretrained_init_configuration
[
pretrained_model_name_or_path
]
.
copy
()
else
:
# Get the vocabulary from local files
logger
.
info
(
...
...
tests/test_tokenization_common.py
View file @
21734901
...
...
@@ -495,3 +495,16 @@ class TokenizerTesterMixin:
assert
[
token_type_padding_idx
]
*
padding_size
+
token_type_ids
==
padded_token_type_ids
assert
[
0
]
*
padding_size
+
attention_mask
==
padded_attention_mask
assert
[
1
]
*
padding_size
+
special_tokens_mask
==
padded_special_tokens_mask
def test_separate_tokenizers(self):
    """Check that arguments given to one tokenizer do not leak into another.

    Two tokenizers are created through ``self.get_tokenizer`` with opposite
    values of ``random_argument``. Each one must report its own value under
    ``init_kwargs['random_argument']``, and the first tokenizer's value must
    still be intact after the second tokenizer has been created.
    """
    # This tests that tokenizers don't impact others. Unfortunately the case where it fails is when
    # we're loading an S3 configuration from a pre-trained identifier, and we have no way of testing those today.
    tokenizer = self.get_tokenizer(random_argument=True)
    # The assertion message carries the full init_kwargs so a failure is
    # self-explanatory without unconditional debug printing.
    assert tokenizer.init_kwargs['random_argument'] is True, tokenizer.init_kwargs

    new_tokenizer = self.get_tokenizer(random_argument=False)

    # Re-check the first tokenizer *after* building the second one: this is the
    # check that catches state shared between the two instances.
    assert tokenizer.init_kwargs['random_argument'] is True, tokenizer.init_kwargs
    assert new_tokenizer.init_kwargs['random_argument'] is False, new_tokenizer.init_kwargs
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment