Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
40ca1336
"git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "dde718e7a62bf8caa6623b5635ba02d6cb758c75"
Unverified
Commit
40ca1336
authored
Feb 15, 2023
by
Bruno Alvisio
Committed by
GitHub
Feb 15, 2023
Browse files
Fix passing kwargs to TFBertTokenizer (#21619)
parent
fc28c006
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
13 additions
and
4 deletions
+13
-4
src/transformers/models/bert/tokenization_bert_tf.py
src/transformers/models/bert/tokenization_bert_tf.py
+13
-4
No files found.
src/transformers/models/bert/tokenization_bert_tf.py
View file @
40ca1336
...
...
@@ -114,15 +114,24 @@ class TFBertTokenizer(tf.keras.layers.Layer):
tf_tokenizer = TFBertTokenizer.from_tokenizer(tokenizer)
```
"""
do_lower_case
=
kwargs
.
pop
(
"do_lower_case"
,
None
)
do_lower_case
=
tokenizer
.
do_lower_case
if
do_lower_case
is
None
else
do_lower_case
cls_token_id
=
kwargs
.
pop
(
"cls_token_id"
,
None
)
cls_token_id
=
tokenizer
.
cls_token_id
if
cls_token_id
is
None
else
cls_token_id
sep_token_id
=
kwargs
.
pop
(
"sep_token_id"
,
None
)
sep_token_id
=
tokenizer
.
sep_token_id
if
sep_token_id
is
None
else
sep_token_id
pad_token_id
=
kwargs
.
pop
(
"pad_token_id"
,
None
)
pad_token_id
=
tokenizer
.
pad_token_id
if
pad_token_id
is
None
else
pad_token_id
vocab
=
tokenizer
.
get_vocab
()
vocab
=
sorted
([(
wordpiece
,
idx
)
for
wordpiece
,
idx
in
vocab
.
items
()],
key
=
lambda
x
:
x
[
1
])
vocab_list
=
[
entry
[
0
]
for
entry
in
vocab
]
return
cls
(
vocab_list
=
vocab_list
,
do_lower_case
=
tokenizer
.
do_lower_case
,
cls_token_id
=
tokenizer
.
cls_token_id
,
sep_token_id
=
tokenizer
.
sep_token_id
,
pad_token_id
=
tokenizer
.
pad_token_id
,
do_lower_case
=
do_lower_case
,
cls_token_id
=
cls_token_id
,
sep_token_id
=
sep_token_id
,
pad_token_id
=
pad_token_id
,
**
kwargs
,
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment