Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
cc360649
Unverified
Commit
cc360649
authored
Oct 14, 2021
by
Patrick von Platen
Committed by
GitHub
Oct 14, 2021
Browse files
up (#13988)
parent
5b6bd4e7
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
17 additions
and
1 deletion
+17
-1
src/transformers/models/byt5/tokenization_byt5.py
src/transformers/models/byt5/tokenization_byt5.py
+1
-1
tests/test_tokenization_byt5.py
tests/test_tokenization_byt5.py
+16
-0
No files found.
src/transformers/models/byt5/tokenization_byt5.py
View file @
cc360649
...
...
@@ -237,7 +237,7 @@ class ByT5Tokenizer(PreTrainedTokenizer):
else
:
tok_string
=
bytes
([
ord
(
token
)])
bstring
+=
tok_string
string
=
bstring
.
decode
(
"utf-8"
)
string
=
bstring
.
decode
(
"utf-8"
,
errors
=
"ignore"
)
return
string
# ByT5Tokenizer has no vocab file
...
...
tests/test_tokenization_byt5.py
View file @
cc360649
...
...
@@ -290,6 +290,22 @@ class ByT5TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
),
)
def test_decode_single_bytes(self):
    """Check that decoding the single token id 255 yields an empty string.

    Each tokenizer enabled via ``self.test_slow_tokenizer`` and
    ``self.test_rust_tokenizer`` is saved to a temporary directory, reloaded
    from it with ``from_pretrained``, and asked to decode ``[255]``.
    """
    tokenizer_list = []
    if self.test_slow_tokenizer:
        tokenizer_list.append((self.tokenizer_class, self.get_tokenizer()))

    if self.test_rust_tokenizer:
        tokenizer_list.append((self.rust_tokenizer_class, self.get_rust_tokenizer()))

    for tokenizer_class, tokenizer_utils in tokenizer_list:
        with tempfile.TemporaryDirectory() as tmp_dir:
            # Round-trip through disk so the reloaded tokenizer is the one under test.
            tokenizer_utils.save_pretrained(tmp_dir)

            tokenizer = tokenizer_class.from_pretrained(tmp_dir)

            # assertEqual (instead of assertTrue on a comparison) reports the
            # decoded value when the check fails.
            self.assertEqual(tokenizer.decode([255]), "")
# tokenizer can be instantiated without any pretrained files, so no need for pretrained tokenizer list
def
test_pretrained_model_lists
(
self
):
pass
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment