Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
15d19ecf
Unverified
Commit
15d19ecf
authored
Jul 21, 2021
by
Philip May
Committed by
GitHub
Jul 21, 2021
Browse files
fix convert_tokens_to_string calls (#11716)
parent
c3d9ac76
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing 6 changed files with 6 additions and 11 deletions
+6
-11
src/transformers/models/albert/tokenization_albert.py
src/transformers/models/albert/tokenization_albert.py
+1
-2
src/transformers/models/barthez/tokenization_barthez.py
src/transformers/models/barthez/tokenization_barthez.py
+1
-2
src/transformers/models/camembert/tokenization_camembert.py
src/transformers/models/camembert/tokenization_camembert.py
+1
-2
src/transformers/models/m2m_100/tokenization_m2m_100.py
src/transformers/models/m2m_100/tokenization_m2m_100.py
+1
-2
src/transformers/models/mbart/tokenization_mbart50.py
src/transformers/models/mbart/tokenization_mbart50.py
+1
-2
src/transformers/models/speech_to_text/tokenization_speech_to_text.py
...mers/models/speech_to_text/tokenization_speech_to_text.py
+1
-1
No files found.
src/transformers/models/albert/tokenization_albert.py
View file @
15d19ecf
...
...
@@ -238,8 +238,7 @@ class AlbertTokenizer(PreTrainedTokenizer):
         return self.sp_model.IdToPiece(index)

     def convert_tokens_to_string(self, tokens):
-        out_string = "".join(tokens).replace(SPIECE_UNDERLINE, " ").strip()
-        return out_string
+        return self.sp_model.decode(tokens)

     def build_inputs_with_special_tokens(
         self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
...
...
src/transformers/models/barthez/tokenization_barthez.py
View file @
15d19ecf
...
...
@@ -271,8 +271,7 @@ class BarthezTokenizer(PreTrainedTokenizer):
     def convert_tokens_to_string(self, tokens):
         """Converts a sequence of tokens (strings for sub-words) in a single string."""
-        out_string = "".join(tokens).replace(SPIECE_UNDERLINE, " ").strip()
-        return out_string
+        return self.sp_model.decode(tokens)

     def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
         if not os.path.isdir(save_directory):
...
...
src/transformers/models/camembert/tokenization_camembert.py
View file @
15d19ecf
...
...
@@ -271,8 +271,7 @@ class CamembertTokenizer(PreTrainedTokenizer):
     def convert_tokens_to_string(self, tokens):
         """Converts a sequence of tokens (strings for sub-words) in a single string."""
-        out_string = "".join(tokens).replace(SPIECE_UNDERLINE, " ").strip()
-        return out_string
+        return self.sp_model.decode(tokens)

     def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
         if not os.path.isdir(save_directory):
...
...
src/transformers/models/m2m_100/tokenization_m2m_100.py
View file @
15d19ecf
...
...
@@ -202,8 +202,7 @@ class M2M100Tokenizer(PreTrainedTokenizer):
     def convert_tokens_to_string(self, tokens: List[str]) -> str:
         """Converts a sequence of tokens (strings for sub-words) in a single string."""
-        out_string = "".join(tokens).replace(SPIECE_UNDERLINE, " ").strip()
-        return out_string
+        return self.sp_model.decode(tokens)

     def get_special_tokens_mask(
         self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False
...
...
src/transformers/models/mbart/tokenization_mbart50.py
View file @
15d19ecf
...
...
@@ -228,8 +228,7 @@ class MBart50Tokenizer(PreTrainedTokenizer):
     def convert_tokens_to_string(self, tokens: List[str]) -> str:
         """Converts a sequence of tokens (strings for sub-words) in a single string."""
-        out_string = "".join(tokens).replace(SPIECE_UNDERLINE, " ").strip()
-        return out_string
+        return self.sp_model.decode(tokens)

     def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
         if not os.path.isdir(save_directory):
...
...
src/transformers/models/speech_to_text/tokenization_speech_to_text.py
View file @
15d19ecf
...
...
@@ -185,7 +185,7 @@ class Speech2TextTokenizer(PreTrainedTokenizer):
     def convert_tokens_to_string(self, tokens: List[str]) -> str:
         """Converts a sequence of tokens (strings for sub-words) in a single string."""
-        out_string = "".join(tokens).replace(SPIECE_UNDERLINE, " ").strip()
+        out_string = self.sp_model.decode(tokens)

         if self.do_upper_case:
             out_string = out_string.upper()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment