chenpangpang / transformers, commit 1a237d7f (unverified)

Authored Nov 14, 2019 by Thomas Wolf; committed by GitHub on Nov 14, 2019

Merge pull request #1831 from iedmrc/gpt2-tokenization-sum-func-replacement

sum() is replaced by itertools.chain.from_iterable()

Parents: df99f8c5, 7627dde1
Showing 1 changed file, with 3 additions and 2 deletions.

transformers/tokenization_utils.py: +3, −2
@@ -21,6 +21,7 @@ import os
 import json
 import six
 import copy
+import itertools
 from io import open
 
 from .file_utils import cached_path, is_tf_available, is_torch_available
@@ -641,9 +642,9 @@ class PreTrainedTokenizer(object):
                     tokenized_text += [sub_text]
             text_list = tokenized_text
 
-        return sum((self._tokenize(token, **kwargs) if token not \
+        return list(itertools.chain.from_iterable((self._tokenize(token, **kwargs) if token not \
                 in self.added_tokens_encoder and token not in self.all_special_tokens \
-                else [token] for token in tokenized_text), [])
+                else [token] for token in tokenized_text)))
 
         added_tokens = list(self.added_tokens_encoder.keys()) + self.all_special_tokens
        tokenized_text = split_on_tokens(added_tokens, text)
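Why the replacement matters: sum(lists, []) flattens a sequence of lists by repeated concatenation, copying the growing accumulator at every step, so it is quadratic in the total number of tokens, while itertools.chain.from_iterable walks each sub-list exactly once, which is linear. The following is a minimal sketch of the two patterns, not part of the commit; the token pieces are made up for illustration rather than taken from real tokenizer output:

    import itertools

    # Hypothetical per-token sub-lists, standing in for what
    # PreTrainedTokenizer._tokenize would produce for each token.
    tokenized_text = [["Hello"], ["Ġwor", "ld"], ["!"]]

    # Old pattern from the diff: each concatenation copies the
    # accumulator, O(n^2) in the total number of token pieces.
    flat_sum = sum(tokenized_text, [])

    # New pattern: yields elements lazily from each sub-list, O(n).
    flat_chain = list(itertools.chain.from_iterable(tokenized_text))

    assert flat_sum == flat_chain == ["Hello", "Ġwor", "ld", "!"]

Both forms produce the same flat list; only the asymptotic cost differs, which matters when tokenizing long documents.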