Commit 7627dde1 authored by İbrahim Ethem Demirci

itertools.chain.from_iterable() is a leaner way to flatten a list of token lists than sum(), so sum() has been replaced with it.
parent 155c782a
@@ -21,6 +21,7 @@ import os
 import json
 import six
 import copy
+import itertools
 from io import open
 from .file_utils import cached_path, is_tf_available, is_torch_available
@@ -641,9 +642,9 @@ class PreTrainedTokenizer(object):
                     tokenized_text += [sub_text]
                 text_list = tokenized_text
 
-            return sum((self._tokenize(token, **kwargs) if token not \
-                in self.added_tokens_encoder and token not in self.all_special_tokens \
-                else [token] for token in tokenized_text), [])
+            return list(itertools.chain.from_iterable((self._tokenize(token, **kwargs) if token not \
+                in self.added_tokens_encoder and token not in self.all_special_tokens \
+                else [token] for token in tokenized_text)))
 
         added_tokens = list(self.added_tokens_encoder.keys()) + self.all_special_tokens
         tokenized_text = split_on_tokens(added_tokens, text)
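For context (not part of the commit itself), here is a minimal standalone sketch of why chain.from_iterable() is the preferable way to flatten; the token_lists data below is made up and only stands in for the per-token lists produced by self._tokenize().

import itertools

# Hypothetical example data: one sublist of sub-word tokens per input token.
token_lists = [["hel", "lo"], ["<sep>"], ["wor", "ld"]]

# sum() builds a brand-new list at every concatenation step, so flattening
# n sublists re-copies earlier elements again and again (quadratic overall).
flat_sum = sum(token_lists, [])

# itertools.chain.from_iterable() walks each sublist once and copies every
# element exactly once (linear overall).
flat_chain = list(itertools.chain.from_iterable(token_lists))

assert flat_sum == flat_chain == ["hel", "lo", "<sep>", "wor", "ld"]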