Commit 391db836 authored by thomwolf's avatar thomwolf
Browse files

fix #1260 - remove special logic for decoding pairs of sequences

parent 963529e2
...@@ -933,20 +933,11 @@ class PreTrainedTokenizer(object): ...@@ -933,20 +933,11 @@ class PreTrainedTokenizer(object):
sub_texts.append(self.convert_tokens_to_string(current_sub_text)) sub_texts.append(self.convert_tokens_to_string(current_sub_text))
text = ''.join(sub_texts) text = ''.join(sub_texts)
if self._sep_token is not None and self._sep_token in text: if clean_up_tokenization_spaces:
text = text.replace(self._cls_token, self._sep_token) clean_text = self.clean_up_tokenization(text)
split_text = list(filter(lambda sentence: len(sentence) > 0, text.split(self._sep_token))) return clean_text
if clean_up_tokenization_spaces:
clean_text = [self.clean_up_tokenization(text) for text in split_text]
return clean_text
else:
return split_text
else: else:
if clean_up_tokenization_spaces: return text
clean_text = self.clean_up_tokenization(text)
return clean_text
else:
return text
@property @property
def special_tokens_map(self): def special_tokens_map(self):
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment