Commit 5b7b78e0, authored by Pascal Voitot, committed by Lysandre Debut
Browse files

🐛 #2096: in tokenizer.decode, add a space after special tokens so that the returned string is correctly formatted

parent 866d73ca
@@ -1180,7 +1180,7 @@ class PreTrainedTokenizer(object):
                 if current_sub_text:
                     sub_texts.append(self.convert_tokens_to_string(current_sub_text))
                     current_sub_text = []
-                sub_texts.append(" " + token)
+                sub_texts.append(" " + token + " ")
             else:
                 current_sub_text.append(token)
         if current_sub_text:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment