Unverified Commit 15d19ecf authored by Philip May's avatar Philip May Committed by GitHub
Browse files

fix convert_tokens_to_string calls (#11716)

parent c3d9ac76
......@@ -238,8 +238,7 @@ class AlbertTokenizer(PreTrainedTokenizer):
return self.sp_model.IdToPiece(index)
def convert_tokens_to_string(self, tokens):
out_string = "".join(tokens).replace(SPIECE_UNDERLINE, " ").strip()
return out_string
return self.sp_model.decode(tokens)
def build_inputs_with_special_tokens(
self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
......
......@@ -271,8 +271,7 @@ class BarthezTokenizer(PreTrainedTokenizer):
def convert_tokens_to_string(self, tokens):
"""Converts a sequence of tokens (strings for sub-words) in a single string."""
out_string = "".join(tokens).replace(SPIECE_UNDERLINE, " ").strip()
return out_string
return self.sp_model.decode(tokens)
def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
if not os.path.isdir(save_directory):
......
......@@ -271,8 +271,7 @@ class CamembertTokenizer(PreTrainedTokenizer):
def convert_tokens_to_string(self, tokens):
"""Converts a sequence of tokens (strings for sub-words) in a single string."""
out_string = "".join(tokens).replace(SPIECE_UNDERLINE, " ").strip()
return out_string
return self.sp_model.decode(tokens)
def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
if not os.path.isdir(save_directory):
......
......@@ -202,8 +202,7 @@ class M2M100Tokenizer(PreTrainedTokenizer):
def convert_tokens_to_string(self, tokens: List[str]) -> str:
"""Converts a sequence of tokens (strings for sub-words) in a single string."""
out_string = "".join(tokens).replace(SPIECE_UNDERLINE, " ").strip()
return out_string
return self.sp_model.decode(tokens)
def get_special_tokens_mask(
self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False
......
......@@ -228,8 +228,7 @@ class MBart50Tokenizer(PreTrainedTokenizer):
def convert_tokens_to_string(self, tokens: List[str]) -> str:
"""Converts a sequence of tokens (strings for sub-words) in a single string."""
out_string = "".join(tokens).replace(SPIECE_UNDERLINE, " ").strip()
return out_string
return self.sp_model.decode(tokens)
def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
if not os.path.isdir(save_directory):
......
......@@ -185,7 +185,7 @@ class Speech2TextTokenizer(PreTrainedTokenizer):
def convert_tokens_to_string(self, tokens: List[str]) -> str:
"""Converts a sequence of tokens (strings for sub-words) in a single string."""
out_string = "".join(tokens).replace(SPIECE_UNDERLINE, " ").strip()
out_string = self.sp_model.decode(tokens)
if self.do_upper_case:
out_string = out_string.upper()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment