"test/git@developer.sourcefind.cn:change/sglang.git" did not exist on "08ab2a1655224a671fd8d356387aa83f3179129a"
Commit 8b388827 authored by thomwolf
Browse files

fix #1920

parent d425a4d6
...@@ -192,9 +192,9 @@ class CTRLTokenizer(PreTrainedTokenizer): ...@@ -192,9 +192,9 @@ class CTRLTokenizer(PreTrainedTokenizer):
""" """
split_tokens = [] split_tokens = []
text = text.split(' ') words = re.findall(r'\S+\n?', text)
for token in text: for token in words:
split_tokens.extend([t for t in self.bpe(token).split(' ')]) split_tokens.extend([t for t in self.bpe(token).split(' ')])
return split_tokens return split_tokens
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment