Unverified commit 3b29aaee, authored by philipwangOvO, committed by GitHub
Browse files

[Fix] bin_trim (#237)


Co-authored-by: wangchonghua <wangchonghua@pjlab.org.cn>
parent 655a807f
...@@ -281,18 +281,20 @@ class OpenAI(BaseAPIModel): ...@@ -281,18 +281,20 @@ class OpenAI(BaseAPIModel):
pattern = re.compile(r'[\u4e00-\u9fa5]') pattern = re.compile(r'[\u4e00-\u9fa5]')
if pattern.search(prompt): if pattern.search(prompt):
words = list(jieba.cut(prompt, cut_all=False)) words = list(jieba.cut(prompt, cut_all=False))
sep = ''
else: else:
words = prompt.split(' ') words = prompt.split(' ')
sep = ' '
l, r = 1, len(words) l, r = 1, len(words)
while l + 2 < r: while l + 2 < r:
mid = (l + r) // 2 mid = (l + r) // 2
if self.mode == 'front': if self.mode == 'front':
cur_prompt = ' '.join(words[-mid:]) cur_prompt = sep.join(words[-mid:])
elif self.mode == 'mid': elif self.mode == 'mid':
cur_prompt = ' '.join(words[:mid]) + ' '.join(words[-mid:]) cur_prompt = sep.join(words[:mid]) + sep.join(words[-mid:])
elif self.mode == 'rear': elif self.mode == 'rear':
cur_prompt = ' '.join(words[:mid]) cur_prompt = sep.join(words[:mid])
if self.get_token_len(cur_prompt) <= num_token: if self.get_token_len(cur_prompt) <= num_token:
l = mid # noqa: E741 l = mid # noqa: E741
...@@ -300,9 +302,9 @@ class OpenAI(BaseAPIModel): ...@@ -300,9 +302,9 @@ class OpenAI(BaseAPIModel):
r = mid r = mid
if self.mode == 'front': if self.mode == 'front':
prompt = ' '.join(words[-l:]) prompt = sep.join(words[-l:])
elif self.mode == 'mid': elif self.mode == 'mid':
prompt = ' '.join(words[:l]) + ' '.join(words[-l:]) prompt = sep.join(words[:l]) + sep.join(words[-l:])
elif self.mode == 'rear': elif self.mode == 'rear':
prompt = ' '.join(words[:l]) prompt = sep.join(words[:l])
return prompt return prompt
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment