Unverified Commit 1aa13615 authored by Jin Shang's avatar Jin Shang Committed by GitHub
Browse files

Fix OpenAI server completion_tokens referenced before assignment (#1996)

parent fe470ae5
...@@ -332,8 +332,7 @@ async def create_chat_completion(request: ChatCompletionRequest, ...@@ -332,8 +332,7 @@ async def create_chat_completion(request: ChatCompletionRequest,
# Send token-by-token response for each request.
delta_text = output.text[len(previous_texts[i]):] delta_text = output.text[len(previous_texts[i]):]
previous_texts[i] = output.text previous_texts[i] = output.text
completion_tokens = len(output.token_ids) previous_num_tokens[i] = len(output.token_ids)
previous_num_tokens[i] = completion_tokens
choice_data = ChatCompletionResponseStreamChoice( choice_data = ChatCompletionResponseStreamChoice(
index=i, index=i,
delta=DeltaMessage(content=delta_text), delta=DeltaMessage(content=delta_text),
...@@ -351,8 +350,8 @@ async def create_chat_completion(request: ChatCompletionRequest, ...@@ -351,8 +350,8 @@ async def create_chat_completion(request: ChatCompletionRequest,
prompt_tokens = len(res.prompt_token_ids) prompt_tokens = len(res.prompt_token_ids)
final_usage = UsageInfo( final_usage = UsageInfo(
prompt_tokens=prompt_tokens, prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens, completion_tokens=previous_num_tokens[i],
total_tokens=prompt_tokens + completion_tokens, total_tokens=prompt_tokens + previous_num_tokens[i],
) )
choice_data = ChatCompletionResponseStreamChoice( choice_data = ChatCompletionResponseStreamChoice(
index=i, delta=[], finish_reason=output.finish_reason) index=i, delta=[], finish_reason=output.finish_reason)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment