"vscode:/vscode.git/clone" did not exist on "80183ca58bd49ee364804d2f3825139e44dcb732"
Unverified commit 1afe3d07 authored by Xihuai Wang, committed by GitHub

Align finish reason and stream mode in openai api (#4388)

parent 44f47d3e
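
In short: the OpenAI-compatible handlers previously emitted finish_reason as an empty string while a response was still streaming, and used the nonstandard value "tool_call" for tool calls. The OpenAI API instead expects null until generation terminates, and then one of a closed set of literals ("stop", "length", "tool_calls", ...). The diff below normalizes this everywhere. A minimal sketch of the convention the diff adopts (the helper name is illustrative, not from this commit):

from typing import Optional

def to_openai_finish_reason(finish_reason: Optional[dict]) -> Optional[str]:
    # Mid-stream: the engine has not produced a finish reason yet -> JSON null.
    # Finished: surface the engine's reason type ("stop", "length", ...).
    return finish_reason["type"] if finish_reason else None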
@@ -645,7 +645,7 @@ def v1_generate_response(
                 "index": 0,
                 "text": text,
                 "logprobs": logprobs,
-                "finish_reason": (finish_reason["type"] if finish_reason else ""),
+                "finish_reason": finish_reason["type"] if finish_reason else None,
                 "matched_stop": (
                     finish_reason["matched"]
                     if finish_reason and "matched" in finish_reason
@@ -657,7 +657,7 @@ def v1_generate_response(
                 index=idx,
                 text=text,
                 logprobs=logprobs,
-                finish_reason=(finish_reason["type"] if finish_reason else ""),
+                finish_reason=finish_reason["type"] if finish_reason else None,
                 matched_stop=(
                     finish_reason["matched"]
                     if finish_reason and "matched" in finish_reason
@@ -805,7 +805,7 @@ async def v1_completions(tokenizer_manager, raw_request: Request):
                         index=index,
                         text=delta,
                         logprobs=logprobs,
-                        finish_reason=(finish_reason["type"] if finish_reason else ""),
+                        finish_reason=finish_reason["type"] if finish_reason else None,
                         matched_stop=(
                             finish_reason["matched"]
                             if finish_reason and "matched" in finish_reason
@@ -1216,7 +1216,7 @@ def v1_chat_generate_response(
                 "reasoning_content": reasoning_text if reasoning_text else None,
             },
             "logprobs": choice_logprobs.model_dump() if choice_logprobs else None,
-            "finish_reason": (finish_reason["type"] if finish_reason else ""),
+            "finish_reason": finish_reason["type"] if finish_reason else None,
             "matched_stop": (
                 finish_reason["matched"]
                 if finish_reason and "matched" in finish_reason
@@ -1233,7 +1233,7 @@ def v1_chat_generate_response(
                 reasoning_content=reasoning_text if reasoning_text else None,
             ),
             logprobs=choice_logprobs,
-            finish_reason=(finish_reason["type"] if finish_reason else ""),
+            finish_reason=finish_reason["type"] if finish_reason else None,
             matched_stop=(
                 finish_reason["matched"]
                 if finish_reason and "matched" in finish_reason
@@ -1377,23 +1377,11 @@ async def v1_chat_completions(
                     if is_first:
                         # First chunk with role
                         is_first = False
-                        if (
-                            tokenizer_manager.server_args.reasoning_parser
-                            and request.separate_reasoning
-                        ):
-                            delta = DeltaMessage(
-                                role="assistant", reasoning_content=None
-                            )
-                        else:
-                            delta = DeltaMessage(role="assistant", content=None)
+                        delta = DeltaMessage(role="assistant")
                         choice_data = ChatCompletionResponseStreamChoice(
                             index=index,
                             delta=delta,
-                            finish_reason=(
-                                None
-                                if finish_reason_type and len(finish_reason_type) == 0
-                                else finish_reason_type
-                            ),
+                            finish_reason=finish_reason_type,
                             matched_stop=(
                                 finish_reason["matched"]
                                 if finish_reason and "matched" in finish_reason
@@ -1434,12 +1422,7 @@ async def v1_chat_completions(
                                     reasoning_text if reasoning_text else None
                                 )
                             ),
-                            finish_reason=(
-                                None
-                                if finish_reason_type
-                                and len(finish_reason_type) == 0
-                                else finish_reason_type
-                            ),
+                            finish_reason=finish_reason_type,
                         )
                         chunk = ChatCompletionStreamResponse(
                             id=content["meta_info"]["id"],
@@ -1471,12 +1454,7 @@ async def v1_chat_completions(
                             delta=DeltaMessage(
                                 content=normal_text if normal_text else None
                             ),
-                            finish_reason=(
-                                None
-                                if finish_reason_type
-                                and len(finish_reason_type) == 0
-                                else finish_reason_type
-                            ),
+                            finish_reason=finish_reason_type,
                         )
                         chunk = ChatCompletionStreamResponse(
                             id=content["meta_info"]["id"],
@@ -1490,11 +1468,7 @@ async def v1_chat_completions(
                         for call_item in calls:
                             # transform call_item -> FunctionResponse + ToolCall
-                            if (
-                                content["meta_info"]["finish_reason"]
-                                and content["meta_info"]["finish_reason"]["type"]
-                                == "stop"
-                            ):
+                            if finish_reason_type == "stop":
                                 latest_delta_len = 0
                                 if isinstance(call_item.parameters, str):
                                     latest_delta_len = len(call_item.parameters)
@@ -1515,6 +1489,8 @@ async def v1_chat_completions(
                                     )
                                     call_item.parameters = remaining_call
 
+                                finish_reason_type = "tool_calls"
+
                             tool_call = ToolCall(
                                 id=str(call_item.tool_index),
                                 function=FunctionResponse(
@@ -1524,10 +1500,13 @@ async def v1_chat_completions(
                                 )
                             choice_data = ChatCompletionResponseStreamChoice(
                                 index=index,
-                                delta=DeltaMessage(
-                                    role="assistant", tool_calls=[tool_call]
-                                ),
-                                finish_reason="tool_call",
+                                delta=DeltaMessage(tool_calls=[tool_call]),
+                                finish_reason=(
+                                    None
+                                    if request.stream_options
+                                    and request.stream_options.include_usage
+                                    else finish_reason_type
+                                ),  # additional chunk will be returned
                             )
                             chunk = ChatCompletionStreamResponse(
                                 id=content["meta_info"]["id"],
@@ -1542,30 +1521,44 @@ async def v1_chat_completions(
                     else:
                         # No tool calls => just treat this as normal text
-                        choice_data = ChatCompletionResponseStreamChoice(
-                            index=index,
-                            delta=DeltaMessage(content=delta if delta else None),
-                            finish_reason=(
-                                None
-                                if finish_reason_type and len(finish_reason_type) == 0
-                                else finish_reason_type
-                            ),
-                            matched_stop=(
-                                finish_reason["matched"]
-                                if finish_reason and "matched" in finish_reason
-                                else None
-                            ),
-                            logprobs=choice_logprobs,
-                        )
-                        chunk = ChatCompletionStreamResponse(
-                            id=content["meta_info"]["id"],
-                            created=created,
-                            choices=[choice_data],
-                            model=request.model,
-                        )
-                        yield f"data: {chunk.model_dump_json()}\n\n"
-                        stream_buffers[index] = new_stream_buffer
-                        is_firsts[index] = is_first
+                        if delta or not (
+                            request.stream_options
+                            and request.stream_options.include_usage
+                        ):
+                            choice_data = ChatCompletionResponseStreamChoice(
+                                index=index,
+                                delta=DeltaMessage(content=delta if delta else None),
+                                finish_reason=(
+                                    None
+                                    if request.stream_options
+                                    and request.stream_options.include_usage
+                                    else finish_reason_type
+                                ),
+                                matched_stop=(
+                                    finish_reason["matched"]
+                                    if finish_reason and "matched" in finish_reason
+                                    else None
+                                ),
+                                logprobs=choice_logprobs,
+                            )
+                            chunk = ChatCompletionStreamResponse(
+                                id=content["meta_info"]["id"],
+                                created=created,
+                                choices=[choice_data],
+                                model=request.model,
+                            )
+                            yield f"data: {chunk.model_dump_json()}\n\n"
+                        stream_buffers[index] = new_stream_buffer
+                        is_firsts[index] = is_first
+
+                if finish_reason_type == "stop" and request.tool_choice != "none":
+                    parser = FunctionCallParser(
+                        tools=request.tools,
+                        tool_call_parser=tokenizer_manager.server_args.tool_call_parser,
+                    )
+                    if parser.has_tool_call(new_stream_buffer):
+                        # if the stream ends with empty string after tool calls
+                        finish_reason_type = "tool_calls"
+
                 if request.stream_options and request.stream_options.include_usage:
                     total_prompt_tokens = sum(
                         tokens
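
Besides suppressing the empty final content chunk when usage reporting is on, this hunk re-scans the accumulated stream buffer at end of stream: if the engine reported a plain "stop" but the buffered text actually ends in a tool call, the finish reason is promoted to "tool_calls". A minimal sketch of that promotion with a stubbed detector (the real FunctionCallParser is configured via sglang's --tool-call-parser):

def final_finish_reason(finish_reason_type, buffer, has_tool_call) -> str:
    # A stream that terminates right after a complete tool call should
    # report "tool_calls", even though the engine's own reason was "stop".
    if finish_reason_type == "stop" and has_tool_call(buffer):
        return "tool_calls"
    return finish_reason_type

detect = lambda text: "<tool_call>" in text  # stand-in for parser.has_tool_call
assert final_finish_reason("stop", "<tool_call>{}</tool_call>", detect) == "tool_calls"
assert final_finish_reason("length", "truncated text", detect) == "length"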
@@ -1590,17 +1583,22 @@ async def v1_chat_completions(
                         prompt_tokens_details=prompt_tokens_details,
                     )
-                    final_usage_chunk = ChatCompletionStreamResponse(
-                        id=content["meta_info"]["id"],
-                        created=created,
-                        choices=[],
-                        model=request.model,
-                        usage=usage,
-                    )
-                    final_usage_data = final_usage_chunk.model_dump_json(
-                        exclude_none=True
-                    )
-                    yield f"data: {final_usage_data}\n\n"
+                else:
+                    usage = None
+                final_usage_chunk = ChatCompletionStreamResponse(
+                    id=content["meta_info"]["id"],
+                    created=created,
+                    choices=[
+                        ChatCompletionResponseStreamChoice(
+                            index=index,
+                            delta=DeltaMessage(),
+                            finish_reason=finish_reason_type,
+                        )
+                    ],
+                    model=request.model,
+                    usage=usage,
+                )
+                yield f"data: {final_usage_chunk.model_dump_json()}\n\n"
         except ValueError as e:
             error = create_streaming_error_response(str(e))
             yield f"data: {error}\n\n"
@@ -187,7 +187,7 @@ class CompletionResponseChoice(BaseModel):
     index: int
     text: str
     logprobs: Optional[LogProbs] = None
-    finish_reason: Optional[str] = None
+    finish_reason: Literal["stop", "length", "content_filter"]
     matched_stop: Union[None, int, str] = None
@@ -204,7 +204,7 @@ class CompletionResponseStreamChoice(BaseModel):
     index: int
     text: str
     logprobs: Optional[LogProbs] = None
-    finish_reason: Optional[str] = None
+    finish_reason: Optional[Literal["stop", "length", "content_filter"]] = None
     matched_stop: Union[None, int, str] = None
@@ -387,7 +387,9 @@ class ChatCompletionResponseChoice(BaseModel):
     index: int
     message: ChatMessage
     logprobs: Optional[Union[LogProbs, ChoiceLogprobs]] = None
-    finish_reason: str
+    finish_reason: Literal[
+        "stop", "length", "tool_calls", "content_filter", "function_call"
+    ]
     matched_stop: Union[None, int, str] = None
@@ -411,7 +413,9 @@ class ChatCompletionResponseStreamChoice(BaseModel):
     index: int
     delta: DeltaMessage
     logprobs: Optional[Union[LogProbs, ChoiceLogprobs]] = None
-    finish_reason: Optional[str] = None
+    finish_reason: Optional[
+        Literal["stop", "length", "tool_calls", "content_filter", "function_call"]
+    ] = None
     matched_stop: Union[None, int, str] = None
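
The protocol models now encode the allowed finish reasons as Literal types, so the old empty-string sentinel can no longer slip through; pydantic rejects it at construction time. A small sketch of what the tightened schema enforces:

from typing import Literal, Optional
from pydantic import BaseModel, ValidationError

class StreamChoice(BaseModel):
    finish_reason: Optional[
        Literal["stop", "length", "tool_calls", "content_filter", "function_call"]
    ] = None

StreamChoice(finish_reason=None)    # ok: still generating
StreamChoice(finish_reason="stop")  # ok: terminal chunk
try:
    StreamChoice(finish_reason="")  # the old sentinel now fails validation
except ValidationError:
    print("empty-string finish_reason rejected")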
@@ -258,7 +258,12 @@ class TestOpenAIServer(CustomTestCase):
                     ret_num_top_logprobs == logprobs
                 ), f"{ret_num_top_logprobs} vs {logprobs}"
-            assert isinstance(data.content, str) or response.choices[0].finish_reason
+            assert (
+                isinstance(data.content, str)
+                or isinstance(data.reasoning_content, str)
+                or len(data.tool_calls) > 0
+                or response.choices[0].finish_reason
+            )
             assert response.id
             assert response.created
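
The widened assertion tracks the new chunk shapes: with separate reasoning or tool calls enabled, a delta may carry only reasoning_content or tool_calls while content stays None, and the final chunk may carry nothing but a finish_reason, so any one of the four conditions marks a well-formed chunk.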