change / sglang / Commits / 1afe3d07
"vscode:/vscode.git/clone" did not exist on "80183ca58bd49ee364804d2f3825139e44dcb732"
Unverified commit 1afe3d07, authored Mar 27, 2025 by Xihuai Wang; committed by GitHub on Mar 27, 2025.
Align finish reason and stream mode in openai api (#4388)
parent 44f47d3e
Showing 3 changed files with 87 additions and 80 deletions (+87 −80)
python/sglang/srt/openai_api/adapter.py   +73 −75
python/sglang/srt/openai_api/protocol.py  +8 −4
test/srt/test_openai_server.py            +6 −1
python/sglang/srt/openai_api/adapter.py
@@ -645,7 +645,7 @@ def v1_generate_response(
                 "index": 0,
                 "text": text,
                 "logprobs": logprobs,
-                "finish_reason": (finish_reason["type"] if finish_reason else ""),
+                "finish_reason": finish_reason["type"] if finish_reason else None,
                 "matched_stop": (
                     finish_reason["matched"]
                     if finish_reason and "matched" in finish_reason
@@ -657,7 +657,7 @@ def v1_generate_response(
             index=idx,
             text=text,
             logprobs=logprobs,
-            finish_reason=(finish_reason["type"] if finish_reason else ""),
+            finish_reason=finish_reason["type"] if finish_reason else None,
             matched_stop=(
                 finish_reason["matched"]
                 if finish_reason and "matched" in finish_reason
@@ -805,7 +805,7 @@ async def v1_completions(tokenizer_manager, raw_request: Request):
                 index=index,
                 text=delta,
                 logprobs=logprobs,
-                finish_reason=(finish_reason["type"] if finish_reason else ""),
+                finish_reason=finish_reason["type"] if finish_reason else None,
                 matched_stop=(
                     finish_reason["matched"]
                     if finish_reason and "matched" in finish_reason
@@ -1216,7 +1216,7 @@ def v1_chat_generate_response(
                 "reasoning_content": reasoning_text if reasoning_text else None,
             },
             "logprobs": choice_logprobs.model_dump() if choice_logprobs else None,
-            "finish_reason": (finish_reason["type"] if finish_reason else ""),
+            "finish_reason": finish_reason["type"] if finish_reason else None,
             "matched_stop": (
                 finish_reason["matched"]
                 if finish_reason and "matched" in finish_reason
@@ -1233,7 +1233,7 @@ def v1_chat_generate_response(
                 reasoning_content=reasoning_text if reasoning_text else None,
             ),
             logprobs=choice_logprobs,
-            finish_reason=(finish_reason["type"] if finish_reason else ""),
+            finish_reason=finish_reason["type"] if finish_reason else None,
             matched_stop=(
                 finish_reason["matched"]
                 if finish_reason and "matched" in finish_reason
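The same one-line normalization repeats across v1_generate_response, v1_completions, and v1_chat_generate_response: an unfinished choice previously serialized finish_reason as "", a value the OpenAI API never emits; it is now None. A minimal sketch of the new behavior (the dict shape is taken from the diff above; the helper name is ours for illustration):

from typing import Optional

# In the diff, meta_info["finish_reason"] is either None (still generating)
# or a dict such as {"type": "stop", "matched": "</s>"} or {"type": "length"}.
def normalize_finish_reason(finish_reason: Optional[dict]) -> Optional[str]:
    # New behavior: propagate None so the field serializes as null,
    # matching the OpenAI contract, instead of the old "" sentinel.
    return finish_reason["type"] if finish_reason else None

assert normalize_finish_reason({"type": "stop", "matched": "</s>"}) == "stop"
assert normalize_finish_reason(None) is None  # previously returned ""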
@@ -1377,23 +1377,11 @@ async def v1_chat_completions(
                 if is_first:
                     # First chunk with role
                     is_first = False
-                    if (
-                        tokenizer_manager.server_args.reasoning_parser
-                        and request.separate_reasoning
-                    ):
-                        delta = DeltaMessage(role="assistant", reasoning_content=None)
-                    else:
-                        delta = DeltaMessage(role="assistant", content=None)
+                    delta = DeltaMessage(role="assistant")
                     choice_data = ChatCompletionResponseStreamChoice(
                         index=index,
                         delta=delta,
-                        finish_reason=(
-                            None
-                            if finish_reason_type and len(finish_reason_type) == 0
-                            else finish_reason_type
-                        ),
+                        finish_reason=finish_reason_type,
                         matched_stop=(
                             finish_reason["matched"]
                             if finish_reason and "matched" in finish_reason
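With the reasoning-parser branch removed, the first chunk of every stream now carries only the assistant role; content and reasoning_content appear in later chunks once there is text to send, and finish_reason passes through finish_reason_type unchanged (None while generating). Roughly, the first SSE event looks like this; the id, timestamp, and model values below are made up for illustration:

import json

first_chunk = {
    "id": "chatcmpl-xxxx",  # illustrative
    "object": "chat.completion.chunk",
    "created": 1711500000,
    "model": "default",
    "choices": [
        {
            "index": 0,
            "delta": {"role": "assistant"},  # no content/reasoning yet
            "finish_reason": None,  # finish_reason_type mid-stream
        }
    ],
}
print(f"data: {json.dumps(first_chunk)}\n\n")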
@@ -1434,12 +1422,7 @@ async def v1_chat_completions(
                                 reasoning_text if reasoning_text else None
                             )
                         ),
-                        finish_reason=(
-                            None
-                            if finish_reason_type and len(finish_reason_type) == 0
-                            else finish_reason_type
-                        ),
+                        finish_reason=finish_reason_type,
                     )
                     chunk = ChatCompletionStreamResponse(
                         id=content["meta_info"]["id"],
@@ -1471,12 +1454,7 @@ async def v1_chat_completions(
                         delta=DeltaMessage(
                             content=normal_text if normal_text else None
                         ),
-                        finish_reason=(
-                            None
-                            if finish_reason_type and len(finish_reason_type) == 0
-                            else finish_reason_type
-                        ),
+                        finish_reason=finish_reason_type,
                     )
                     chunk = ChatCompletionStreamResponse(
                         id=content["meta_info"]["id"],
@@ -1490,11 +1468,7 @@ async def v1_chat_completions(
                     for call_item in calls:
                         # transform call_item -> FunctionResponse + ToolCall
-                        if (
-                            content["meta_info"]["finish_reason"]
-                            and content["meta_info"]["finish_reason"]["type"] == "stop"
-                        ):
+                        if finish_reason_type == "stop":
                             latest_delta_len = 0
                             if isinstance(call_item.parameters, str):
                                 latest_delta_len = len(call_item.parameters)
@@ -1515,6 +1489,8 @@ async def v1_chat_completions(
                             )
                             call_item.parameters = remaining_call
+                            finish_reason_type = "tool_calls"
                         tool_call = ToolCall(
                             id=str(call_item.tool_index),
                             function=FunctionResponse(
...
@@ -1524,10 +1500,13 @@ async def v1_chat_completions(
...
@@ -1524,10 +1500,13 @@ async def v1_chat_completions(
)
)
choice_data
=
ChatCompletionResponseStreamChoice
(
choice_data
=
ChatCompletionResponseStreamChoice
(
index
=
index
,
index
=
index
,
delta
=
DeltaMessage
(
delta
=
DeltaMessage
(
tool_calls
=
[
tool_call
]),
role
=
"assistant"
,
tool_calls
=
[
tool_call
]
finish_reason
=
(
),
None
finish_reason
=
"tool_call"
,
if
request
.
stream_options
and
request
.
stream_options
.
include_usage
else
finish_reason_type
),
# additional chunk will be return
)
)
chunk
=
ChatCompletionStreamResponse
(
chunk
=
ChatCompletionStreamResponse
(
id
=
content
[
"meta_info"
][
"id"
],
id
=
content
[
"meta_info"
][
"id"
],
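Two fixes land in this chunk: the misspelled finish_reason="tool_call" (the protocol literal is "tool_calls") is replaced by the computed finish_reason_type, and the finish reason is withheld whenever the client requested a usage chunk, since the new closing chunk will carry it instead. The inline conditional, restated as a standalone sketch:

from typing import Optional

def chunk_finish_reason(
    finish_reason_type: Optional[str], include_usage: bool
) -> Optional[str]:
    # Mirrors the expression in the diff: with stream_options.include_usage
    # set, hold the finish reason back for the closing usage chunk.
    return None if include_usage else finish_reason_type

assert chunk_finish_reason("tool_calls", include_usage=True) is None
assert chunk_finish_reason("tool_calls", include_usage=False) == "tool_calls"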
@@ -1542,30 +1521,44 @@ async def v1_chat_completions(
                 else:
                     # No tool calls => just treat this as normal text
-                    choice_data = ChatCompletionResponseStreamChoice(
-                        index=index,
-                        delta=DeltaMessage(content=delta if delta else None),
-                        finish_reason=(
-                            None
-                            if finish_reason_type and len(finish_reason_type) == 0
-                            else finish_reason_type
-                        ),
-                        matched_stop=(
-                            finish_reason["matched"]
-                            if finish_reason and "matched" in finish_reason
-                            else None
-                        ),
-                        logprobs=choice_logprobs,
-                    )
-                    chunk = ChatCompletionStreamResponse(
-                        id=content["meta_info"]["id"],
-                        created=created,
-                        choices=[choice_data],
-                        model=request.model,
-                    )
-                    yield f"data: {chunk.model_dump_json()}\n\n"
+                    if delta or not (
+                        request.stream_options
+                        and request.stream_options.include_usage
+                    ):
+                        choice_data = ChatCompletionResponseStreamChoice(
+                            index=index,
+                            delta=DeltaMessage(content=delta if delta else None),
+                            finish_reason=(
+                                None
+                                if request.stream_options
+                                and request.stream_options.include_usage
+                                else finish_reason_type
+                            ),
+                            matched_stop=(
+                                finish_reason["matched"]
+                                if finish_reason and "matched" in finish_reason
+                                else None
+                            ),
+                            logprobs=choice_logprobs,
+                        )
+                        chunk = ChatCompletionStreamResponse(
+                            id=content["meta_info"]["id"],
+                            created=created,
+                            choices=[choice_data],
+                            model=request.model,
+                        )
+                        yield f"data: {chunk.model_dump_json()}\n\n"
                     stream_buffers[index] = new_stream_buffer
                     is_firsts[index] = is_first
+                    if finish_reason_type == "stop" and request.tool_choice != "none":
+                        parser = FunctionCallParser(
+                            tools=request.tools,
+                            tool_call_parser=tokenizer_manager.server_args.tool_call_parser,
+                        )
+                        if parser.has_tool_call(new_stream_buffer):
+                            # if the stream ends with empty string after tool calls
+                            finish_reason_type = "tool_calls"
                 if request.stream_options and request.stream_options.include_usage:
                     total_prompt_tokens = sum(
                         tokens
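Two behaviors change in the normal-text path: an empty delta is no longer emitted as its own chunk when a closing usage chunk is coming, and a "stop" that actually terminates a pending tool call is relabeled "tool_calls" before the finish reason leaves the server. The emission guard, restated for illustration:

def should_emit_text_chunk(delta: str, include_usage: bool) -> bool:
    # Mirrors `if delta or not (request.stream_options and
    # request.stream_options.include_usage)`: an empty delta is only worth
    # sending when no usage chunk will close the stream with the
    # finish_reason later.
    return bool(delta) or not include_usage

assert should_emit_text_chunk("", include_usage=True) is False
assert should_emit_text_chunk("", include_usage=False) is True
assert should_emit_text_chunk("hi", include_usage=True) is True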
@@ -1590,17 +1583,22 @@ async def v1_chat_completions(
                         prompt_tokens_details=prompt_tokens_details,
                     )
-                final_usage_chunk = ChatCompletionStreamResponse(
-                    id=content["meta_info"]["id"],
-                    created=created,
-                    choices=[],
-                    model=request.model,
-                    usage=usage,
-                )
-                final_usage_data = final_usage_chunk.model_dump_json(
-                    exclude_none=True
-                )
-                yield f"data: {final_usage_data}\n\n"
+                else:
+                    usage = None
+                final_usage_chunk = ChatCompletionStreamResponse(
+                    id=content["meta_info"]["id"],
+                    created=created,
+                    choices=[
+                        ChatCompletionResponseStreamChoice(
+                            index=index,
+                            delta=DeltaMessage(),
+                            finish_reason=finish_reason_type,
+                        )
+                    ],
+                    model=request.model,
+                    usage=usage,
+                )
+                yield f"data: {final_usage_chunk.model_dump_json()}\n\n"
         except ValueError as e:
             error = create_streaming_error_response(str(e))
             yield f"data: {error}\n\n"
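With this change the closing chunk is well formed in both modes: when stream_options.include_usage is set, the last event carries an empty delta, the finish_reason, and the usage block together (previously choices was [] and fields could be dropped by exclude_none=True). A consumption sketch with the openai Python client; the endpoint and model name are placeholders:

from openai import OpenAI

client = OpenAI(base_url="http://localhost:30000/v1", api_key="EMPTY")

stream = client.chat.completions.create(
    model="default",
    messages=[{"role": "user", "content": "Hello"}],
    stream=True,
    stream_options={"include_usage": True},
)
for chunk in stream:
    if chunk.choices and chunk.choices[0].finish_reason:
        # Closing chunk: empty delta, finish_reason set, usage attached.
        print(chunk.choices[0].finish_reason, chunk.usage)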
python/sglang/srt/openai_api/protocol.py
@@ -187,7 +187,7 @@ class CompletionResponseChoice(BaseModel):
     index: int
     text: str
     logprobs: Optional[LogProbs] = None
-    finish_reason: Optional[str] = None
+    finish_reason: Literal["stop", "length", "content_filter"]
     matched_stop: Union[None, int, str] = None
@@ -204,7 +204,7 @@ class CompletionResponseStreamChoice(BaseModel):
     index: int
     text: str
     logprobs: Optional[LogProbs] = None
-    finish_reason: Optional[str] = None
+    finish_reason: Optional[Literal["stop", "length", "content_filter"]] = None
     matched_stop: Union[None, int, str] = None
@@ -387,7 +387,9 @@ class ChatCompletionResponseChoice(BaseModel):
     index: int
     message: ChatMessage
     logprobs: Optional[Union[LogProbs, ChoiceLogprobs]] = None
-    finish_reason: str
+    finish_reason: Literal[
+        "stop", "length", "tool_calls", "content_filter", "function_call"
+    ]
     matched_stop: Union[None, int, str] = None
@@ -411,7 +413,9 @@ class ChatCompletionResponseStreamChoice(BaseModel):
     index: int
     delta: DeltaMessage
     logprobs: Optional[Union[LogProbs, ChoiceLogprobs]] = None
-    finish_reason: Optional[str] = None
+    finish_reason: Optional[
+        Literal["stop", "length", "tool_calls", "content_filter", "function_call"]
+    ] = None
     matched_stop: Union[None, int, str] = None
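Tightening finish_reason from a bare Optional[str] to Literal[...] is what forces the adapter fixes above: "" and the misspelled "tool_call" are no longer representable and fail validation instead of leaking to clients. A minimal sketch of the effect, with the model trimmed to the one field:

from typing import Literal, Optional

from pydantic import BaseModel, ValidationError

class StreamChoiceSketch(BaseModel):
    # Trimmed stand-in for ChatCompletionResponseStreamChoice.
    finish_reason: Optional[
        Literal["stop", "length", "tool_calls", "content_filter", "function_call"]
    ] = None

StreamChoiceSketch(finish_reason="stop")  # ok
StreamChoiceSketch(finish_reason=None)    # ok: mid-stream chunks
try:
    StreamChoiceSketch(finish_reason="")  # the old "" sentinel is now rejected
except ValidationError as exc:
    print(exc.errors()[0]["type"])  # e.g. "literal_error" on pydantic v2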
test/srt/test_openai_server.py
@@ -258,7 +258,12 @@ class TestOpenAIServer(CustomTestCase):
                     ret_num_top_logprobs == logprobs
                 ), f"{ret_num_top_logprobs} vs {logprobs}"
-            assert isinstance(data.content, str) or response.choices[0].finish_reason
+            assert (
+                isinstance(data.content, str)
+                or isinstance(data.reasoning_content, str)
+                or len(data.tool_calls) > 0
+                or response.choices[0].finish_reason
+            )
         assert response.id
         assert response.created
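The broadened assertion tracks the new stream shape: a delta may now carry reasoning_content or tool_calls with content left as None, and the closing chunk may carry nothing but a finish_reason. Restated as a standalone predicate (the delta stand-in below is ours; the real test inspects the SDK objects):

from dataclasses import dataclass, field
from typing import List, Optional

@dataclass
class DeltaSketch:
    content: Optional[str] = None
    reasoning_content: Optional[str] = None
    tool_calls: List[object] = field(default_factory=list)

def chunk_is_acceptable(data: DeltaSketch, finish_reason: Optional[str]) -> bool:
    # Mirrors the updated assert: any one of these makes a chunk valid.
    return (
        isinstance(data.content, str)
        or isinstance(data.reasoning_content, str)
        or len(data.tool_calls) > 0
        or bool(finish_reason)
    )

assert chunk_is_acceptable(DeltaSketch(), finish_reason="stop")  # closing chunk
assert not chunk_is_acceptable(DeltaSketch(), finish_reason=None)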