Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
e6ba2000
Unverified
Commit
e6ba2000
authored
Oct 16, 2025
by
Andrew Xia
Committed by
GitHub
Oct 16, 2025
Browse files
[gpt-oss][1/N] EZ: refactor serving_responses for modularity (#26948)
Signed-off-by:
Andrew Xia
<
axia@meta.com
>
parent
aa255ff5
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
70 additions
and
54 deletions
+70
-54
vllm/entrypoints/openai/serving_responses.py
vllm/entrypoints/openai/serving_responses.py
+70
-54
No files found.
vllm/entrypoints/openai/serving_responses.py
View file @
e6ba2000
...
@@ -227,6 +227,29 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -227,6 +227,29 @@ class OpenAIServingResponses(OpenAIServing):
)
)
return
None
return
None
def _validate_create_responses_input(
    self, request: ResponsesRequest
) -> ErrorResponse | None:
    """Check request options that can be rejected before any work is done.

    Two combinations are refused:

    * output logprobs are requested while ``self.use_harmony`` is set;
    * ``request.store`` and ``request.background`` are both set while
      ``self.enable_store`` is off.

    Returns:
        The value produced by ``self.create_error_response`` (an
        ``invalid_request_error`` with HTTP 400) for the first failing
        check, or ``None`` when both checks pass.
    """
    # Pick the message for the first failing check; the checks are
    # evaluated in the same order as the error precedence.
    if self.use_harmony and request.is_include_output_logprobs():
        failure_message = "logprobs are not supported with gpt-oss models"
    elif request.store and not self.enable_store and request.background:
        failure_message = (
            "This vLLM engine does not support `store=True` and therefore "
            "does not support the background mode. To enable these "
            "features, set the environment variable "
            "`VLLM_ENABLE_RESPONSES_API_STORE=1` when launching the vLLM "
            "server."
        )
    else:
        return None

    return self.create_error_response(
        err_type="invalid_request_error",
        message=failure_message,
        status_code=HTTPStatus.BAD_REQUEST,
    )
async
def
create_responses
(
async
def
create_responses
(
self
,
self
,
request
:
ResponsesRequest
,
request
:
ResponsesRequest
,
...
@@ -240,6 +263,9 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -240,6 +263,9 @@ class OpenAIServingResponses(OpenAIServing):
if
error_check_ret
is
not
None
:
if
error_check_ret
is
not
None
:
logger
.
error
(
"Error with model %s"
,
error_check_ret
)
logger
.
error
(
"Error with model %s"
,
error_check_ret
)
return
error_check_ret
return
error_check_ret
maybe_validation_error
=
self
.
_validate_create_responses_input
(
request
)
if
maybe_validation_error
is
not
None
:
return
maybe_validation_error
# If the engine is dead, raise the engine's DEAD_ERROR.
# If the engine is dead, raise the engine's DEAD_ERROR.
# This is required for the streaming case, where we return a
# This is required for the streaming case, where we return a
...
@@ -248,18 +274,6 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -248,18 +274,6 @@ class OpenAIServingResponses(OpenAIServing):
raise
self
.
engine_client
.
dead_error
raise
self
.
engine_client
.
dead_error
if
request
.
store
and
not
self
.
enable_store
:
if
request
.
store
and
not
self
.
enable_store
:
if
request
.
background
:
return
self
.
create_error_response
(
err_type
=
"invalid_request_error"
,
message
=
(
"This vLLM engine does not support `store=True` and "
"therefore does not support the background mode. To "
"enable these features, set the environment variable "
"`VLLM_ENABLE_RESPONSES_API_STORE=1` when launching "
"the vLLM server."
),
status_code
=
HTTPStatus
.
BAD_REQUEST
,
)
# Disable the store option.
# Disable the store option.
# NOTE(woosuk): Although returning an error is possible, we opted
# NOTE(woosuk): Although returning an error is possible, we opted
# to implicitly disable store and process the request anyway, as
# to implicitly disable store and process the request anyway, as
...
@@ -267,12 +281,6 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -267,12 +281,6 @@ class OpenAIServingResponses(OpenAIServing):
# (i.e., their request's `store=True` just because it's the default
# (i.e., their request's `store=True` just because it's the default
# value).
# value).
request
.
store
=
False
request
.
store
=
False
if
self
.
use_harmony
and
request
.
is_include_output_logprobs
():
return
self
.
create_error_response
(
err_type
=
"invalid_request_error"
,
message
=
"logprobs are not supported with gpt-oss models"
,
status_code
=
HTTPStatus
.
BAD_REQUEST
,
)
# Handle the previous response ID.
# Handle the previous response ID.
prev_response_id
=
request
.
previous_response_id
prev_response_id
=
request
.
previous_response_id
...
@@ -849,6 +857,47 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -849,6 +857,47 @@ class OpenAIServingResponses(OpenAIServing):
messages
.
extend
(
request
.
input
)
# type: ignore
messages
.
extend
(
request
.
input
)
# type: ignore
return
messages
return
messages
def _construct_harmony_system_input_message(
    self,
    request: ResponsesRequest,
    with_custom_tools: bool,
    tool_types: list[str],
) -> OpenAIHarmonyMessage:
    """Build the system message for a request via ``get_system_message``.

    A built-in tool description is included only when the matching tool
    type appears in ``tool_types`` and ``self.tool_server`` is set and
    reports the corresponding tool:

    * ``"web_search_preview"`` -> tool server tool ``"browser"``
    * ``"code_interpreter"``   -> tool server tool ``"python"``
    * ``"container"``          -> tool server tool ``"container"``

    Args:
        request: Source of ``reasoning`` (for the effort level) and
            ``instructions``.
        with_custom_tools: Forwarded unchanged to ``get_system_message``.
        tool_types: Tool type names used to decide which built-in tool
            descriptions to include.

    Returns:
        Whatever ``get_system_message`` returns for the assembled arguments.
    """

    def _is_enabled(tool_type: str, server_tool: str):
        # Short-circuits in the same order as the inline expression:
        # membership first, then tool server presence, then has_tool().
        return (
            tool_type in tool_types
            and self.tool_server is not None
            and self.tool_server.has_tool(server_tool)
        )

    def _describe(enabled, server_tool: str):
        # The extra None check mirrors the enable check so the attribute
        # access below is only reached with a tool server present.
        if enabled and self.tool_server is not None:
            return self.tool_server.get_tool_description(server_tool)
        return None

    if request.reasoning:
        effort = request.reasoning.effort
    else:
        effort = None

    # Resolve all enable flags before fetching any description, so the
    # sequence of tool server calls is has_tool x3, then descriptions.
    use_browser = _is_enabled("web_search_preview", "browser")
    use_python = _is_enabled("code_interpreter", "python")
    use_container = _is_enabled("container", "container")

    browser_text = _describe(use_browser, "browser")
    python_text = _describe(use_python, "python")
    container_text = _describe(use_container, "container")

    return get_system_message(
        reasoning_effort=effort,
        browser_description=browser_text,
        python_description=python_text,
        container_description=container_text,
        instructions=request.instructions,
        with_custom_tools=with_custom_tools,
    )
def
_construct_input_messages_with_harmony
(
def
_construct_input_messages_with_harmony
(
self
,
self
,
request
:
ResponsesRequest
,
request
:
ResponsesRequest
,
...
@@ -857,9 +906,7 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -857,9 +906,7 @@ class OpenAIServingResponses(OpenAIServing):
messages
:
list
[
OpenAIHarmonyMessage
]
=
[]
messages
:
list
[
OpenAIHarmonyMessage
]
=
[]
if
prev_response
is
None
:
if
prev_response
is
None
:
# New conversation.
# New conversation.
reasoning_effort
=
request
.
reasoning
.
effort
if
request
.
reasoning
else
None
tool_types
=
[
tool
.
type
for
tool
in
request
.
tools
]
tool_types
=
[
tool
.
type
for
tool
in
request
.
tools
]
# Allow the MCP Tool type to enable built in tools if the
# Allow the MCP Tool type to enable built in tools if the
# server_label is allowlisted in
# server_label is allowlisted in
# envs.GPT_OSS_SYSTEM_TOOL_MCP_LABELS
# envs.GPT_OSS_SYSTEM_TOOL_MCP_LABELS
...
@@ -870,41 +917,10 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -870,41 +917,10 @@ class OpenAIServingResponses(OpenAIServing):
and
tool
.
server_label
in
envs
.
GPT_OSS_SYSTEM_TOOL_MCP_LABELS
and
tool
.
server_label
in
envs
.
GPT_OSS_SYSTEM_TOOL_MCP_LABELS
):
):
tool_types
.
append
(
tool
.
server_label
)
tool_types
.
append
(
tool
.
server_label
)
enable_browser
=
(
"web_search_preview"
in
tool_types
and
self
.
tool_server
is
not
None
and
self
.
tool_server
.
has_tool
(
"browser"
)
)
enable_code_interpreter
=
(
"code_interpreter"
in
tool_types
and
self
.
tool_server
is
not
None
and
self
.
tool_server
.
has_tool
(
"python"
)
)
enable_container
=
(
"container"
in
tool_types
and
self
.
tool_server
is
not
None
and
self
.
tool_server
.
has_tool
(
"container"
)
)
with_custom_tools
=
has_custom_tools
(
tool_types
)
with_custom_tools
=
has_custom_tools
(
tool_types
)
sys_msg
=
get_system_message
(
reasoning_effort
=
reasoning_effort
,
sys_msg
=
self
.
_construct_harmony_system_input_message
(
browser_description
=
(
request
,
with_custom_tools
,
tool_types
self
.
tool_server
.
get_tool_description
(
"browser"
)
if
enable_browser
and
self
.
tool_server
is
not
None
else
None
),
python_description
=
(
self
.
tool_server
.
get_tool_description
(
"python"
)
if
enable_code_interpreter
and
self
.
tool_server
is
not
None
else
None
),
container_description
=
(
self
.
tool_server
.
get_tool_description
(
"container"
)
if
enable_container
and
self
.
tool_server
is
not
None
else
None
),
instructions
=
request
.
instructions
,
with_custom_tools
=
with_custom_tools
,
)
)
messages
.
append
(
sys_msg
)
messages
.
append
(
sys_msg
)
if
with_custom_tools
:
if
with_custom_tools
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment