Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
ad344ef5
Unverified
Commit
ad344ef5
authored
Aug 11, 2025
by
Chen Zhang
Committed by
GitHub
Aug 11, 2025
Browse files
[gpt-oss] Small bug fixes for frontend (#22512)
Signed-off-by:
Chen Zhang
<
zhangch99@outlook.com
>
parent
bbaf9e9c
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
77 additions
and
33 deletions
+77
-33
vllm/entrypoints/context.py
vllm/entrypoints/context.py
+42
-14
vllm/entrypoints/openai/protocol.py
vllm/entrypoints/openai/protocol.py
+3
-2
vllm/entrypoints/openai/serving_responses.py
vllm/entrypoints/openai/serving_responses.py
+16
-13
vllm/entrypoints/tool.py
vllm/entrypoints/tool.py
+13
-2
vllm/entrypoints/tool_server.py
vllm/entrypoints/tool_server.py
+3
-2
No files found.
vllm/entrypoints/context.py
View file @
ad344ef5
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
json
import
logging
import
logging
from
abc
import
ABC
,
abstractmethod
from
abc
import
ABC
,
abstractmethod
from
typing
import
TYPE_CHECKING
,
Union
from
openai_harmony
import
Message
,
Role
,
StreamState
from
openai_harmony
import
Author
,
Message
,
Role
,
StreamState
,
TextContent
from
vllm.entrypoints.harmony_utils
import
(
from
vllm.entrypoints.harmony_utils
import
(
get_encoding
,
get_streamable_parser_for_assistant
,
render_for_completion
)
get_encoding
,
get_streamable_parser_for_assistant
,
render_for_completion
)
from
vllm.entrypoints.tool
import
Tool
from
vllm.entrypoints.tool
import
Tool
from
vllm.outputs
import
RequestOutput
from
vllm.outputs
import
RequestOutput
if
TYPE_CHECKING
:
from
mcp.client
import
ClientSession
logger
=
logging
.
getLogger
(
__name__
)
logger
=
logging
.
getLogger
(
__name__
)
...
@@ -71,6 +76,7 @@ class HarmonyContext(ConversationContext):
...
@@ -71,6 +76,7 @@ class HarmonyContext(ConversationContext):
def
append_output
(
self
,
output
)
->
None
:
def
append_output
(
self
,
output
)
->
None
:
if
isinstance
(
output
,
RequestOutput
):
if
isinstance
(
output
,
RequestOutput
):
output_token_ids
=
output
.
outputs
[
0
].
token_ids
output_token_ids
=
output
.
outputs
[
0
].
token_ids
self
.
parser
=
get_streamable_parser_for_assistant
()
for
token_id
in
output_token_ids
:
for
token_id
in
output_token_ids
:
self
.
parser
.
process
(
token_id
)
self
.
parser
.
process
(
token_id
)
output_msgs
=
self
.
parser
.
messages
output_msgs
=
self
.
parser
.
messages
...
@@ -106,19 +112,41 @@ class HarmonyContext(ConversationContext):
...
@@ -106,19 +112,41 @@ class HarmonyContext(ConversationContext):
def
render_for_completion
(
self
)
->
list
[
int
]:
def
render_for_completion
(
self
)
->
list
[
int
]:
return
render_for_completion
(
self
.
messages
)
return
render_for_completion
(
self
.
messages
)
async
def
call_search_tool
(
async
def
call_search_tool
(
self
,
tool_session
:
Union
[
"ClientSession"
,
self
,
Tool
],
tool_session
:
Tool
,
last_msg
:
Message
)
->
list
[
Message
]:
last_msg
:
Message
,
if
isinstance
(
tool_session
,
Tool
):
)
->
list
[
Message
]:
return
await
tool_session
.
get_result
(
self
)
return
await
tool_session
.
get_result
(
self
)
tool_name
=
last_msg
.
recipient
.
split
(
"."
)[
1
]
args
=
json
.
loads
(
last_msg
.
content
[
0
].
text
)
async
def
call_python_tool
(
result
=
await
tool_session
.
call_tool
(
tool_name
,
args
)
self
,
result_str
=
result
.
content
[
0
].
text
tool_session
:
Tool
,
content
=
TextContent
(
text
=
result_str
)
last_msg
:
Message
,
author
=
Author
(
role
=
Role
.
TOOL
,
name
=
last_msg
.
recipient
)
)
->
list
[
Message
]:
return
[
return
await
tool_session
.
get_result
(
self
)
Message
(
author
=
author
,
content
=
[
content
],
recipient
=
Role
.
ASSISTANT
)
]
async
def
call_python_tool
(
self
,
tool_session
:
Union
[
"ClientSession"
,
Tool
],
last_msg
:
Message
)
->
list
[
Message
]:
if
isinstance
(
tool_session
,
Tool
):
return
await
tool_session
.
get_result
(
self
)
param
=
{
"code"
:
last_msg
.
content
[
0
].
text
,
}
result
=
await
tool_session
.
call_tool
(
"python"
,
param
)
result_str
=
result
.
content
[
0
].
text
content
=
TextContent
(
text
=
result_str
)
author
=
Author
(
role
=
Role
.
TOOL
,
name
=
"python"
)
return
[
Message
(
author
=
author
,
content
=
[
content
],
channel
=
last_msg
.
channel
,
recipient
=
Role
.
ASSISTANT
)
]
class
StreamingHarmonyContext
(
HarmonyContext
):
class
StreamingHarmonyContext
(
HarmonyContext
):
...
...
vllm/entrypoints/openai/protocol.py
View file @
ad344ef5
...
@@ -19,8 +19,8 @@ from openai.types.chat.chat_completion_message import (
...
@@ -19,8 +19,8 @@ from openai.types.chat.chat_completion_message import (
# yapf: enable
# yapf: enable
from
openai.types.responses
import
(
ResponseFunctionToolCall
,
from
openai.types.responses
import
(
ResponseFunctionToolCall
,
ResponseInputItemParam
,
ResponseOutputItem
,
ResponseInputItemParam
,
ResponseOutputItem
,
ResponsePrompt
,
Response
Status
,
ResponsePrompt
,
Response
ReasoningItem
,
ResponseTextConfig
)
ResponseStatus
,
ResponseTextConfig
)
from
openai.types.responses.response
import
ToolChoice
from
openai.types.responses.response
import
ToolChoice
from
openai.types.responses.tool
import
Tool
from
openai.types.responses.tool
import
Tool
from
openai.types.shared
import
Metadata
,
Reasoning
from
openai.types.shared
import
Metadata
,
Reasoning
...
@@ -239,6 +239,7 @@ def get_logits_processors(processors: Optional[LogitsProcessors],
...
@@ -239,6 +239,7 @@ def get_logits_processors(processors: Optional[LogitsProcessors],
ResponseInputOutputItem
:
TypeAlias
=
Union
[
ResponseInputItemParam
,
ResponseInputOutputItem
:
TypeAlias
=
Union
[
ResponseInputItemParam
,
ResponseReasoningItem
,
ResponseFunctionToolCall
]
ResponseFunctionToolCall
]
...
...
vllm/entrypoints/openai/serving_responses.py
View file @
ad344ef5
...
@@ -16,8 +16,7 @@ from fastapi import Request
...
@@ -16,8 +16,7 @@ from fastapi import Request
from
openai
import
BaseModel
from
openai
import
BaseModel
# yapf conflicts with isort for this block
# yapf conflicts with isort for this block
# yapf: disable
# yapf: disable
from
openai.types.responses
import
(
ResponseContentPartDoneEvent
,
from
openai.types.responses
import
(
ResponseCreatedEvent
,
ResponseCreatedEvent
,
ResponseFunctionToolCall
,
ResponseFunctionToolCall
,
ResponseInProgressEvent
,
ResponseInProgressEvent
,
ResponseOutputItem
,
ResponseOutputItem
,
...
@@ -54,7 +53,7 @@ from vllm.entrypoints.openai.protocol import (ErrorResponse,
...
@@ -54,7 +53,7 @@ from vllm.entrypoints.openai.protocol import (ErrorResponse,
# yapf: enable
# yapf: enable
from
vllm.entrypoints.openai.serving_engine
import
OpenAIServing
from
vllm.entrypoints.openai.serving_engine
import
OpenAIServing
from
vllm.entrypoints.openai.serving_models
import
OpenAIServingModels
from
vllm.entrypoints.openai.serving_models
import
OpenAIServingModels
from
vllm.entrypoints.tool_server
import
ToolServer
from
vllm.entrypoints.tool_server
import
MCPToolServer
,
ToolServer
from
vllm.inputs.data
import
TokensPrompt
as
EngineTokensPrompt
from
vllm.inputs.data
import
TokensPrompt
as
EngineTokensPrompt
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.outputs
import
CompletionOutput
from
vllm.outputs
import
CompletionOutput
...
@@ -238,6 +237,15 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -238,6 +237,15 @@ class OpenAIServingResponses(OpenAIServing):
if
raw_request
:
if
raw_request
:
raw_request
.
state
.
request_metadata
=
request_metadata
raw_request
.
state
.
request_metadata
=
request_metadata
if
self
.
tool_server
is
not
None
and
isinstance
(
self
.
tool_server
,
MCPToolServer
)
and
(
request
.
background
or
request
.
stream
)
and
request
.
tools
and
any
(
tool
.
type
in
[
"web_search_preview"
,
"code_interpreter"
]
for
tool
in
request
.
tools
):
return
self
.
create_error_response
(
"MCP tool server is not supported in background mode and "
"streaming mode"
)
# Schedule the request and get the result generator.
# Schedule the request and get the result generator.
generators
:
list
[
AsyncGenerator
[
ConversationContext
,
None
]]
=
[]
generators
:
list
[
AsyncGenerator
[
ConversationContext
,
None
]]
=
[]
...
@@ -844,9 +852,13 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -844,9 +852,13 @@ class OpenAIServingResponses(OpenAIServing):
type
=
"reasoning"
,
type
=
"reasoning"
,
content
=
[
content
=
[
ResponseReasoningTextContent
(
ResponseReasoningTextContent
(
text
=
previous_item
.
content
[
0
].
text
),
text
=
previous_item
.
content
[
0
].
text
,
type
=
"reasoning_text"
,
),
],
],
status
=
"completed"
,
status
=
"completed"
,
id
=
current_item_id
,
summary
=
[],
)
)
yield
_send_event
(
yield
_send_event
(
ResponseReasoningTextDoneEvent
(
ResponseReasoningTextDoneEvent
(
...
@@ -857,15 +869,6 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -857,15 +869,6 @@ class OpenAIServingResponses(OpenAIServing):
content_index
=
current_content_index
,
content_index
=
current_content_index
,
text
=
previous_item
.
content
[
0
].
text
,
text
=
previous_item
.
content
[
0
].
text
,
))
))
yield
_send_event
(
ResponseContentPartDoneEvent
(
type
=
"response.content_part.done"
,
item_id
=
current_item_id
,
sequence_number
=-
1
,
output_index
=
current_output_index
,
content_index
=
current_content_index
,
part
=
reasoning_item
,
))
yield
_send_event
(
yield
_send_event
(
ResponseOutputItemDoneEvent
(
ResponseOutputItemDoneEvent
(
type
=
"response.output_item.done"
,
type
=
"response.output_item.done"
,
...
...
vllm/entrypoints/tool.py
View file @
ad344ef5
...
@@ -2,7 +2,9 @@
...
@@ -2,7 +2,9 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
os
import
os
from
abc
import
ABC
,
abstractmethod
from
abc
import
ABC
,
abstractmethod
from
typing
import
TYPE_CHECKING
,
Any
from
typing
import
TYPE_CHECKING
,
Any
,
Optional
from
openai_harmony
import
Message
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
...
@@ -70,7 +72,16 @@ class HarmonyPythonTool(Tool):
...
@@ -70,7 +72,16 @@ class HarmonyPythonTool(Tool):
"gpt_oss is not installed, code interpreter is disabled"
)
"gpt_oss is not installed, code interpreter is disabled"
)
return
return
self
.
python_tool
=
PythonTool
()
# NOTE (Chen): as of gpt-oss 0.0.2, there is a bug in _make_response
# and we do the following monkey patch to fix it.
class
PatchedGptOssPythonTool
(
PythonTool
):
def
_make_response
(
self
,
output
:
str
,
channel
:
Optional
[
str
]
=
None
)
->
Message
:
return
super
().
_make_response
(
output
)
self
.
python_tool
=
PatchedGptOssPythonTool
()
logger
.
info_once
(
"Code interpreter tool initialized"
)
logger
.
info_once
(
"Code interpreter tool initialized"
)
async
def
get_result
(
self
,
context
:
"ConversationContext"
)
->
Any
:
async
def
get_result
(
self
,
context
:
"ConversationContext"
)
->
Any
:
...
...
vllm/entrypoints/tool_server.py
View file @
ad344ef5
...
@@ -4,7 +4,7 @@ from abc import ABC, abstractmethod
...
@@ -4,7 +4,7 @@ from abc import ABC, abstractmethod
from
contextlib
import
AbstractAsyncContextManager
,
asynccontextmanager
from
contextlib
import
AbstractAsyncContextManager
,
asynccontextmanager
from
typing
import
TYPE_CHECKING
,
Any
,
Optional
from
typing
import
TYPE_CHECKING
,
Any
,
Optional
from
openai_harmony
import
ToolNamespaceConfig
from
openai_harmony
import
ToolDescription
,
ToolNamespaceConfig
from
vllm.entrypoints.tool
import
HarmonyBrowserTool
,
HarmonyPythonTool
,
Tool
from
vllm.entrypoints.tool
import
HarmonyBrowserTool
,
HarmonyPythonTool
,
Tool
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
...
@@ -105,7 +105,6 @@ class MCPToolServer(ToolServer):
...
@@ -105,7 +105,6 @@ class MCPToolServer(ToolServer):
self
.
harmony_tool_descriptions
=
{}
self
.
harmony_tool_descriptions
=
{}
async
def
add_tool_server
(
self
,
server_url
:
str
):
async
def
add_tool_server
(
self
,
server_url
:
str
):
from
mcp.types
import
ToolDescription
tool_urls
=
server_url
.
split
(
","
)
tool_urls
=
server_url
.
split
(
","
)
self
.
harmony_tool_descriptions
=
{}
self
.
harmony_tool_descriptions
=
{}
self
.
urls
:
dict
[
str
,
str
]
=
{}
self
.
urls
:
dict
[
str
,
str
]
=
{}
...
@@ -133,6 +132,8 @@ class MCPToolServer(ToolServer):
...
@@ -133,6 +132,8 @@ class MCPToolServer(ToolServer):
logger
.
warning
(
logger
.
warning
(
"Tool %s already exists. Ignoring duplicate tool server %s"
,
"Tool %s already exists. Ignoring duplicate tool server %s"
,
tool_from_mcp
.
name
,
url
)
tool_from_mcp
.
name
,
url
)
logger
.
info
(
"MCPToolServer initialized with tools: %s"
,
list
(
self
.
harmony_tool_descriptions
.
keys
()))
def
has_tool
(
self
,
tool_name
:
str
):
def
has_tool
(
self
,
tool_name
:
str
):
return
tool_name
in
self
.
harmony_tool_descriptions
return
tool_name
in
self
.
harmony_tool_descriptions
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment