Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
444f0e3f
Unverified
Commit
444f0e3f
authored
Dec 07, 2025
by
daniel-salib
Committed by
GitHub
Dec 08, 2025
Browse files
[Frontend] Add MCP type support infrastructure to Responses API (#30054)
Signed-off-by: Daniel Salib <danielsalib@meta.com>
parent
af0444bf
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
309 additions
and
47 deletions
+309
-47
tests/entrypoints/openai/parser/test_harmony_utils.py
tests/entrypoints/openai/parser/test_harmony_utils.py
+176
-9
vllm/entrypoints/openai/parser/harmony_utils.py
vllm/entrypoints/openai/parser/harmony_utils.py
+125
-38
vllm/entrypoints/openai/protocol.py
vllm/entrypoints/openai/protocol.py
+8
-0
No files found.
tests/entrypoints/openai/parser/test_harmony_utils.py
View file @
444f0e3f
...
@@ -2,6 +2,7 @@
...
@@ -2,6 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
openai.types.responses
import
ResponseFunctionToolCall
,
ResponseReasoningItem
from
openai.types.responses
import
ResponseFunctionToolCall
,
ResponseReasoningItem
from
openai.types.responses.response_output_item
import
McpCall
from
openai_harmony
import
Author
,
Message
,
Role
,
TextContent
from
openai_harmony
import
Author
,
Message
,
Role
,
TextContent
from
vllm.entrypoints.openai.parser.harmony_utils
import
(
from
vllm.entrypoints.openai.parser.harmony_utils
import
(
...
@@ -400,17 +401,19 @@ class TestParseOutputMessage:
...
@@ -400,17 +401,19 @@ class TestParseOutputMessage:
assert
output_items
[
0
].
arguments
==
'{"location": "San Francisco"}'
assert
output_items
[
0
].
arguments
==
'{"location": "San Francisco"}'
assert
output_items
[
1
].
arguments
==
'{"location": "New York"}'
assert
output_items
[
1
].
arguments
==
'{"location": "New York"}'
def
test_commentary_with_unknown_recipient_
raises_error
(
self
):
def
test_commentary_with_unknown_recipient_
creates_mcp_call
(
self
):
"""Test that commentary with unknown recipient
raises ValueError
."""
"""Test that commentary with unknown recipient
creates MCP call
."""
message
=
Message
.
from_role_and_content
(
Role
.
ASSISTANT
,
"some content"
)
message
=
Message
.
from_role_and_content
(
Role
.
ASSISTANT
,
'{"arg": "value"}'
)
message
=
message
.
with_channel
(
"commentary"
)
message
=
message
.
with_channel
(
"commentary"
)
message
=
message
.
with_recipient
(
"
unknown_recipient
"
)
message
=
message
.
with_recipient
(
"
custom_tool
"
)
try
:
output_items
=
parse_output_message
(
message
)
parse_output_message
(
message
)
raise
AssertionError
(
"Expected ValueError to be raised"
)
assert
len
(
output_items
)
==
1
except
ValueError
as
e
:
assert
isinstance
(
output_items
[
0
],
McpCall
)
assert
"Unknown recipient: unknown_recipient"
in
str
(
e
)
assert
output_items
[
0
].
type
==
"mcp_call"
assert
output_items
[
0
].
name
==
"custom_tool"
assert
output_items
[
0
].
server_label
==
"custom_tool"
def
test_analysis_channel_creates_reasoning
(
self
):
def
test_analysis_channel_creates_reasoning
(
self
):
"""Test that analysis channel creates reasoning items."""
"""Test that analysis channel creates reasoning items."""
...
@@ -451,3 +454,167 @@ def test_has_custom_tools() -> None:
...
@@ -451,3 +454,167 @@ def test_has_custom_tools() -> None:
assert
has_custom_tools
(
assert
has_custom_tools
(
{
"web_search_preview"
,
"code_interpreter"
,
"container"
,
"others"
}
{
"web_search_preview"
,
"code_interpreter"
,
"container"
,
"others"
}
)
)
def test_parse_mcp_call_basic() -> None:
    """A commentary message to a plain recipient is parsed into one MCP call."""
    payload = '{"path": "/tmp"}'
    msg = (
        Message.from_role_and_content(Role.ASSISTANT, payload)
        .with_recipient("filesystem")
        .with_channel("commentary")
    )

    parsed = parse_output_message(msg)

    assert len(parsed) == 1
    call = parsed[0]
    assert isinstance(call, McpCall)
    assert call.type == "mcp_call"
    # An undotted recipient is used for both the tool name and the server label.
    assert call.name == "filesystem"
    assert call.server_label == "filesystem"
    assert call.arguments == '{"path": "/tmp"}'
    assert call.status == "completed"
def test_parse_mcp_call_dotted_recipient() -> None:
    """A "server.tool" recipient is split into server_label and tool name."""
    msg = (
        Message.from_role_and_content(Role.ASSISTANT, '{"cmd": "ls"}')
        .with_recipient("repo_browser.list")
        .with_channel("commentary")
    )

    parsed = parse_output_message(msg)

    assert len(parsed) == 1
    call = parsed[0]
    assert isinstance(call, McpCall)
    # Text after the dot is the tool; text before it is the server label.
    assert call.name == "list"
    assert call.server_label == "repo_browser"
def test_mcp_vs_function_call() -> None:
    """A "functions.*" recipient yields a function call, not an MCP call."""
    msg = (
        Message.from_role_and_content(Role.ASSISTANT, '{"arg": "value"}')
        .with_recipient("functions.my_tool")
        .with_channel("commentary")
    )

    parsed = parse_output_message(msg)

    assert len(parsed) == 1
    item = parsed[0]
    assert not isinstance(item, McpCall)
    assert item.type == "function_call"
def test_mcp_vs_builtin_tools() -> None:
    """Built-in tool recipients (here: python) are not parsed as MCP calls."""
    # A message addressed to the built-in python tool is expected to come
    # back as a reasoning item rather than an MCP call.
    msg = (
        Message.from_role_and_content(Role.ASSISTANT, "print('hello')")
        .with_recipient("python")
        .with_channel("commentary")
    )

    parsed = parse_output_message(msg)

    assert len(parsed) == 1
    item = parsed[0]
    assert not isinstance(item, McpCall)
    assert item.type == "reasoning"
def test_parse_remaining_state_commentary_channel() -> None:
    """Check parse_remaining_state on the commentary channel per recipient kind."""
    from unittest.mock import Mock

    from vllm.entrypoints.openai.parser.harmony_utils import parse_remaining_state

    def _parser_state(content: str, recipient: str) -> Mock:
        # Stand-in for a streaming parser stopped mid-message on "commentary".
        return Mock(
            current_content=content,
            current_role=Role.ASSISTANT,
            current_channel="commentary",
            current_recipient=recipient,
        )

    # Case 1: a "functions.*" recipient produces an in-progress function call.
    func_items = parse_remaining_state(
        _parser_state('{"arg": "value"}', "functions.my_tool")
    )
    assert len(func_items) == 1
    func_item = func_items[0]
    assert not isinstance(func_item, McpCall)
    assert func_item.type == "function_call"
    assert func_item.name == "my_tool"
    assert func_item.status == "in_progress"

    # Case 2: a non-built-in recipient produces an in-progress MCP call.
    mcp_items = parse_remaining_state(_parser_state('{"path": "/tmp"}', "filesystem"))
    assert len(mcp_items) == 1
    mcp_item = mcp_items[0]
    assert isinstance(mcp_item, McpCall)
    assert mcp_item.type == "mcp_call"
    assert mcp_item.name == "filesystem"
    assert mcp_item.server_label == "filesystem"
    assert mcp_item.status == "in_progress"

    # Case 3: the built-in python tool must not become an MCP call; the
    # remaining content is reported as a reasoning item instead.
    builtin_items = parse_remaining_state(_parser_state("print('hello')", "python"))
    assert len(builtin_items) == 1
    builtin_item = builtin_items[0]
    assert not isinstance(builtin_item, McpCall)
    assert builtin_item.type == "reasoning"
def test_parse_remaining_state_analysis_channel() -> None:
    """Check parse_remaining_state on the analysis channel per recipient kind."""
    from unittest.mock import Mock

    from vllm.entrypoints.openai.parser.harmony_utils import parse_remaining_state

    def _parser_state(content: str, recipient: str) -> Mock:
        # Stand-in for a streaming parser stopped mid-message on "analysis".
        return Mock(
            current_content=content,
            current_role=Role.ASSISTANT,
            current_channel="analysis",
            current_recipient=recipient,
        )

    # Case 1: a "functions.*" recipient produces an in-progress function call.
    func_items = parse_remaining_state(
        _parser_state('{"arg": "value"}', "functions.my_tool")
    )
    assert len(func_items) == 1
    func_item = func_items[0]
    assert not isinstance(func_item, McpCall)
    assert func_item.type == "function_call"
    assert func_item.name == "my_tool"
    assert func_item.status == "in_progress"

    # Case 2: a non-built-in recipient produces an in-progress MCP call.
    mcp_items = parse_remaining_state(_parser_state('{"query": "test"}', "database"))
    assert len(mcp_items) == 1
    mcp_item = mcp_items[0]
    assert isinstance(mcp_item, McpCall)
    assert mcp_item.type == "mcp_call"
    assert mcp_item.name == "database"
    assert mcp_item.server_label == "database"
    assert mcp_item.status == "in_progress"

    # Case 3: the built-in container tool must not become an MCP call; the
    # remaining content is reported as a reasoning item instead.
    builtin_items = parse_remaining_state(_parser_state("docker run", "container"))
    assert len(builtin_items) == 1
    builtin_item = builtin_items[0]
    assert not isinstance(builtin_item, McpCall)
    assert builtin_item.type == "reasoning"
vllm/entrypoints/openai/parser/harmony_utils.py
View file @
444f0e3f
...
@@ -19,6 +19,7 @@ from openai.types.responses.response_function_web_search import (
...
@@ -19,6 +19,7 @@ from openai.types.responses.response_function_web_search import (
ActionSearch
,
ActionSearch
,
ResponseFunctionWebSearch
,
ResponseFunctionWebSearch
,
)
)
from
openai.types.responses.response_output_item
import
McpCall
from
openai.types.responses.response_reasoning_item
import
(
from
openai.types.responses.response_reasoning_item
import
(
Content
as
ResponseReasoningTextContent
,
Content
as
ResponseReasoningTextContent
,
)
)
...
@@ -155,11 +156,7 @@ def get_developer_message(
...
@@ -155,11 +156,7 @@ def get_developer_message(
"web_search_preview"
,
"web_search_preview"
,
"code_interpreter"
,
"code_interpreter"
,
"container"
,
"container"
,
"mcp"
,
):
):
# These are built-in tools that are added to the system message.
# Adding in MCP for now until we support MCP tools executed
# server side
pass
pass
elif
tool
.
type
==
"function"
:
elif
tool
.
type
==
"function"
:
...
@@ -427,6 +424,44 @@ def _parse_final_message(message: Message) -> ResponseOutputItem:
...
@@ -427,6 +424,44 @@ def _parse_final_message(message: Message) -> ResponseOutputItem:
)
)
def
_parse_mcp_recipient
(
recipient
:
str
)
->
tuple
[
str
,
str
]:
"""
Parse MCP recipient into (server_label, tool_name).
For dotted recipients like "repo_browser.list":
- server_label: "repo_browser" (namespace/server)
- tool_name: "list" (specific tool)
For simple recipients like "filesystem":
- server_label: "filesystem"
- tool_name: "filesystem"
"""
if
"."
in
recipient
:
server_label
=
recipient
.
split
(
"."
)[
0
]
tool_name
=
recipient
.
split
(
"."
)[
-
1
]
else
:
server_label
=
recipient
tool_name
=
recipient
return
server_label
,
tool_name
def _parse_mcp_call(message: Message, recipient: str) -> list[ResponseOutputItem]:
    """Build one completed ``McpCall`` item per content part of ``message``.

    The server label and tool name are derived from ``recipient``; each
    content part's text becomes the ``arguments`` of its own call, and every
    call gets a fresh ``mcp_``-prefixed id.
    """
    server_label, tool_name = _parse_mcp_recipient(recipient)
    return [
        McpCall(
            arguments=part.text,
            type="mcp_call",
            name=tool_name,
            server_label=server_label,
            id=f"mcp_{random_uuid()}",
            status="completed",
        )
        for part in message.content
    ]
def
parse_output_message
(
message
:
Message
)
->
list
[
ResponseOutputItem
]:
def
parse_output_message
(
message
:
Message
)
->
list
[
ResponseOutputItem
]:
"""
"""
Parse a Harmony message into a list of output response items.
Parse a Harmony message into a list of output response items.
...
@@ -440,33 +475,34 @@ def parse_output_message(message: Message) -> list[ResponseOutputItem]:
...
@@ -440,33 +475,34 @@ def parse_output_message(message: Message) -> list[ResponseOutputItem]:
output_items
:
list
[
ResponseOutputItem
]
=
[]
output_items
:
list
[
ResponseOutputItem
]
=
[]
recipient
=
message
.
recipient
recipient
=
message
.
recipient
if
recipient
is
not
None
:
# Browser tool calls
# Browser tool calls
if
recipient
is
not
None
and
recipient
.
startswith
(
"browser."
):
if
recipient
.
startswith
(
"browser."
):
output_items
.
append
(
_parse_browser_tool_call
(
message
,
recipient
))
output_items
.
append
(
_parse_browser_tool_call
(
message
,
recipient
))
# Analysis channel (reasoning/chain-of-thought)
# Function calls (should only happen on commentary channel)
elif
message
.
channel
==
"commentary"
and
recipient
.
startswith
(
"functions."
):
output_items
.
extend
(
_parse_function_call
(
message
,
recipient
))
# Built-in tools are treated as reasoning
elif
recipient
.
startswith
((
"python"
,
"browser"
,
"container"
)):
# Built-in tool recipients (python/browser/container)
# generate reasoning output
output_items
.
extend
(
_parse_reasoning_content
(
message
))
# All other recipients are MCP calls
else
:
output_items
.
extend
(
_parse_mcp_call
(
message
,
recipient
))
# No recipient - handle based on channel for non-tool messages
elif
message
.
channel
==
"analysis"
:
elif
message
.
channel
==
"analysis"
:
output_items
.
extend
(
_parse_reasoning_content
(
message
))
output_items
.
extend
(
_parse_reasoning_content
(
message
))
# Commentary channel
elif
message
.
channel
==
"commentary"
:
elif
message
.
channel
==
"commentary"
:
# Function calls
if
recipient
is
not
None
and
recipient
.
startswith
(
"functions."
):
output_items
.
extend
(
_parse_function_call
(
message
,
recipient
))
# Built-in tools on commentary channel are treated as reasoning for now
elif
(
recipient
is
None
# Preambles: explanatory text before tool calls
or
recipient
.
startswith
((
"python"
,
"browser"
,
"container"
))
):
# Per Harmony format, commentary channel can contain preambles to calling
# Per Harmony format, commentary channel can contain preambles to calling
# multiple functions - explanatory text with no recipient. Built-in tool
# multiple functions - explanatory text with no recipient
# recipients (python/browser/container) also generate reasoning output.
output_items
.
extend
(
_parse_reasoning_content
(
message
))
output_items
.
extend
(
_parse_reasoning_content
(
message
))
else
:
raise
ValueError
(
f
"Unknown recipient:
{
recipient
}
"
)
# Final output message
elif
message
.
channel
==
"final"
:
elif
message
.
channel
==
"final"
:
output_items
.
append
(
_parse_final_message
(
message
))
output_items
.
append
(
_parse_final_message
(
message
))
...
@@ -485,8 +521,57 @@ def parse_remaining_state(parser: StreamableParser) -> list[ResponseOutputItem]:
...
@@ -485,8 +521,57 @@ def parse_remaining_state(parser: StreamableParser) -> list[ResponseOutputItem]:
if
current_recipient
is
not
None
and
current_recipient
.
startswith
(
"browser."
):
if
current_recipient
is
not
None
and
current_recipient
.
startswith
(
"browser."
):
return
[]
return
[]
if
current_recipient
and
parser
.
current_channel
in
(
"commentary"
,
"analysis"
):
if
current_recipient
.
startswith
(
"functions."
):
rid
=
random_uuid
()
return
[
ResponseFunctionToolCall
(
arguments
=
parser
.
current_content
,
call_id
=
f
"call_
{
rid
}
"
,
type
=
"function_call"
,
name
=
current_recipient
.
split
(
"."
)[
-
1
],
id
=
f
"fc_
{
rid
}
"
,
status
=
"in_progress"
,
)
]
# Built-in tools (python, browser, container) should be treated as reasoning
elif
not
(
current_recipient
.
startswith
(
"python"
)
or
current_recipient
.
startswith
(
"browser"
)
or
current_recipient
.
startswith
(
"container"
)
):
# All other recipients are MCP calls
rid
=
random_uuid
()
server_label
,
tool_name
=
_parse_mcp_recipient
(
current_recipient
)
return
[
McpCall
(
arguments
=
parser
.
current_content
,
type
=
"mcp_call"
,
name
=
tool_name
,
server_label
=
server_label
,
id
=
f
"mcp_
{
rid
}
"
,
status
=
"in_progress"
,
)
]
if
parser
.
current_channel
==
"commentary"
:
return
[
ResponseReasoningItem
(
id
=
f
"rs_
{
random_uuid
()
}
"
,
summary
=
[],
type
=
"reasoning"
,
content
=
[
ResponseReasoningTextContent
(
text
=
parser
.
current_content
,
type
=
"reasoning_text"
)
],
status
=
None
,
)
]
if
parser
.
current_channel
==
"analysis"
:
if
parser
.
current_channel
==
"analysis"
:
reasoning_item
=
ResponseReasoningItem
(
return
[
ResponseReasoningItem
(
id
=
f
"rs_
{
random_uuid
()
}
"
,
id
=
f
"rs_
{
random_uuid
()
}
"
,
summary
=
[],
summary
=
[],
type
=
"reasoning"
,
type
=
"reasoning"
,
...
@@ -497,8 +582,9 @@ def parse_remaining_state(parser: StreamableParser) -> list[ResponseOutputItem]:
...
@@ -497,8 +582,9 @@ def parse_remaining_state(parser: StreamableParser) -> list[ResponseOutputItem]:
],
],
status
=
None
,
status
=
None
,
)
)
return
[
reasoning_item
]
]
elif
parser
.
current_channel
==
"final"
:
if
parser
.
current_channel
==
"final"
:
output_text
=
ResponseOutputText
(
output_text
=
ResponseOutputText
(
text
=
parser
.
current_content
,
text
=
parser
.
current_content
,
annotations
=
[],
# TODO
annotations
=
[],
# TODO
...
@@ -515,6 +601,7 @@ def parse_remaining_state(parser: StreamableParser) -> list[ResponseOutputItem]:
...
@@ -515,6 +601,7 @@ def parse_remaining_state(parser: StreamableParser) -> list[ResponseOutputItem]:
type
=
"message"
,
type
=
"message"
,
)
)
return
[
text_item
]
return
[
text_item
]
return
[]
return
[]
...
...
vllm/entrypoints/openai/protocol.py
View file @
444f0e3f
...
@@ -25,6 +25,10 @@ from openai.types.responses import (
...
@@ -25,6 +25,10 @@ from openai.types.responses import (
ResponseContentPartDoneEvent
,
ResponseContentPartDoneEvent
,
ResponseFunctionToolCall
,
ResponseFunctionToolCall
,
ResponseInputItemParam
,
ResponseInputItemParam
,
ResponseMcpCallArgumentsDeltaEvent
,
ResponseMcpCallArgumentsDoneEvent
,
ResponseMcpCallCompletedEvent
,
ResponseMcpCallInProgressEvent
,
ResponseOutputItem
,
ResponseOutputItem
,
ResponseOutputItemAddedEvent
,
ResponseOutputItemAddedEvent
,
ResponseOutputItemDoneEvent
,
ResponseOutputItemDoneEvent
,
...
@@ -1790,6 +1794,10 @@ StreamingResponsesResponse: TypeAlias = (
...
@@ -1790,6 +1794,10 @@ StreamingResponsesResponse: TypeAlias = (
|
ResponseCodeInterpreterCallCodeDoneEvent
|
ResponseCodeInterpreterCallCodeDoneEvent
|
ResponseCodeInterpreterCallInterpretingEvent
|
ResponseCodeInterpreterCallInterpretingEvent
|
ResponseCodeInterpreterCallCompletedEvent
|
ResponseCodeInterpreterCallCompletedEvent
|
ResponseMcpCallArgumentsDeltaEvent
|
ResponseMcpCallArgumentsDoneEvent
|
ResponseMcpCallInProgressEvent
|
ResponseMcpCallCompletedEvent
)
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment