Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
186ea22e
Unverified
Commit
186ea22e
authored
Feb 26, 2026
by
Flora Feng
Committed by
GitHub
Feb 26, 2026
Browse files
[Misc][Harmony] Move Responses API only harmony utils to responses/harmony.py (#35339)
Signed-off-by:
sfeng33
<
4florafeng@gmail.com
>
parent
4a9c07a0
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
1040 additions
and
990 deletions
+1040
-990
tests/entrypoints/openai/parser/test_harmony_utils.py
tests/entrypoints/openai/parser/test_harmony_utils.py
+7
-460
tests/entrypoints/openai/responses/test_harmony_utils.py
tests/entrypoints/openai/responses/test_harmony_utils.py
+463
-0
tests/entrypoints/openai/responses/test_mcp_tools.py
tests/entrypoints/openai/responses/test_mcp_tools.py
+5
-5
vllm/entrypoints/openai/parser/harmony_utils.py
vllm/entrypoints/openai/parser/harmony_utils.py
+2
-516
vllm/entrypoints/openai/responses/harmony.py
vllm/entrypoints/openai/responses/harmony.py
+552
-0
vllm/entrypoints/openai/responses/serving.py
vllm/entrypoints/openai/responses/serving.py
+11
-9
No files found.
tests/entrypoints/openai/parser/test_harmony_utils.py
View file @
186ea22e
...
...
@@ -2,13 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
pytest
from
openai.types.responses
import
(
ResponseFunctionToolCall
,
ResponseOutputMessage
,
ResponseReasoningItem
,
)
from
openai.types.responses.response_output_item
import
McpCall
from
openai_harmony
import
Author
,
Message
,
Role
,
TextContent
from
openai_harmony
import
Message
,
Role
from
tests.entrypoints.openai.utils
import
verify_harmony_messages
from
vllm.entrypoints.openai.parser.harmony_utils
import
(
...
...
@@ -18,20 +12,21 @@ from vllm.entrypoints.openai.parser.harmony_utils import (
has_custom_tools
,
parse_chat_input_to_harmony_message
,
parse_chat_output
,
parse_input_to_harmony_message
,
parse_output_message
,
)
from
vllm.entrypoints.openai.responses.harmony
import
(
response_previous_input_to_harmony
,
)
class
TestCommonParseInputToHarmonyMessage
:
"""
Tests for scenarios that are common to both Chat Completion
parse_chat_input_to_harmony_message and Response
e
s API
parse
_input_to_harmony
_message
functions.
parse_chat_input_to_harmony_message and Responses API
response_previous
_input_to_harmony functions.
"""
@
pytest
.
fixture
(
params
=
[
parse_chat_input_to_harmony_message
,
parse
_input_to_harmony
_message
]
params
=
[
parse_chat_input_to_harmony_message
,
response_previous
_input_to_harmony
]
)
def
parse_function
(
self
,
request
):
return
request
.
param
...
...
@@ -216,81 +211,6 @@ class TestCommonParseInputToHarmonyMessage:
assert
messages
[
0
].
content
[
1
].
text
==
"actual text"
class TestParseInputToHarmonyMessage:
    """
    Responses-API-only scenarios for parse_input_to_harmony_message.

    Each test hands one chat-style dict to the parser and inspects the
    list of Harmony messages that comes back.
    """

    def test_message_with_empty_content(self):
        """A user message whose content is "" yields one empty text item."""
        parsed = parse_input_to_harmony_message({"role": "user", "content": ""})

        assert len(parsed) == 1
        assert parsed[0].content[0].text == ""

    def test_tool_message_with_string_content(self):
        """A tool message carrying a plain string keeps that string as text."""
        tool_msg = {
            "role": "tool",
            "name": "get_weather",
            "content": "The weather in San Francisco is sunny, 72°F",
        }

        parsed = parse_input_to_harmony_message(tool_msg)

        assert len(parsed) == 1
        head = parsed[0]
        assert head.author.role == Role.TOOL
        assert head.author.name == "functions.get_weather"
        assert head.content[0].text == "The weather in San Francisco is sunny, 72°F"
        assert head.channel == "commentary"

    def test_tool_message_with_array_content(self):
        """A tool message with a list of parts has its text parts concatenated."""
        parts = [
            {"type": "text", "text": "Result 1: "},
            {"type": "text", "text": "Result 2: "},
            # The non-text part is expected to be dropped from the output.
            {
                "type": "image",
                "url": "http://example.com/img.png",
            },
            {"type": "text", "text": "Result 3"},
        ]
        tool_msg = {"role": "tool", "name": "search_results", "content": parts}

        parsed = parse_input_to_harmony_message(tool_msg)

        assert len(parsed) == 1
        head = parsed[0]
        assert head.author.role == Role.TOOL
        assert head.author.name == "functions.search_results"
        assert head.content[0].text == "Result 1: Result 2: Result 3"

    def test_tool_message_with_empty_content(self):
        """A tool message whose content is None produces an empty text item."""
        tool_msg = {"role": "tool", "name": "empty_tool", "content": None}

        parsed = parse_input_to_harmony_message(tool_msg)

        assert len(parsed) == 1
        head = parsed[0]
        assert head.author.role == Role.TOOL
        assert head.author.name == "functions.empty_tool"
        assert head.content[0].text == ""
class
TestParseChatInputToHarmonyMessage
:
"""
Tests for scenarios that are specific to the Chat Completion API
...
...
@@ -888,200 +808,6 @@ class TestParseChatOutput:
assert
final_content
==
"Let me look that up.
\n
The answer is 42."
class TestParseOutputMessage:
    """Checks how parse_output_message maps Harmony messages to output items."""

    def test_commentary_with_no_recipient_creates_message(self):
        """Commentary without a recipient (a preamble) becomes a message item.

        Per Harmony format, preambles are intended to be shown to end-users,
        unlike analysis channel content which is hidden reasoning.
        See: https://cookbook.openai.com/articles/openai-harmony
        """
        # No recipient is set, which is what marks this as a preamble.
        preamble = Message.from_role_and_content(
            Role.ASSISTANT, "I will now search for the weather information."
        ).with_channel("commentary")

        items = parse_output_message(preamble)

        assert len(items) == 1
        first = items[0]
        assert isinstance(first, ResponseOutputMessage)
        assert first.type == "message"
        assert first.role == "assistant"
        assert first.status == "completed"
        assert len(first.content) == 1
        assert first.content[0].type == "output_text"
        assert first.content[0].text == "I will now search for the weather information."

    def test_commentary_with_function_recipient_creates_function_call(self):
        """Commentary addressed to 'functions.X' becomes a function call."""
        call_msg = (
            Message.from_role_and_content(
                Role.ASSISTANT, '{"location": "San Francisco", "units": "celsius"}'
            )
            .with_channel("commentary")
            .with_recipient("functions.get_weather")
        )

        items = parse_output_message(call_msg)

        assert len(items) == 1
        first = items[0]
        assert isinstance(first, ResponseFunctionToolCall)
        assert first.type == "function_call"
        assert first.name == "get_weather"
        assert first.arguments == '{"location": "San Francisco", "units": "celsius"}'
        assert first.call_id.startswith("call_")
        assert first.id.startswith("fc_")

    def test_commentary_with_python_recipient_creates_reasoning(self):
        """Commentary addressed to 'python' becomes a reasoning item."""
        code_msg = (
            Message.from_role_and_content(
                Role.ASSISTANT, "import numpy as np\nprint(np.array([1, 2, 3]))"
            )
            .with_channel("commentary")
            .with_recipient("python")
        )

        items = parse_output_message(code_msg)

        assert len(items) == 1
        first = items[0]
        assert isinstance(first, ResponseReasoningItem)
        assert first.type == "reasoning"
        assert first.content[0].text == "import numpy as np\nprint(np.array([1, 2, 3]))"

    def test_commentary_with_browser_recipient_creates_reasoning(self):
        """Commentary addressed to 'browser' becomes a reasoning item."""
        browse_msg = (
            Message.from_role_and_content(
                Role.ASSISTANT, "Navigating to the specified URL"
            )
            .with_channel("commentary")
            .with_recipient("browser")
        )

        items = parse_output_message(browse_msg)

        assert len(items) == 1
        first = items[0]
        assert isinstance(first, ResponseReasoningItem)
        assert first.type == "reasoning"
        assert first.content[0].text == "Navigating to the specified URL"

    def test_commentary_with_container_recipient_creates_reasoning(self):
        """Commentary addressed to 'container' becomes a reasoning item."""
        container_msg = (
            Message.from_role_and_content(
                Role.ASSISTANT, "Running command in container"
            )
            .with_channel("commentary")
            .with_recipient("container")
        )

        items = parse_output_message(container_msg)

        assert len(items) == 1
        first = items[0]
        assert isinstance(first, ResponseReasoningItem)
        assert first.type == "reasoning"
        assert first.content[0].text == "Running command in container"

    def test_commentary_with_empty_content_and_no_recipient(self):
        """Edge case: an empty preamble still produces one message item."""
        empty_msg = Message.from_role_and_content(Role.ASSISTANT, "").with_channel(
            "commentary"
        )

        items = parse_output_message(empty_msg)

        assert len(items) == 1
        assert isinstance(items[0], ResponseOutputMessage)
        assert items[0].content[0].text == ""

    def test_commentary_with_multiple_contents_and_no_recipient(self):
        """A preamble with several content parts stays one message item."""
        multi_msg = Message.from_role_and_contents(
            Role.ASSISTANT,
            [
                TextContent(text="Step 1: Analyze the request"),
                TextContent(text="Step 2: Prepare to call functions"),
            ],
        ).with_channel("commentary")

        items = parse_output_message(multi_msg)

        # _parse_final_message returns single ResponseOutputMessage with
        # multiple contents
        assert len(items) == 1
        first = items[0]
        assert isinstance(first, ResponseOutputMessage)
        assert len(first.content) == 2
        assert first.content[0].text == "Step 1: Analyze the request"
        assert first.content[1].text == "Step 2: Prepare to call functions"

    def test_commentary_with_multiple_function_calls(self):
        """Each content part of a function-addressed message is its own call."""
        multi_call = (
            Message.from_role_and_contents(
                Role.ASSISTANT,
                [
                    TextContent(text='{"location": "San Francisco"}'),
                    TextContent(text='{"location": "New York"}'),
                ],
            )
            .with_channel("commentary")
            .with_recipient("functions.get_weather")
        )

        items = parse_output_message(multi_call)

        assert len(items) == 2
        assert all(isinstance(entry, ResponseFunctionToolCall) for entry in items)
        assert items[0].name == "get_weather"
        assert items[1].name == "get_weather"
        assert items[0].arguments == '{"location": "San Francisco"}'
        assert items[1].arguments == '{"location": "New York"}'

    def test_commentary_with_unknown_recipient_creates_mcp_call(self):
        """Commentary addressed to an unrecognised recipient becomes an MCP call."""
        custom_msg = (
            Message.from_role_and_content(Role.ASSISTANT, '{"arg": "value"}')
            .with_channel("commentary")
            .with_recipient("custom_tool")
        )

        items = parse_output_message(custom_msg)

        assert len(items) == 1
        first = items[0]
        assert isinstance(first, McpCall)
        assert first.type == "mcp_call"
        assert first.name == "custom_tool"
        assert first.server_label == "custom_tool"

    def test_analysis_channel_creates_reasoning(self):
        """Analysis-channel content becomes a reasoning item."""
        analysis_msg = Message.from_role_and_content(
            Role.ASSISTANT, "Analyzing the problem step by step..."
        ).with_channel("analysis")

        items = parse_output_message(analysis_msg)

        assert len(items) == 1
        first = items[0]
        assert isinstance(first, ResponseReasoningItem)
        assert first.type == "reasoning"
        assert first.content[0].text == "Analyzing the problem step by step..."

    def test_non_assistant_message_returns_empty(self):
        """Messages not authored by the assistant yield no output items.

        Per the implementation, tool messages to assistant (e.g., search results)
        are not included in final output to align with OpenAI behavior.
        """
        tool_author = Author.new(Role.TOOL, "functions.get_weather")
        tool_msg = Message.from_author_and_content(
            tool_author,
            "The weather is sunny, 72°F",
        )

        items = parse_output_message(tool_msg)

        assert len(items) == 0
def
test_has_custom_tools
()
->
None
:
assert
not
has_custom_tools
(
set
())
assert
not
has_custom_tools
({
"web_search_preview"
,
"code_interpreter"
,
"container"
})
...
...
@@ -1091,185 +817,6 @@ def test_has_custom_tools() -> None:
)
def test_parse_mcp_call_basic() -> None:
    """An undotted, non-built-in recipient is parsed into a completed MCP call."""
    fs_msg = (
        Message.from_role_and_content(Role.ASSISTANT, '{"path": "/tmp"}')
        .with_recipient("filesystem")
        .with_channel("commentary")
    )

    items = parse_output_message(fs_msg)

    assert len(items) == 1
    call = items[0]
    assert isinstance(call, McpCall)
    assert call.type == "mcp_call"
    assert call.name == "filesystem"
    assert call.server_label == "filesystem"
    assert call.arguments == '{"path": "/tmp"}'
    assert call.status == "completed"
def test_parse_mcp_call_dotted_recipient() -> None:
    """A 'server.tool' recipient is split into server label and tool name."""
    dotted_msg = (
        Message.from_role_and_content(Role.ASSISTANT, '{"cmd": "ls"}')
        .with_recipient("repo_browser.list")
        .with_channel("commentary")
    )

    items = parse_output_message(dotted_msg)

    assert len(items) == 1
    call = items[0]
    assert isinstance(call, McpCall)
    assert call.name == "list"
    assert call.server_label == "repo_browser"
def test_mcp_vs_function_call() -> None:
    """A 'functions.*' recipient yields a function call, never an MCP call."""
    fn_msg = (
        Message.from_role_and_content(Role.ASSISTANT, '{"arg": "value"}')
        .with_recipient("functions.my_tool")
        .with_channel("commentary")
    )

    parsed = parse_output_message(fn_msg)

    assert len(parsed) == 1
    assert not isinstance(parsed[0], McpCall)
    assert parsed[0].type == "function_call"
def test_mcp_vs_builtin_tools() -> None:
    """The built-in python tool is reported as reasoning, not as an MCP call."""
    # Only the python recipient is exercised here.
    py_msg = (
        Message.from_role_and_content(Role.ASSISTANT, "print('hello')")
        .with_recipient("python")
        .with_channel("commentary")
    )

    parsed = parse_output_message(py_msg)

    assert len(parsed) == 1
    assert not isinstance(parsed[0], McpCall)
    assert parsed[0].type == "reasoning"
def test_parse_remaining_state_commentary_channel() -> None:
    """Exercise parse_remaining_state on the commentary channel per recipient."""
    from unittest.mock import Mock

    from vllm.entrypoints.openai.parser.harmony_utils import parse_remaining_state

    def stub_parser(content, recipient):
        # Stand-in for a streaming parser stopped mid-message on commentary.
        stub = Mock()
        stub.current_content = content
        stub.current_role = Role.ASSISTANT
        stub.current_channel = "commentary"
        stub.current_recipient = recipient
        return stub

    # Case 1: functions.* recipient → function tool call
    fn_items = parse_remaining_state(
        stub_parser('{"arg": "value"}', "functions.my_tool")
    )
    assert len(fn_items) == 1
    assert not isinstance(fn_items[0], McpCall)
    assert fn_items[0].type == "function_call"
    assert fn_items[0].name == "my_tool"
    assert fn_items[0].status == "in_progress"

    # Case 2: non-built-in tool → MCP call
    mcp_results = parse_remaining_state(stub_parser('{"path": "/tmp"}', "filesystem"))
    assert len(mcp_results) == 1
    assert isinstance(mcp_results[0], McpCall)
    assert mcp_results[0].type == "mcp_call"
    assert mcp_results[0].name == "filesystem"
    assert mcp_results[0].server_label == "filesystem"
    assert mcp_results[0].status == "in_progress"

    # Case 3: built-in tool (python) → reasoning (internal tool
    # interaction), not an MCP call
    builtin_results = parse_remaining_state(stub_parser("print('hello')", "python"))
    assert len(builtin_results) == 1
    assert not isinstance(builtin_results[0], McpCall)
    assert builtin_results[0].type == "reasoning"

    # Case 4: no recipient (preamble) → message, not reasoning
    preamble_results = parse_remaining_state(
        stub_parser("I'll search for that information now.", None)
    )
    assert len(preamble_results) == 1
    assert isinstance(preamble_results[0], ResponseOutputMessage)
    assert preamble_results[0].type == "message"
    assert (
        preamble_results[0].content[0].text == "I'll search for that information now."
    )
    assert preamble_results[0].status == "incomplete"  # streaming
def test_parse_remaining_state_analysis_channel() -> None:
    """Exercise parse_remaining_state on the analysis channel per recipient."""
    from unittest.mock import Mock

    from vllm.entrypoints.openai.parser.harmony_utils import parse_remaining_state

    def stub_parser(content, recipient):
        # Stand-in for a streaming parser stopped mid-message on analysis.
        stub = Mock()
        stub.current_content = content
        stub.current_role = Role.ASSISTANT
        stub.current_channel = "analysis"
        stub.current_recipient = recipient
        return stub

    # Case 1: functions.* recipient → function tool call
    fn_items = parse_remaining_state(
        stub_parser('{"arg": "value"}', "functions.my_tool")
    )
    assert len(fn_items) == 1
    assert not isinstance(fn_items[0], McpCall)
    assert fn_items[0].type == "function_call"
    assert fn_items[0].name == "my_tool"
    assert fn_items[0].status == "in_progress"

    # Case 2: non-built-in tool → MCP call
    mcp_results = parse_remaining_state(stub_parser('{"query": "test"}', "database"))
    assert len(mcp_results) == 1
    assert isinstance(mcp_results[0], McpCall)
    assert mcp_results[0].type == "mcp_call"
    assert mcp_results[0].name == "database"
    assert mcp_results[0].server_label == "database"
    assert mcp_results[0].status == "in_progress"

    # Case 3: built-in tool (container) → not an MCP call; the result is
    # expected to be a reasoning item
    builtin_results = parse_remaining_state(stub_parser("docker run", "container"))
    assert len(builtin_results) == 1
    assert not isinstance(builtin_results[0], McpCall)
    assert builtin_results[0].type == "reasoning"
class
TestGetSystemMessage
:
"""Tests for get_system_message channel configuration."""
...
...
tests/entrypoints/openai/responses/test_harmony_utils.py
0 → 100644
View file @
186ea22e
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Unit tests for vllm.entrypoints.openai.responses.harmony."""
from
openai.types.responses
import
(
ResponseFunctionToolCall
,
ResponseOutputMessage
,
ResponseReasoningItem
,
)
from
openai.types.responses.response_output_item
import
McpCall
from
openai_harmony
import
Author
,
Message
,
Role
,
TextContent
from
vllm.entrypoints.openai.responses.harmony
import
(
harmony_to_response_output
,
parser_state_to_response_output
,
response_previous_input_to_harmony
,
)
class TestResponsePreviousInputToHarmony:
    """
    Responses-API-only scenarios for response_previous_input_to_harmony.

    Each test hands one chat-style dict to the converter and inspects the
    list of Harmony messages that comes back.
    """

    def test_message_with_empty_content(self):
        """A user message whose content is "" yields one empty text item."""
        converted = response_previous_input_to_harmony(
            {"role": "user", "content": ""}
        )

        assert len(converted) == 1
        assert converted[0].content[0].text == ""

    def test_tool_message_with_string_content(self):
        """A tool message carrying a plain string keeps that string as text."""
        tool_input = {
            "role": "tool",
            "name": "get_weather",
            "content": "The weather in San Francisco is sunny, 72°F",
        }

        converted = response_previous_input_to_harmony(tool_input)

        assert len(converted) == 1
        lead = converted[0]
        assert lead.author.role == Role.TOOL
        assert lead.author.name == "functions.get_weather"
        assert lead.content[0].text == "The weather in San Francisco is sunny, 72°F"
        assert lead.channel == "commentary"

    def test_tool_message_with_array_content(self):
        """A tool message with a list of parts has its text parts concatenated."""
        segments = [
            {"type": "text", "text": "Result 1: "},
            {"type": "text", "text": "Result 2: "},
            # The non-text part is expected to be dropped from the output.
            {
                "type": "image",
                "url": "http://example.com/img.png",
            },
            {"type": "text", "text": "Result 3"},
        ]
        tool_input = {"role": "tool", "name": "search_results", "content": segments}

        converted = response_previous_input_to_harmony(tool_input)

        assert len(converted) == 1
        lead = converted[0]
        assert lead.author.role == Role.TOOL
        assert lead.author.name == "functions.search_results"
        assert lead.content[0].text == "Result 1: Result 2: Result 3"

    def test_tool_message_with_empty_content(self):
        """A tool message whose content is None produces an empty text item."""
        tool_input = {"role": "tool", "name": "empty_tool", "content": None}

        converted = response_previous_input_to_harmony(tool_input)

        assert len(converted) == 1
        lead = converted[0]
        assert lead.author.role == Role.TOOL
        assert lead.author.name == "functions.empty_tool"
        assert lead.content[0].text == ""
class TestHarmonyToResponseOutput:
    """Checks how harmony_to_response_output maps Harmony messages to items."""

    def test_commentary_with_no_recipient_creates_message(self):
        """Commentary without a recipient (a preamble) becomes a message item.

        Per Harmony format, preambles are intended to be shown to end-users,
        unlike analysis channel content which is hidden reasoning.
        See: https://cookbook.openai.com/articles/openai-harmony
        """
        # No recipient is set, which is what marks this as a preamble.
        preamble = Message.from_role_and_content(
            Role.ASSISTANT, "I will now search for the weather information."
        ).with_channel("commentary")

        results = harmony_to_response_output(preamble)

        assert len(results) == 1
        lead = results[0]
        assert isinstance(lead, ResponseOutputMessage)
        assert lead.type == "message"
        assert lead.role == "assistant"
        assert lead.status == "completed"
        assert len(lead.content) == 1
        assert lead.content[0].type == "output_text"
        assert lead.content[0].text == "I will now search for the weather information."

    def test_commentary_with_function_recipient_creates_function_call(self):
        """Commentary addressed to 'functions.X' becomes a function call."""
        call_msg = (
            Message.from_role_and_content(
                Role.ASSISTANT, '{"location": "San Francisco", "units": "celsius"}'
            )
            .with_channel("commentary")
            .with_recipient("functions.get_weather")
        )

        results = harmony_to_response_output(call_msg)

        assert len(results) == 1
        lead = results[0]
        assert isinstance(lead, ResponseFunctionToolCall)
        assert lead.type == "function_call"
        assert lead.name == "get_weather"
        assert lead.arguments == '{"location": "San Francisco", "units": "celsius"}'
        assert lead.call_id.startswith("call_")
        assert lead.id.startswith("fc_")

    def test_commentary_with_python_recipient_creates_reasoning(self):
        """Commentary addressed to 'python' becomes a reasoning item."""
        code_msg = (
            Message.from_role_and_content(
                Role.ASSISTANT, "import numpy as np\nprint(np.array([1, 2, 3]))"
            )
            .with_channel("commentary")
            .with_recipient("python")
        )

        results = harmony_to_response_output(code_msg)

        assert len(results) == 1
        lead = results[0]
        assert isinstance(lead, ResponseReasoningItem)
        assert lead.type == "reasoning"
        assert lead.content[0].text == "import numpy as np\nprint(np.array([1, 2, 3]))"

    def test_commentary_with_browser_recipient_creates_reasoning(self):
        """Commentary addressed to 'browser' becomes a reasoning item."""
        browse_msg = (
            Message.from_role_and_content(
                Role.ASSISTANT, "Navigating to the specified URL"
            )
            .with_channel("commentary")
            .with_recipient("browser")
        )

        results = harmony_to_response_output(browse_msg)

        assert len(results) == 1
        lead = results[0]
        assert isinstance(lead, ResponseReasoningItem)
        assert lead.type == "reasoning"
        assert lead.content[0].text == "Navigating to the specified URL"

    def test_commentary_with_container_recipient_creates_reasoning(self):
        """Commentary addressed to 'container' becomes a reasoning item."""
        container_msg = (
            Message.from_role_and_content(
                Role.ASSISTANT, "Running command in container"
            )
            .with_channel("commentary")
            .with_recipient("container")
        )

        results = harmony_to_response_output(container_msg)

        assert len(results) == 1
        lead = results[0]
        assert isinstance(lead, ResponseReasoningItem)
        assert lead.type == "reasoning"
        assert lead.content[0].text == "Running command in container"

    def test_commentary_with_empty_content_and_no_recipient(self):
        """Edge case: an empty preamble still produces one message item."""
        empty_msg = Message.from_role_and_content(Role.ASSISTANT, "").with_channel(
            "commentary"
        )

        results = harmony_to_response_output(empty_msg)

        assert len(results) == 1
        assert isinstance(results[0], ResponseOutputMessage)
        assert results[0].content[0].text == ""

    def test_commentary_with_multiple_contents_and_no_recipient(self):
        """A preamble with several content parts stays one message item."""
        multi_msg = Message.from_role_and_contents(
            Role.ASSISTANT,
            [
                TextContent(text="Step 1: Analyze the request"),
                TextContent(text="Step 2: Prepare to call functions"),
            ],
        ).with_channel("commentary")

        results = harmony_to_response_output(multi_msg)

        # _parse_final_message returns single ResponseOutputMessage with
        # multiple contents
        assert len(results) == 1
        lead = results[0]
        assert isinstance(lead, ResponseOutputMessage)
        assert len(lead.content) == 2
        assert lead.content[0].text == "Step 1: Analyze the request"
        assert lead.content[1].text == "Step 2: Prepare to call functions"

    def test_commentary_with_multiple_function_calls(self):
        """Each content part of a function-addressed message is its own call."""
        multi_call = (
            Message.from_role_and_contents(
                Role.ASSISTANT,
                [
                    TextContent(text='{"location": "San Francisco"}'),
                    TextContent(text='{"location": "New York"}'),
                ],
            )
            .with_channel("commentary")
            .with_recipient("functions.get_weather")
        )

        results = harmony_to_response_output(multi_call)

        assert len(results) == 2
        assert all(isinstance(entry, ResponseFunctionToolCall) for entry in results)
        assert results[0].name == "get_weather"
        assert results[1].name == "get_weather"
        assert results[0].arguments == '{"location": "San Francisco"}'
        assert results[1].arguments == '{"location": "New York"}'

    def test_commentary_with_unknown_recipient_creates_mcp_call(self):
        """Commentary addressed to an unrecognised recipient becomes an MCP call."""
        custom_msg = (
            Message.from_role_and_content(Role.ASSISTANT, '{"arg": "value"}')
            .with_channel("commentary")
            .with_recipient("custom_tool")
        )

        results = harmony_to_response_output(custom_msg)

        assert len(results) == 1
        lead = results[0]
        assert isinstance(lead, McpCall)
        assert lead.type == "mcp_call"
        assert lead.name == "custom_tool"
        assert lead.server_label == "custom_tool"

    def test_analysis_channel_creates_reasoning(self):
        """Analysis-channel content becomes a reasoning item."""
        analysis_msg = Message.from_role_and_content(
            Role.ASSISTANT, "Analyzing the problem step by step..."
        ).with_channel("analysis")

        results = harmony_to_response_output(analysis_msg)

        assert len(results) == 1
        lead = results[0]
        assert isinstance(lead, ResponseReasoningItem)
        assert lead.type == "reasoning"
        assert lead.content[0].text == "Analyzing the problem step by step..."

    def test_non_assistant_message_returns_empty(self):
        """Messages not authored by the assistant yield no output items.

        Per the implementation, tool messages to assistant (e.g., search results)
        are not included in final output to align with OpenAI behavior.
        """
        tool_author = Author.new(Role.TOOL, "functions.get_weather")
        tool_msg = Message.from_author_and_content(
            tool_author,
            "The weather is sunny, 72°F",
        )

        results = harmony_to_response_output(tool_msg)

        assert len(results) == 0
def test_parse_mcp_call_basic() -> None:
    """An undotted, non-built-in recipient is parsed into a completed MCP call."""
    fs_msg = (
        Message.from_role_and_content(Role.ASSISTANT, '{"path": "/tmp"}')
        .with_recipient("filesystem")
        .with_channel("commentary")
    )

    results = harmony_to_response_output(fs_msg)

    assert len(results) == 1
    call = results[0]
    assert isinstance(call, McpCall)
    assert call.type == "mcp_call"
    assert call.name == "filesystem"
    assert call.server_label == "filesystem"
    assert call.arguments == '{"path": "/tmp"}'
    assert call.status == "completed"
def test_parse_mcp_call_dotted_recipient() -> None:
    """A 'server.tool' recipient is split into server label and tool name."""
    dotted_msg = (
        Message.from_role_and_content(Role.ASSISTANT, '{"cmd": "ls"}')
        .with_recipient("repo_browser.list")
        .with_channel("commentary")
    )

    results = harmony_to_response_output(dotted_msg)

    assert len(results) == 1
    call = results[0]
    assert isinstance(call, McpCall)
    assert call.name == "list"
    assert call.server_label == "repo_browser"
def test_mcp_vs_function_call() -> None:
    """A 'functions.*' recipient yields a function call, never an MCP call."""
    fn_msg = (
        Message.from_role_and_content(Role.ASSISTANT, '{"arg": "value"}')
        .with_recipient("functions.my_tool")
        .with_channel("commentary")
    )

    converted = harmony_to_response_output(fn_msg)

    assert len(converted) == 1
    assert not isinstance(converted[0], McpCall)
    assert converted[0].type == "function_call"
def test_mcp_vs_builtin_tools() -> None:
    """A built-in tool recipient (python) is reported as reasoning, not as an McpCall."""
    # "python" is a built-in tool, so the call is surfaced as a reasoning item.
    builtin_message = (
        Message.from_role_and_content(Role.ASSISTANT, "print('hello')")
        .with_recipient("python")
        .with_channel("commentary")
    )

    converted = harmony_to_response_output(builtin_message)

    assert len(converted) == 1
    first = converted[0]
    assert not isinstance(first, McpCall)
    assert first.type == "reasoning"
def test_parser_state_to_response_output_commentary_channel() -> None:
    """Check parser_state_to_response_output on the commentary channel.

    Four in-flight parser states are simulated with mocks: a function call,
    an MCP call, a built-in tool call and a recipient-less preamble.
    """
    from unittest.mock import Mock

    def commentary_parser(content, recipient):
        # Minimal stand-in exposing only the attributes the converter reads.
        parser = Mock()
        parser.current_content = content
        parser.current_role = Role.ASSISTANT
        parser.current_channel = "commentary"
        parser.current_recipient = recipient
        return parser

    # Case 1: functions.* recipient -> function tool call still in progress.
    func_items = parser_state_to_response_output(
        commentary_parser('{"arg": "value"}', "functions.my_tool")
    )
    assert len(func_items) == 1
    func_item = func_items[0]
    assert not isinstance(func_item, McpCall)
    assert func_item.type == "function_call"
    assert func_item.name == "my_tool"
    assert func_item.status == "in_progress"

    # Case 2: non-builtin recipient -> MCP call still in progress.
    mcp_items = parser_state_to_response_output(
        commentary_parser('{"path": "/tmp"}', "filesystem")
    )
    assert len(mcp_items) == 1
    mcp_item = mcp_items[0]
    assert isinstance(mcp_item, McpCall)
    assert mcp_item.type == "mcp_call"
    assert mcp_item.name == "filesystem"
    assert mcp_item.server_label == "filesystem"
    assert mcp_item.status == "in_progress"

    # Case 3: built-in tool (python) -> reasoning item (internal tool
    # interaction), explicitly not an MCP call.
    builtin_items = parser_state_to_response_output(
        commentary_parser("print('hello')", "python")
    )
    assert len(builtin_items) == 1
    builtin_item = builtin_items[0]
    assert not isinstance(builtin_item, McpCall)
    assert builtin_item.type == "reasoning"

    # Case 4: no recipient (preamble) -> user-visible message, not reasoning.
    preamble_items = parser_state_to_response_output(
        commentary_parser("I'll search for that information now.", None)
    )
    assert len(preamble_items) == 1
    preamble = preamble_items[0]
    assert isinstance(preamble, ResponseOutputMessage)
    assert preamble.type == "message"
    assert preamble.content[0].text == "I'll search for that information now."
    assert preamble.status == "incomplete"  # streaming
def test_parser_state_to_response_output_analysis_channel() -> None:
    """Check parser_state_to_response_output on the analysis channel.

    Three in-flight parser states are simulated with mocks: a function call,
    an MCP call and a built-in tool call.
    """
    from unittest.mock import Mock

    def analysis_parser(content, recipient):
        # Minimal stand-in exposing only the attributes the converter reads.
        parser = Mock()
        parser.current_content = content
        parser.current_role = Role.ASSISTANT
        parser.current_channel = "analysis"
        parser.current_recipient = recipient
        return parser

    # Case 1: functions.* recipient -> function tool call still in progress.
    func_items = parser_state_to_response_output(
        analysis_parser('{"arg": "value"}', "functions.my_tool")
    )
    assert len(func_items) == 1
    func_item = func_items[0]
    assert not isinstance(func_item, McpCall)
    assert func_item.type == "function_call"
    assert func_item.name == "my_tool"
    assert func_item.status == "in_progress"

    # Case 2: non-builtin recipient -> MCP call still in progress.
    mcp_items = parser_state_to_response_output(
        analysis_parser('{"query": "test"}', "database")
    )
    assert len(mcp_items) == 1
    mcp_item = mcp_items[0]
    assert isinstance(mcp_item, McpCall)
    assert mcp_item.type == "mcp_call"
    assert mcp_item.name == "database"
    assert mcp_item.server_label == "database"
    assert mcp_item.status == "in_progress"

    # Case 3: built-in tool (container) -> not an MCP call; the converter
    # falls through to its reasoning logic.
    builtin_items = parser_state_to_response_output(
        analysis_parser("docker run", "container")
    )
    assert len(builtin_items) == 1
    builtin_item = builtin_items[0]
    assert not isinstance(builtin_item, McpCall)
    assert builtin_item.type == "reasoning"
tests/entrypoints/openai/responses/test_mcp_tools.py
View file @
186ea22e
...
...
@@ -97,16 +97,16 @@ class TestMCPToolServerUnit:
assert
server
.
get_tool_description
(
"test_server"
,
allowed_tools
=
[])
is
None
def
test_builtin_tools_consistency
(
self
):
"""MCP_BUILTIN_TOOLS must match
_
BUILTIN_TOOL_TO_MCP_SERVER_LABEL values."""
"""MCP_BUILTIN_TOOLS must match BUILTIN_TOOL_TO_MCP_SERVER_LABEL values."""
from
vllm.entrypoints.openai.parser.harmony_utils
import
(
_
BUILTIN_TOOL_TO_MCP_SERVER_LABEL
,
BUILTIN_TOOL_TO_MCP_SERVER_LABEL
,
MCP_BUILTIN_TOOLS
,
)
assert
set
(
_
BUILTIN_TOOL_TO_MCP_SERVER_LABEL
.
values
())
==
MCP_BUILTIN_TOOLS
,
(
assert
set
(
BUILTIN_TOOL_TO_MCP_SERVER_LABEL
.
values
())
==
MCP_BUILTIN_TOOLS
,
(
f
"MCP_BUILTIN_TOOLS
{
MCP_BUILTIN_TOOLS
}
does not match "
f
"
_
BUILTIN_TOOL_TO_MCP_SERVER_LABEL values "
f
"
{
set
(
_
BUILTIN_TOOL_TO_MCP_SERVER_LABEL
.
values
())
}
"
f
"BUILTIN_TOOL_TO_MCP_SERVER_LABEL values "
f
"
{
set
(
BUILTIN_TOOL_TO_MCP_SERVER_LABEL
.
values
())
}
"
)
...
...
vllm/entrypoints/openai/parser/harmony_utils.py
View file @
186ea22e
...
...
@@ -2,27 +2,9 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
datetime
import
json
from
collections.abc
import
Iterable
,
Sequence
from
typing
import
Literal
from
openai.types.responses
import
(
ResponseFunctionToolCall
,
ResponseOutputItem
,
ResponseOutputMessage
,
ResponseOutputText
,
ResponseReasoningItem
,
)
from
openai.types.responses.response_function_web_search
import
(
ActionFind
,
ActionOpenPage
,
ActionSearch
,
ResponseFunctionWebSearch
,
)
from
openai.types.responses.response_output_item
import
McpCall
from
openai.types.responses.response_reasoning_item
import
(
Content
as
ResponseReasoningTextContent
,
)
from
openai.types.responses.tool
import
Tool
from
openai_harmony
import
(
Author
,
...
...
@@ -38,17 +20,10 @@ from openai_harmony import (
ToolDescription
,
load_harmony_encoding
,
)
from
openai_harmony
import
Message
as
OpenAIHarmonyMessage
from
openai_harmony
import
Role
as
OpenAIHarmonyRole
from
vllm
import
envs
from
vllm.entrypoints.openai.chat_completion.protocol
import
ChatCompletionToolsParam
from
vllm.entrypoints.openai.responses.protocol
import
(
ResponseInputOutputItem
,
ResponsesRequest
,
)
from
vllm.logger
import
init_logger
from
vllm.utils
import
random_uuid
logger
=
init_logger
(
__name__
)
...
...
@@ -64,14 +39,14 @@ _harmony_encoding = None
# they are available and requested by the user.
# Tool args are provided by MCP tool descriptions. Output
# of the tools are stringified.
_
BUILTIN_TOOL_TO_MCP_SERVER_LABEL
:
dict
[
str
,
str
]
=
{
BUILTIN_TOOL_TO_MCP_SERVER_LABEL
:
dict
[
str
,
str
]
=
{
"python"
:
"code_interpreter"
,
"browser"
:
"web_search_preview"
,
"container"
:
"container"
,
}
# Derive MCP_BUILTIN_TOOLS from the canonical mapping
MCP_BUILTIN_TOOLS
:
set
[
str
]
=
set
(
_
BUILTIN_TOOL_TO_MCP_SERVER_LABEL
.
values
())
MCP_BUILTIN_TOOLS
:
set
[
str
]
=
set
(
BUILTIN_TOOL_TO_MCP_SERVER_LABEL
.
values
())
def
has_custom_tools
(
tool_types
:
set
[
str
])
->
bool
:
...
...
@@ -179,55 +154,6 @@ def get_user_message(content: str) -> Message:
return
Message
.
from_role_and_content
(
Role
.
USER
,
content
)
def parse_response_input(
    response_msg: ResponseInputOutputItem,
    prev_responses: list[ResponseOutputItem | ResponseReasoningItem],
) -> Message:
    """Translate one Responses API input item into a Harmony message.

    Non-dict items are dumped to a dict first. Items without a ``type`` key
    are handled exactly like ``type == "message"``.

    Args:
        response_msg: The input item (dict or model exposing ``model_dump``).
        prev_responses: Earlier output items, searched newest-first for the
            function call matching a ``function_call_output``.

    Returns:
        The Harmony message built from the item.

    Raises:
        ValueError: If a ``function_call_output`` has no matching call in
            ``prev_responses`` or the item type is not recognised.
    """
    item = (
        response_msg if isinstance(response_msg, dict) else response_msg.model_dump()
    )
    item_type = item.get("type", "message")

    if item_type == "message":
        role = item["role"]
        content = item["content"]
        # Developer text gets an instructions header.
        # <|start|>developer<|message|># Instructions {instructions}<|end|>
        prefix = "Instructions:\n" if role == "developer" else ""
        if isinstance(content, str):
            msg = Message.from_role_and_content(role, prefix + content)
        else:
            parts = [TextContent(text=prefix + part["text"]) for part in content]
            msg = Message.from_role_and_contents(role, parts)
        # Assistant messages are placed on the "final" channel.
        return msg.with_channel("final") if role == "assistant" else msg

    if item_type == "function_call_output":
        call_id = item["call_id"]
        # Newest matching function call wins.
        matching_call: ResponseFunctionToolCall | None = next(
            (
                prev
                for prev in reversed(prev_responses)
                if isinstance(prev, ResponseFunctionToolCall)
                and prev.call_id == call_id
            ),
            None,
        )
        if matching_call is None:
            raise ValueError(f"No call message found for {call_id}")
        tool_author = Author.new(Role.TOOL, f"functions.{matching_call.name}")
        return Message.from_author_and_content(tool_author, item["output"])

    if item_type == "reasoning":
        reasoning_parts = item["content"]
        assert len(reasoning_parts) == 1
        return Message.from_role_and_content(
            Role.ASSISTANT, reasoning_parts[0]["text"]
        )

    if item_type == "function_call":
        return (
            Message.from_role_and_content(Role.ASSISTANT, item["arguments"])
            .with_channel("commentary")
            .with_recipient(f"functions.{item['name']}")
            .with_content_type("json")
        )

    raise ValueError(f"Unknown input type: {item_type}")
def
parse_chat_inputs_to_harmony_messages
(
chat_msgs
:
list
)
->
list
[
Message
]:
"""
Parse a list of messages from request.messages in the Chat Completion API to
...
...
@@ -390,139 +316,6 @@ def parse_chat_input_to_harmony_message(
return
msgs
def parse_input_to_harmony_message(chat_msg) -> list[Message]:
    """Convert one ``request.previous_input_messages`` entry to Harmony messages.

    Two layouts are recognised:
      * Harmony format, identified by a dict-valued ``"author"`` key
        (``{"author": {"role": "..."}}``) -> exactly one message;
      * OpenAI chat format (``{"role": "..."}``) -> one or more messages.

    Non-dict inputs are dumped with ``model_dump(exclude_none=True)`` first.
    """
    msg_dict = (
        chat_msg
        if isinstance(chat_msg, dict)
        else chat_msg.model_dump(exclude_none=True)
    )

    # A missing "author" yields None from .get(), which is not a dict.
    if isinstance(msg_dict.get("author"), dict):
        return [_parse_harmony_format_message(msg_dict)]
    return _parse_chat_format_message(msg_dict)
def _parse_harmony_format_message(chat_msg: dict) -> Message:
    """Rebuild a Harmony ``Message`` from its dict form.

    Reads ``author.role`` / ``author.name`` and ``content``, then re-applies
    ``channel``, ``recipient`` and ``content_type`` when they are truthy.
    Content that is neither a string nor a list becomes a single empty text
    part.
    """
    author = chat_msg["author"]
    role_value = author.get("role")
    author_name = author.get("name")

    raw = chat_msg.get("content", "")
    if isinstance(raw, str):
        parts = [TextContent(text=raw)]
    elif isinstance(raw, list):
        # TODO: Support refusal and non-text content types.
        parts = [TextContent(text=part.get("text", "")) for part in raw]
    else:
        parts = [TextContent(text="")]

    role = Role(role_value)
    if author_name:
        msg = Message.from_author_and_contents(Author.new(role, author_name), parts)
    else:
        msg = Message.from_role_and_contents(role, parts)

    # Optional routing metadata is only applied when present and non-empty.
    if channel := chat_msg.get("channel"):
        msg = msg.with_channel(channel)
    if recipient := chat_msg.get("recipient"):
        msg = msg.with_recipient(recipient)
    if content_type := chat_msg.get("content_type"):
        msg = msg.with_content_type(content_type)
    return msg
def _parse_chat_format_message(chat_msg: dict) -> list[Message]:
    """Turn an OpenAI chat-format dict into a list of Harmony messages.

    * assistant with non-empty ``tool_calls`` -> one commentary-channel JSON
      message per call, addressed to ``functions.<name>``;
    * tool -> one tool-authored commentary message addressed to the assistant;
    * anything else -> one message carrying the text content.

    Raises:
        ValueError: If the dict has no ``role`` (or it is ``None``).
    """
    role = chat_msg.get("role")
    if role is None:
        raise ValueError(f"Message has no 'role' key: {chat_msg}")

    # Assistant turn that issued tool calls.
    tool_calls = chat_msg.get("tool_calls")
    if role == "assistant" and tool_calls:
        call_msgs: list[Message] = []
        for call in tool_calls:
            function_spec = call.get("function", {})
            function_name = function_spec.get("name", "")
            arguments = function_spec.get("arguments", "") or ""
            call_msgs.append(
                Message.from_role_and_content(Role.ASSISTANT, arguments)
                .with_channel("commentary")
                .with_recipient(f"functions.{function_name}")
                .with_content_type("json")
            )
        return call_msgs

    # Tool output turn.
    if role == "tool":
        tool_name = chat_msg.get("name", "")
        if tool_name and not tool_name.startswith("functions."):
            tool_name = f"functions.{tool_name}"
        tool_text = flatten_chat_text_content(chat_msg.get("content", "") or "")
        # NOTE: .with_recipient("assistant") is required on tool messages
        # to match parse_chat_input_to_harmony_message behavior and ensure
        # proper routing in the Harmony protocol.
        tool_author = Author.new(Role.TOOL, tool_name)
        tool_msg = Message.from_author_and_content(tool_author, tool_text)
        return [tool_msg.with_channel("commentary").with_recipient("assistant")]

    # Plain user/assistant/system turn.
    content = chat_msg.get("content", "")
    if isinstance(content, str):
        parts = [TextContent(text=content)]
    else:
        # TODO: Support refusal.
        parts = [TextContent(text=part.get("text", "")) for part in content]
    return [Message.from_role_and_contents(role, parts)]
def construct_harmony_previous_input_messages(
    request: ResponsesRequest,
) -> list[OpenAIHarmonyMessage]:
    """Collect Harmony messages from ``request.previous_input_messages``.

    Entries that are already Harmony messages are used as-is; everything
    else goes through ``parse_input_to_harmony_message``. System and
    developer messages are dropped: to match OpenAI, instructions, reasoning
    and tools always come from the most recent Responses API request rather
    than being carried over from previous ones.
    """
    previous = request.previous_input_messages
    if not previous:
        return []

    dropped_roles = (OpenAIHarmonyRole.SYSTEM, OpenAIHarmonyRole.DEVELOPER)
    kept: list[OpenAIHarmonyMessage] = []
    for entry in previous:
        # Normalise both input shapes to a list of Harmony messages.
        if isinstance(entry, OpenAIHarmonyMessage):
            candidates = [entry]
        else:
            candidates = parse_input_to_harmony_message(entry)
        kept.extend(
            candidate
            for candidate in candidates
            if candidate.author.role not in dropped_roles
        )
    return kept
def
render_for_completion
(
messages
:
list
[
Message
])
->
list
[
int
]:
conversation
=
Conversation
.
from_messages
(
messages
)
token_ids
=
get_encoding
().
render_conversation_for_completion
(
...
...
@@ -531,313 +324,6 @@ def render_for_completion(messages: list[Message]) -> list[int]:
return
token_ids
def _parse_browser_tool_call(message: Message, recipient: str) -> ResponseOutputItem:
    """Parse a browser tool call (search, open, find) into a web search item.

    Args:
        message: Harmony message holding exactly one content part whose text
            is the JSON-encoded tool arguments.
        recipient: One of ``browser.search``, ``browser.open``,
            ``browser.find``.

    Returns:
        A completed ``web_search_call`` item wrapping the matching action.

    Raises:
        ValueError: If the message does not have exactly one content part or
            the recipient is not a known browser action.
    """
    if len(message.content) != 1:
        raise ValueError("Invalid number of contents in browser message")
    content = message.content[0]

    # Parse JSON args (with retry detection). json.loads happily returns
    # lists, strings or numbers for valid non-object JSON; those have no
    # .get() and used to crash below, so they are treated like invalid JSON.
    try:
        browser_call = json.loads(content.text)
    except json.JSONDecodeError:
        browser_call = None
    if not isinstance(browser_call, dict):
        logger.warning(
            "Invalid JSON in browser tool call, using error placeholder: %s",
            content.text,
        )
        json_retry_output_message = (
            f"Invalid JSON args, caught and retried: {content.text}"
        )
        browser_call = {
            "query": json_retry_output_message,
            "url": json_retry_output_message,
            "pattern": json_retry_output_message,
        }

    # Create appropriate action based on recipient
    if recipient == "browser.search":
        action = ActionSearch(
            query=f"cursor:{browser_call.get('query', '')}", type="search"
        )
    elif recipient == "browser.open":
        action = ActionOpenPage(
            url=f"cursor:{browser_call.get('url', '')}", type="open_page"
        )
    elif recipient == "browser.find":
        action = ActionFind(
            pattern=browser_call.get("pattern", ""),
            url=f"cursor:{browser_call.get('url', '')}",
            type="find",
        )
    else:
        raise ValueError(f"Unknown browser action: {recipient}")

    return ResponseFunctionWebSearch(
        id=f"ws_{random_uuid()}",
        action=action,
        status="completed",
        type="web_search_call",
    )
def _parse_function_call(message: Message, recipient: str) -> list[ResponseOutputItem]:
    """Build one function tool call item per content part of ``message``.

    The function name is the text after the last ``.`` in ``recipient``.
    Each item gets its own random id, shared between ``call_id``
    (``call_<id>``) and ``id`` (``fc_<id>``).
    """
    function_name = recipient.rsplit(".", 1)[-1]

    def build_call(arguments: str) -> ResponseFunctionToolCall:
        shared_id = random_uuid()
        return ResponseFunctionToolCall(
            arguments=arguments,
            call_id=f"call_{shared_id}",
            type="function_call",
            name=function_name,
            id=f"fc_{shared_id}",
        )

    return [build_call(part.text) for part in message.content]
def _parse_reasoning(message: Message) -> list[ResponseOutputItem]:
    """Wrap every content part of ``message`` in its own reasoning item.

    Each item has an ``rs_``-prefixed random id, an empty summary, no status
    and a single ``reasoning_text`` content entry.
    """
    return [
        ResponseReasoningItem(
            id=f"rs_{random_uuid()}",
            summary=[],
            type="reasoning",
            content=[
                ResponseReasoningTextContent(text=part.text, type="reasoning_text")
            ],
            status=None,
        )
        for part in message.content
    ]
def _parse_final_message(message: Message) -> ResponseOutputItem:
    """Convert a user-visible Harmony message into one completed output message.

    Every content part becomes an ``output_text`` entry; the output message
    keeps the author's role and gets a ``msg_``-prefixed random id.
    """
    text_parts = [
        ResponseOutputText(
            text=part.text,
            annotations=[],  # TODO
            type="output_text",
            logprobs=None,  # TODO
        )
        for part in message.content
    ]
    return ResponseOutputMessage(
        id=f"msg_{random_uuid()}",
        content=text_parts,
        role=message.author.role,
        status="completed",
        type="message",
    )
def
_parse_mcp_recipient
(
recipient
:
str
)
->
tuple
[
str
,
str
]:
"""
Parse MCP recipient into (server_label, tool_name).
For dotted recipients like "repo_browser.list":
- server_label: "repo_browser" (namespace/server)
- tool_name: "list" (specific tool)
For simple recipients like "filesystem":
- server_label: "filesystem"
- tool_name: "filesystem"
"""
if
"."
in
recipient
:
server_label
=
recipient
.
split
(
"."
)[
0
]
tool_name
=
recipient
.
split
(
"."
)[
-
1
]
else
:
server_label
=
recipient
tool_name
=
recipient
return
server_label
,
tool_name
def _parse_mcp_call(message: Message, recipient: str) -> list[ResponseOutputItem]:
    """Build one completed ``McpCall`` per content part of ``message``.

    Built-in tool recipients take their server label from
    ``_BUILTIN_TOOL_TO_MCP_SERVER_LABEL`` and keep the recipient as tool
    name; any other recipient is split by ``_parse_mcp_recipient``.
    """
    server_label, tool_name = (
        (_BUILTIN_TOOL_TO_MCP_SERVER_LABEL[recipient], recipient)
        if recipient in _BUILTIN_TOOL_TO_MCP_SERVER_LABEL
        else _parse_mcp_recipient(recipient)
    )

    return [
        McpCall(
            arguments=part.text,
            type="mcp_call",
            name=tool_name,
            server_label=server_label,
            id=f"mcp_{random_uuid()}",
            status="completed",
        )
        for part in message.content
    ]
def _parse_message_no_recipient(
    message: Message,
) -> list[ResponseOutputItem]:
    """Convert a recipient-less Harmony message according to its channel.

    ``analysis`` becomes reasoning items; ``commentary`` and ``final`` become
    a single output message.

    Raises:
        ValueError: For any other channel.
    """
    channel = message.channel
    if channel == "analysis":
        return _parse_reasoning(message)
    if channel not in ("commentary", "final"):
        raise ValueError(f"Unknown channel: {message.channel}")
    # Per Harmony format, preambles (commentary with no recipient) and
    # final channel content are both intended to be shown to end-users.
    # See: https://cookbook.openai.com/articles/openai-harmony
    return [_parse_final_message(message)]
def parse_output_message(message: Message) -> list[ResponseOutputItem]:
    """Convert one Harmony message into Responses API output items.

    Messages not authored by the assistant yield an empty list. Otherwise
    the recipient decides the conversion: ``browser.*`` -> web search call,
    ``functions.*`` on the commentary channel -> function calls, built-in
    tool names -> reasoning items, any other recipient -> MCP calls, and no
    recipient -> channel-based handling.
    """
    if message.author.role != "assistant":
        # This is a message from a tool to the assistant (e.g., search result).
        # Don't include it in the final output for now. This aligns with
        # OpenAI's behavior on models like o4-mini.
        return []

    recipient = message.recipient
    if recipient is None:
        # Non-tool message: the channel decides.
        return list(_parse_message_no_recipient(message))

    # Browser tool calls (browser.search, browser.open, browser.find)
    if recipient.startswith("browser."):
        return [_parse_browser_tool_call(message, recipient)]
    # Function calls (should only happen on commentary channel)
    if message.channel == "commentary" and recipient.startswith("functions."):
        return list(_parse_function_call(message, recipient))
    # Built-in MCP tools (python, browser, container)
    if recipient in _BUILTIN_TOOL_TO_MCP_SERVER_LABEL:
        return list(_parse_reasoning(message))
    # All other recipients are MCP calls
    return list(_parse_mcp_call(message, recipient))
def parse_remaining_state(parser: StreamableParser) -> list[ResponseOutputItem]:
    """Convert the parser's unfinished message into output items.

    Returns an empty list when there is no pending content, when the pending
    message is not from the assistant, when it targets a ``browser.*`` tool,
    or when its channel is not handled. Tool calls are reported as
    ``in_progress`` and text messages as ``incomplete``, since the pending
    message was cut off before completion.
    """
    if not parser.current_content or parser.current_role != Role.ASSISTANT:
        return []

    recipient = parser.current_recipient
    if recipient is not None and recipient.startswith("browser."):
        return []

    def reasoning_items() -> list[ResponseOutputItem]:
        return [
            ResponseReasoningItem(
                id=f"rs_{random_uuid()}",
                summary=[],
                type="reasoning",
                content=[
                    ResponseReasoningTextContent(
                        text=parser.current_content, type="reasoning_text"
                    )
                ],
                status=None,
            )
        ]

    def incomplete_message_items() -> list[ResponseOutputItem]:
        text_part = ResponseOutputText(
            text=parser.current_content,
            annotations=[],  # TODO
            type="output_text",
            logprobs=None,  # TODO
        )
        return [
            ResponseOutputMessage(
                id=f"msg_{random_uuid()}",
                content=[text_part],
                role="assistant",
                # if the parser still has messages (ie if the generator got cut
                # abruptly), this should be incomplete
                status="incomplete",
                type="message",
            )
        ]

    channel = parser.current_channel

    if recipient and channel in ("commentary", "analysis"):
        if recipient.startswith("functions."):
            call_suffix = random_uuid()
            return [
                ResponseFunctionToolCall(
                    arguments=parser.current_content,
                    call_id=f"call_{call_suffix}",
                    type="function_call",
                    name=recipient.split(".")[-1],
                    id=f"fc_{call_suffix}",
                    status="in_progress",
                )
            ]
        # Built-in MCP tools (python, browser, container)
        if recipient in _BUILTIN_TOOL_TO_MCP_SERVER_LABEL:
            return reasoning_items()
        # All other recipients are MCP calls
        mcp_suffix = random_uuid()
        server_label, tool_name = _parse_mcp_recipient(recipient)
        return [
            McpCall(
                arguments=parser.current_content,
                type="mcp_call",
                name=tool_name,
                server_label=server_label,
                id=f"mcp_{mcp_suffix}",
                status="in_progress",
            )
        ]

    if channel == "analysis":
        return reasoning_items()
    if channel in ("commentary", "final"):
        # Per Harmony format, preambles (commentary with no recipient) are
        # intended to be shown to end-users, unlike analysis channel content;
        # final-channel text is built the same way.
        return incomplete_message_items()
    return []
def get_stop_tokens_for_assistant_actions() -> list[int]:
    """Return the stop token ids the Harmony encoding reports for assistant actions."""
    encoding = get_encoding()
    return encoding.stop_tokens_for_assistant_actions()
...
...
vllm/entrypoints/openai/responses/harmony.py
0 → 100644
View file @
186ea22e
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
Harmony ↔ Responses API conversion utilities.
Handles two directions:
1. Response Input → Harmony Messages (input parsing)
2. Harmony Messages → Response Output Items (output parsing)
"""
import
json
from
openai.types.responses
import
(
ResponseFunctionToolCall
,
ResponseOutputItem
,
ResponseOutputMessage
,
ResponseOutputText
,
ResponseReasoningItem
,
)
from
openai.types.responses.response_function_web_search
import
(
ActionFind
,
ActionOpenPage
,
ActionSearch
,
ResponseFunctionWebSearch
,
)
from
openai.types.responses.response_output_item
import
McpCall
from
openai.types.responses.response_reasoning_item
import
(
Content
as
ResponseReasoningTextContent
,
)
from
openai_harmony
import
Author
,
Message
,
Role
,
StreamableParser
,
TextContent
from
vllm.entrypoints.openai.parser.harmony_utils
import
(
BUILTIN_TOOL_TO_MCP_SERVER_LABEL
,
flatten_chat_text_content
,
)
from
vllm.entrypoints.openai.responses.protocol
import
(
ResponseInputOutputItem
,
ResponsesRequest
,
)
from
vllm.logger
import
init_logger
from
vllm.utils
import
random_uuid
logger
=
init_logger
(
__name__
)
# ---------------------------------------------------------------------------
# 1. Private helpers for input parsing
# ---------------------------------------------------------------------------
def _parse_harmony_format_message(chat_msg: dict) -> Message:
    """Rebuild a Harmony ``Message`` from its dict form.

    Reads ``author.role`` / ``author.name`` and ``content``, then re-applies
    ``channel``, ``recipient`` and ``content_type`` when they are truthy.
    Content that is neither a string nor a list becomes a single empty text
    part.
    """
    author = chat_msg["author"]
    role_value = author.get("role")
    author_name = author.get("name")

    raw = chat_msg.get("content", "")
    if isinstance(raw, str):
        parts = [TextContent(text=raw)]
    elif isinstance(raw, list):
        # TODO: Support refusal and non-text content types.
        parts = [TextContent(text=part.get("text", "")) for part in raw]
    else:
        parts = [TextContent(text="")]

    role = Role(role_value)
    if author_name:
        msg = Message.from_author_and_contents(Author.new(role, author_name), parts)
    else:
        msg = Message.from_role_and_contents(role, parts)

    # Optional routing metadata is only applied when present and non-empty.
    if channel := chat_msg.get("channel"):
        msg = msg.with_channel(channel)
    if recipient := chat_msg.get("recipient"):
        msg = msg.with_recipient(recipient)
    if content_type := chat_msg.get("content_type"):
        msg = msg.with_content_type(content_type)
    return msg
def _parse_chat_format_message(chat_msg: dict) -> list[Message]:
    """Parse an OpenAI chat-format dict into Harmony messages.

    * assistant with non-empty ``tool_calls`` -> one commentary-channel JSON
      message per call, addressed to ``functions.<name>``;
    * tool -> one tool-authored commentary message addressed to the assistant;
    * anything else -> one message carrying the text content.

    Args:
        chat_msg: Chat-format message dict; must contain ``role``.

    Returns:
        The Harmony messages built from the dict (never empty unless the
        assistant branch receives calls that produce none).

    Raises:
        ValueError: If the dict has no ``role`` (or it is ``None``).
    """
    role = chat_msg.get("role")
    if role is None:
        raise ValueError(f"Message has no 'role' key: {chat_msg}")

    # Assistant message with tool calls
    tool_calls = chat_msg.get("tool_calls")
    if role == "assistant" and tool_calls:
        msgs: list[Message] = []
        for call in tool_calls:
            func = call.get("function", {})
            name = func.get("name", "")
            arguments = func.get("arguments", "") or ""
            msg = Message.from_role_and_content(Role.ASSISTANT, arguments)
            msg = msg.with_channel("commentary")
            msg = msg.with_recipient(f"functions.{name}")
            msg = msg.with_content_type("json")
            msgs.append(msg)
        return msgs

    # Tool role message (tool output)
    if role == "tool":
        name = chat_msg.get("name", "")
        if name and not name.startswith("functions."):
            name = f"functions.{name}"
        content = chat_msg.get("content", "") or ""
        content = flatten_chat_text_content(content)
        # NOTE: .with_recipient("assistant") is required on tool messages
        # to match parse_chat_input_to_harmony_message behavior and ensure
        # proper routing in the Harmony protocol.
        msg = (
            Message.from_author_and_content(Author.new(Role.TOOL, name), content)
            .with_channel("commentary")
            .with_recipient("assistant")
        )
        return [msg]

    # Default: user/assistant/system messages.
    # An explicit ``"content": None`` is normalised to "" (as the tool branch
    # already does); otherwise iterating over None below raises TypeError.
    content = chat_msg.get("content", "") or ""
    if isinstance(content, str):
        contents = [TextContent(text=content)]
    else:
        # TODO: Support refusal.
        contents = [TextContent(text=c.get("text", "")) for c in content]
    msg = Message.from_role_and_contents(role, contents)
    return [msg]
# ---------------------------------------------------------------------------
# 2. Public input parsing functions
# ---------------------------------------------------------------------------
def response_input_to_harmony(
    response_msg: ResponseInputOutputItem,
    prev_responses: list[ResponseOutputItem | ResponseReasoningItem],
) -> Message:
    """Convert a single ResponseInputOutputItem into a Harmony Message.

    Non-dict items are dumped to a dict first. Items without a ``type`` key
    are handled exactly like ``type == "message"``.

    Args:
        response_msg: The input item (dict or model exposing ``model_dump``).
        prev_responses: Earlier output items, searched newest-first for the
            function call matching a ``function_call_output``.

    Returns:
        The Harmony message built from the item.

    Raises:
        ValueError: If a ``function_call_output`` has no matching call in
            ``prev_responses`` or the item type is not recognised.
    """
    item = (
        response_msg if isinstance(response_msg, dict) else response_msg.model_dump()
    )
    item_type = item.get("type", "message")

    if item_type == "message":
        role = item["role"]
        content = item["content"]
        # Developer text gets an instructions header.
        # <|start|>developer<|message|># Instructions {instructions}<|end|>
        prefix = "Instructions:\n" if role == "developer" else ""
        if isinstance(content, str):
            msg = Message.from_role_and_content(role, prefix + content)
        else:
            parts = [TextContent(text=prefix + part["text"]) for part in content]
            msg = Message.from_role_and_contents(role, parts)
        # Assistant messages are placed on the "final" channel.
        return msg.with_channel("final") if role == "assistant" else msg

    if item_type == "function_call_output":
        call_id = item["call_id"]
        # Newest matching function call wins.
        matching_call: ResponseFunctionToolCall | None = next(
            (
                prev
                for prev in reversed(prev_responses)
                if isinstance(prev, ResponseFunctionToolCall)
                and prev.call_id == call_id
            ),
            None,
        )
        if matching_call is None:
            raise ValueError(f"No call message found for {call_id}")
        tool_author = Author.new(Role.TOOL, f"functions.{matching_call.name}")
        return Message.from_author_and_content(tool_author, item["output"])

    if item_type == "reasoning":
        reasoning_parts = item["content"]
        assert len(reasoning_parts) == 1
        return Message.from_role_and_content(
            Role.ASSISTANT, reasoning_parts[0]["text"]
        )

    if item_type == "function_call":
        return (
            Message.from_role_and_content(Role.ASSISTANT, item["arguments"])
            .with_channel("commentary")
            .with_recipient(f"functions.{item['name']}")
            .with_content_type("json")
        )

    raise ValueError(f"Unknown input type: {item_type}")
def response_previous_input_to_harmony(chat_msg) -> list[Message]:
    """Convert one ``request.previous_input_messages`` entry to Harmony messages.

    Two layouts are recognised:
      * Harmony format, identified by a dict-valued ``"author"`` key
        (``{"author": {"role": "..."}}``) -> exactly one message;
      * OpenAI chat format (``{"role": "..."}``) -> one or more messages.

    Non-dict inputs are dumped with ``model_dump(exclude_none=True)`` first.
    """
    msg_dict = (
        chat_msg
        if isinstance(chat_msg, dict)
        else chat_msg.model_dump(exclude_none=True)
    )

    # A missing "author" yields None from .get(), which is not a dict.
    if isinstance(msg_dict.get("author"), dict):
        return [_parse_harmony_format_message(msg_dict)]
    return _parse_chat_format_message(msg_dict)
def construct_harmony_previous_input_messages(
    request: ResponsesRequest,
) -> list[Message]:
    """Build a Harmony message list from request.previous_input_messages.

    Entries that are already ``Message`` objects are used as-is; anything
    else is converted with ``response_previous_input_to_harmony``. System
    and developer messages are filtered out to match OpenAI behavior, where
    instructions are always taken from the most recent Responses API request.
    """
    previous = request.previous_input_messages
    if not previous:
        return []

    dropped_roles = (Role.SYSTEM, Role.DEVELOPER)
    kept: list[Message] = []
    for entry in previous:
        # Normalise both input shapes to a list of Harmony messages.
        if isinstance(entry, Message):
            candidates = [entry]
        else:
            candidates = response_previous_input_to_harmony(entry)
        kept.extend(
            candidate
            for candidate in candidates
            if candidate.author.role not in dropped_roles
        )
    return kept
# ---------------------------------------------------------------------------
# 3. Private helpers for output parsing
# ---------------------------------------------------------------------------
def _parse_browser_tool_call(message: Message, recipient: str) -> ResponseOutputItem:
    """Parse browser tool calls (search, open, find) into web search items.

    Args:
        message: Harmony message whose single content entry holds the JSON
            arguments of the browser call.
        recipient: ``"browser.search"``, ``"browser.open"`` or
            ``"browser.find"``.

    Returns:
        A ``web_search_call`` item with status ``"completed"`` wrapping the
        action that corresponds to ``recipient``.

    Raises:
        ValueError: If the message does not contain exactly one content entry,
            or if ``recipient`` is not one of the known browser actions.
    """
    if len(message.content) != 1:
        raise ValueError("Invalid number of contents in browser message")
    content = message.content[0]

    # Parse JSON args (with retry detection). Arguments that decode to
    # something other than a JSON object (e.g. a bare string or a list) are
    # treated like undecodable JSON: the ``.get`` lookups below need a dict
    # and would otherwise fail with AttributeError.
    try:
        browser_call = json.loads(content.text)
    except json.JSONDecodeError:
        browser_call = None
    if not isinstance(browser_call, dict):
        logger.warning(
            "Invalid JSON in browser tool call, using error placeholder: %s",
            content.text,
        )
        json_retry_output_message = (
            f"Invalid JSON args, caught and retried: {content.text}"
        )
        browser_call = {
            "query": json_retry_output_message,
            "url": json_retry_output_message,
            "pattern": json_retry_output_message,
        }

    # Create appropriate action based on recipient
    if recipient == "browser.search":
        action = ActionSearch(
            query=f"cursor:{browser_call.get('query', '')}", type="search"
        )
    elif recipient == "browser.open":
        action = ActionOpenPage(
            url=f"cursor:{browser_call.get('url', '')}", type="open_page"
        )
    elif recipient == "browser.find":
        action = ActionFind(
            pattern=browser_call.get("pattern", ""),
            url=f"cursor:{browser_call.get('url', '')}",
            type="find",
        )
    else:
        raise ValueError(f"Unknown browser action: {recipient}")

    return ResponseFunctionWebSearch(
        id=f"ws_{random_uuid()}",
        action=action,
        status="completed",
        type="web_search_call",
    )
def _parse_function_call(message: Message, recipient: str) -> list[ResponseOutputItem]:
    """Build one function tool call item per content entry of ``message``.

    The function name is the part of ``recipient`` after its last dot. Each
    item gets its own random id, shared between ``call_id`` (``call_`` prefix)
    and ``id`` (``fc_`` prefix).
    """
    tool_name = recipient.rsplit(".", 1)[-1]

    def build_item(arguments):
        # One uuid per item so that call_id and id stay paired.
        uid = random_uuid()
        return ResponseFunctionToolCall(
            arguments=arguments,
            call_id=f"call_{uid}",
            type="function_call",
            name=tool_name,
            id=f"fc_{uid}",
        )

    return [build_item(part.text) for part in message.content]
def _parse_reasoning(message: Message) -> list[ResponseOutputItem]:
    """Wrap every content entry of ``message`` in its own reasoning item.

    Each item carries the entry's text as a single ``reasoning_text`` content
    part, an empty summary and no status.
    """
    return [
        ResponseReasoningItem(
            id=f"rs_{random_uuid()}",
            summary=[],
            type="reasoning",
            content=[
                ResponseReasoningTextContent(text=part.text, type="reasoning_text")
            ],
            status=None,
        )
        for part in message.content
    ]
def _parse_final_message(message: Message) -> ResponseOutputItem:
    """Turn a user-facing Harmony message into a completed output message.

    Every content entry becomes one ``output_text`` part; the role is copied
    from the message author.
    """
    text_parts = [
        ResponseOutputText(
            text=part.text,
            annotations=[],  # TODO
            type="output_text",
            logprobs=None,  # TODO
        )
        for part in message.content
    ]
    return ResponseOutputMessage(
        id=f"msg_{random_uuid()}",
        content=text_parts,
        role=message.author.role,
        status="completed",
        type="message",
    )
def _parse_mcp_recipient(recipient: str) -> tuple[str, str]:
    """Split an MCP recipient into ``(server_label, tool_name)``.

    The server label is everything before the first dot and the tool name is
    everything after the last dot:

    - ``"repo_browser.list"`` -> ``("repo_browser", "list")``
    - ``"filesystem"`` -> ``("filesystem", "filesystem")``

    Without a dot, both elements are the recipient itself.
    """
    # partition/rpartition return the whole string in the relevant slot when
    # there is no dot, so the dotted and undotted cases need no branching.
    server_label = recipient.partition(".")[0]
    tool_name = recipient.rpartition(".")[2]
    return server_label, tool_name
def _parse_mcp_call(message: Message, recipient: str) -> list[ResponseOutputItem]:
    """Build one completed MCP call item per content entry of ``message``.

    Recipients listed in ``BUILTIN_TOOL_TO_MCP_SERVER_LABEL`` take their
    server label from that mapping and keep the recipient as the tool name.
    Any other recipient is split by ``_parse_mcp_recipient``.
    """
    if recipient in BUILTIN_TOOL_TO_MCP_SERVER_LABEL:
        # Handle built-in tools that need server_label mapping
        label, tool = BUILTIN_TOOL_TO_MCP_SERVER_LABEL[recipient], recipient
    else:
        label, tool = _parse_mcp_recipient(recipient)

    return [
        McpCall(
            arguments=part.text,
            type="mcp_call",
            name=tool,
            server_label=label,
            id=f"mcp_{random_uuid()}",
            status="completed",
        )
        for part in message.content
    ]
def _parse_message_no_recipient(
    message: Message,
) -> list[ResponseOutputItem]:
    """Route a recipient-less Harmony message according to its channel.

    ``analysis`` content becomes reasoning items; ``commentary`` and ``final``
    content becomes a single output message.

    Raises:
        ValueError: For any other channel value.
    """
    channel = message.channel
    if channel in ("commentary", "final"):
        # Per Harmony format, preambles (commentary with no recipient) and
        # final channel content are both intended to be shown to end-users.
        # See: https://cookbook.openai.com/articles/openai-harmony
        return [_parse_final_message(message)]
    if channel == "analysis":
        return _parse_reasoning(message)
    raise ValueError(f"Unknown channel: {channel}")
# ---------------------------------------------------------------------------
# 4. Public output parsing functions
# ---------------------------------------------------------------------------
def harmony_to_response_output(message: Message) -> list[ResponseOutputItem]:
    """Convert one Harmony message into Responses API output items.

    Acts as the main dispatcher: the recipient (if any) and the channel
    decide which private parser handles the message.
    """
    if message.author.role != "assistant":
        # This is a message from a tool to the assistant (e.g., search result).
        # Don't include it in the final output for now. This aligns with
        # OpenAI's behavior on models like o4-mini.
        return []

    target = message.recipient
    if target is None:
        # No recipient - handle based on channel for non-tool messages
        return list(_parse_message_no_recipient(message))

    # Browser tool calls (browser.search, browser.open, browser.find)
    if target.startswith("browser."):
        return [_parse_browser_tool_call(message, target)]

    # Function calls (should only happen on commentary channel)
    if message.channel == "commentary" and target.startswith("functions."):
        return list(_parse_function_call(message, target))

    # Built-in MCP tools (python, browser, container)
    if target in BUILTIN_TOOL_TO_MCP_SERVER_LABEL:
        return list(_parse_reasoning(message))

    # All other recipients are MCP calls
    return list(_parse_mcp_call(message, target))
def parser_state_to_response_output(
    parser: StreamableParser,
) -> list[ResponseOutputItem]:
    """Turn content still buffered in the parser into response items.

    Used when the parser holds content that never formed a complete message
    (e.g., generation was cut short). Returns an empty list when nothing is
    buffered, when the current role is not the assistant, when the recipient
    is a ``browser.`` tool, or when the channel is not recognised.
    """
    pending_text = parser.current_content
    if not pending_text or parser.current_role != Role.ASSISTANT:
        return []

    target = parser.current_recipient
    if target is not None and target.startswith("browser."):
        return []

    channel = parser.current_channel

    def reasoning_item():
        return ResponseReasoningItem(
            id=f"rs_{random_uuid()}",
            summary=[],
            type="reasoning",
            content=[
                ResponseReasoningTextContent(text=pending_text, type="reasoning_text")
            ],
            status=None,
        )

    def partial_message():
        text_part = ResponseOutputText(
            text=pending_text,
            annotations=[],  # TODO
            type="output_text",
            logprobs=None,  # TODO
        )
        return ResponseOutputMessage(
            id=f"msg_{random_uuid()}",
            content=[text_part],
            role="assistant",
            # if the parser still has messages (ie if the generator got cut
            # abruptly), this should be incomplete
            status="incomplete",
            type="message",
        )

    if target and channel in ("commentary", "analysis"):
        if target.startswith("functions."):
            uid = random_uuid()
            return [
                ResponseFunctionToolCall(
                    arguments=pending_text,
                    call_id=f"call_{uid}",
                    type="function_call",
                    name=target.split(".")[-1],
                    id=f"fc_{uid}",
                    status="in_progress",
                )
            ]
        # Built-in MCP tools (python, browser, container)
        if target in BUILTIN_TOOL_TO_MCP_SERVER_LABEL:
            return [reasoning_item()]
        # All other recipients are MCP calls
        uid = random_uuid()
        server_label, tool_name = _parse_mcp_recipient(target)
        return [
            McpCall(
                arguments=pending_text,
                type="mcp_call",
                name=tool_name,
                server_label=server_label,
                id=f"mcp_{uid}",
                status="in_progress",
            )
        ]

    if channel == "analysis":
        return [reasoning_item()]
    if channel in ("commentary", "final"):
        # Per Harmony format, preambles (commentary with no recipient) are
        # intended to be shown to end-users, unlike analysis channel content;
        # they are emitted as the same incomplete message as final content.
        return [partial_message()]
    return []
vllm/entrypoints/openai/responses/serving.py
View file @
186ea22e
...
...
@@ -58,15 +58,11 @@ from vllm.entrypoints.openai.engine.serving import (
)
from
vllm.entrypoints.openai.models.serving
import
OpenAIServingModels
from
vllm.entrypoints.openai.parser.harmony_utils
import
(
construct_harmony_previous_input_messages
,
get_developer_message
,
get_stop_tokens_for_assistant_actions
,
get_system_message
,
get_user_message
,
has_custom_tools
,
parse_output_message
,
parse_remaining_state
,
parse_response_input
,
render_for_completion
,
)
from
vllm.entrypoints.openai.responses.context
import
(
...
...
@@ -76,6 +72,12 @@ from vllm.entrypoints.openai.responses.context import (
SimpleContext
,
StreamingHarmonyContext
,
)
from
vllm.entrypoints.openai.responses.harmony
import
(
construct_harmony_previous_input_messages
,
harmony_to_response_output
,
parser_state_to_response_output
,
response_input_to_harmony
,
)
from
vllm.entrypoints.openai.responses.protocol
import
(
InputTokensDetails
,
OutputTokensDetails
,
...
...
@@ -954,9 +956,9 @@ class OpenAIServingResponses(OpenAIServing):
output_items
:
list
[
ResponseOutputItem
]
=
[]
num_init_messages
=
context
.
num_init_messages
for
msg
in
context
.
messages
[
num_init_messages
:]:
output_items
.
extend
(
p
arse_output
_message
(
msg
))
output_items
.
extend
(
h
ar
mony_to_respon
se_output
(
msg
))
# Handle the generation stopped in the middle (if any).
last_items
=
parse
_remaining_state
(
context
.
parser
)
last_items
=
parse
r_state_to_response_output
(
context
.
parser
)
if
last_items
:
output_items
.
extend
(
last_items
)
return
output_items
...
...
@@ -1103,13 +1105,13 @@ class OpenAIServingResponses(OpenAIServing):
else
:
prev_outputs
=
[]
for
response_msg
in
request
.
input
:
new_msg
=
parse_
response_input
(
response_msg
,
prev_outputs
)
new_msg
=
response_input
_to_harmony
(
response_msg
,
prev_outputs
)
if
new_msg
.
author
.
role
!=
"system"
:
messages
.
append
(
new_msg
)
# User passes in a tool call request and its output. We need
# to add the tool call request to prev_outputs so that
the
#
parse_
response_input can find the tool call request when
# to add the tool call request to prev_outputs so that
# response_input
_to_harmony
can find the tool call request when
# parsing the tool call output.
if
isinstance
(
response_msg
,
ResponseFunctionToolCall
):
prev_outputs
.
append
(
response_msg
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment