Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
25b3242d
Unverified
Commit
25b3242d
authored
Apr 14, 2026
by
noobHappylife
Committed by
GitHub
Apr 14, 2026
Browse files
Fix Responses API streaming for multiple auto tool calls (#39626)
Signed-off-by:
noobhappylife
<
aratar1991@hotmail.com
>
parent
b075604d
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
329 additions
and
21 deletions
+329
-21
tests/entrypoints/openai/responses/test_function_call.py
tests/entrypoints/openai/responses/test_function_call.py
+50
-16
tests/entrypoints/openai/responses/test_serving_responses.py
tests/entrypoints/openai/responses/test_serving_responses.py
+196
-0
vllm/entrypoints/openai/responses/serving.py
vllm/entrypoints/openai/responses/serving.py
+83
-5
No files found.
tests/entrypoints/openai/responses/test_function_call.py
View file @
25b3242d
...
@@ -249,40 +249,74 @@ async def test_function_calling_with_streaming_expected_arguments(
...
@@ -249,40 +249,74 @@ async def test_function_calling_with_streaming_expected_arguments(
"additionalProperties"
:
False
,
"additionalProperties"
:
False
,
},
},
"strict"
:
True
,
"strict"
:
True
,
}
},
{
"type"
:
"function"
,
"name"
:
"get_time"
,
"description"
:
"Get current local time for provided location."
,
"parameters"
:
{
"type"
:
"object"
,
"properties"
:
{
"location"
:
{
"type"
:
"string"
},
},
"required"
:
[
"location"
],
"additionalProperties"
:
False
,
},
"strict"
:
True
,
},
]
]
stream_response
=
await
client
.
responses
.
create
(
stream_response
=
await
client
.
responses
.
create
(
model
=
model_name
,
model
=
model_name
,
input
=
"Can you tell me what the current weather is in Berlin?"
,
input
=
(
"Use tools only. Call get_weather for Berlin and get_time for Tokyo. "
"Do not answer directly."
),
tools
=
tools
,
tools
=
tools
,
stream
=
True
,
stream
=
True
,
)
)
tool_call_item
=
None
tool_call_items
=
{}
completed_event
=
None
arguments_done_events
=
{}
completed_events
=
{}
async
for
event
in
stream_response
:
async
for
event
in
stream_response
:
if
(
if
(
event
.
type
==
"response.output_item.added"
event
.
type
==
"response.output_item.added"
and
event
.
item
.
type
==
"function_call"
and
event
.
item
.
type
==
"function_call"
):
):
tool_call_item
=
event
.
item
tool_call_items
[
event
.
output_index
]
=
event
.
item
elif
event
.
type
==
"response.function_call_arguments.delta"
and
tool_call_item
:
elif
event
.
type
==
"response.function_call_arguments.delta"
:
tool_call_item
=
tool_call_items
[
event
.
output_index
]
tool_call_item
.
arguments
+=
event
.
delta
tool_call_item
.
arguments
+=
event
.
delta
elif
event
.
type
==
"response.function_call_arguments.done"
:
arguments_done_events
[
event
.
output_index
]
=
event
elif
(
elif
(
event
.
type
==
"response.output_item.done"
event
.
type
==
"response.output_item.done"
and
event
.
item
.
type
==
"function_call"
and
event
.
item
.
type
==
"function_call"
):
):
completed_event
=
event
completed_events
[
event
.
output_index
]
=
event
assert
tool_call_item
is
not
None
assert
len
(
tool_call_items
)
>=
2
assert
tool_call_item
.
type
==
"function_call"
assert
len
(
arguments_done_events
)
>=
2
assert
tool_call_item
.
name
==
"get_weather"
assert
len
(
completed_events
)
>=
2
assert
completed_event
is
not
None
assert
tool_call_item
.
arguments
==
completed_event
.
item
.
arguments
tool_calls_by_name
=
{
assert
tool_call_item
.
name
==
completed_event
.
item
.
name
event
.
item
.
name
:
(
args
=
json
.
loads
(
tool_call_item
.
arguments
)
tool_call_items
[
output_index
],
assert
"location"
in
args
arguments_done_events
[
output_index
],
assert
args
[
"location"
]
is
not
None
event
.
item
,
)
for
output_index
,
event
in
completed_events
.
items
()
}
assert
{
"get_weather"
,
"get_time"
}.
issubset
(
tool_calls_by_name
)
for
added_item
,
arguments_done_event
,
completed_item
in
tool_calls_by_name
.
values
():
assert
added_item
.
type
==
"function_call"
assert
added_item
.
arguments
==
arguments_done_event
.
arguments
assert
added_item
.
arguments
==
completed_item
.
arguments
assert
added_item
.
name
==
arguments_done_event
.
name
assert
added_item
.
name
==
completed_item
.
name
args
=
json
.
loads
(
added_item
.
arguments
)
assert
"location"
in
args
assert
args
[
"location"
]
is
not
None
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
asyncio
...
...
tests/entrypoints/openai/responses/test_serving_responses.py
View file @
25b3242d
...
@@ -27,7 +27,9 @@ from openai.types.responses.tool import (
...
@@ -27,7 +27,9 @@ from openai.types.responses.tool import (
import
vllm.envs
as
envs
import
vllm.envs
as
envs
from
vllm.entrypoints.mcp.tool_server
import
ToolServer
from
vllm.entrypoints.mcp.tool_server
import
ToolServer
from
vllm.entrypoints.openai.engine.protocol
import
(
from
vllm.entrypoints.openai.engine.protocol
import
(
DeltaFunctionCall
,
DeltaMessage
,
DeltaMessage
,
DeltaToolCall
,
ErrorResponse
,
ErrorResponse
,
RequestResponseMetadata
,
RequestResponseMetadata
,
)
)
...
@@ -928,3 +930,197 @@ class TestStreamingReasoningToContentTransition:
...
@@ -928,3 +930,197 @@ class TestStreamingReasoningToContentTransition:
]
]
assert
len
(
item_done_events
)
==
1
assert
len
(
item_done_events
)
==
1
assert
isinstance
(
item_done_events
[
0
].
item
,
ResponseReasoningItem
)
assert
isinstance
(
item_done_events
[
0
].
item
,
ResponseReasoningItem
)
class
TestAutoToolStreaming
:
@
staticmethod
async
def
_collect_events
(
delta_sequence
:
list
[
DeltaMessage
]):
serving
=
_make_serving_instance_with_reasoning
()
_mock_parser_with_reasoning
(
serving
,
delta_sequence
)
contexts
=
[
_make_simple_context_with_output
(
"chunk"
,
[
i
])
for
i
in
range
(
len
(
delta_sequence
))
]
async
def
result_generator
():
for
ctx
in
contexts
:
yield
ctx
request
=
ResponsesRequest
(
input
=
"hi"
,
tools
=
[
{
"type"
:
"function"
,
"name"
:
"get_weather"
,
"description"
:
"Get weather."
,
"parameters"
:
{
"type"
:
"object"
,
"properties"
:
{
"location"
:
{
"type"
:
"string"
}},
"required"
:
[
"location"
],
"additionalProperties"
:
False
,
},
}
],
tool_choice
=
"auto"
,
stream
=
True
,
)
sampling_params
=
SamplingParams
(
max_tokens
=
64
)
metadata
=
RequestResponseMetadata
(
request_id
=
"req"
)
_identity_increment
.
_counter
=
0
# type: ignore
events
=
[]
async
for
event
in
serving
.
_process_simple_streaming_events
(
request
=
request
,
sampling_params
=
sampling_params
,
result_generator
=
result_generator
(),
context
=
SimpleContext
(),
model_name
=
"test-model"
,
tokenizer
=
MagicMock
(),
request_metadata
=
metadata
,
created_time
=
0
,
_increment_sequence_number_and_return
=
_identity_increment
,
):
events
.
append
(
event
)
return
events
@
pytest
.
mark
.
skip_global_cleanup
@
pytest
.
mark
.
asyncio
async
def
test_auto_multi_tool_streaming_opens_one_item_per_tool
(
self
,
monkeypatch
):
monkeypatch
.
setattr
(
envs
,
"VLLM_USE_EXPERIMENTAL_PARSER_CONTEXT"
,
False
)
delta_sequence
=
[
DeltaMessage
(
tool_calls
=
[
DeltaToolCall
(
id
=
"call_vienna"
,
type
=
"function"
,
index
=
0
,
function
=
DeltaFunctionCall
(
name
=
"get_weather"
,
arguments
=
""
,
),
)
]
),
DeltaMessage
(
tool_calls
=
[
DeltaToolCall
(
index
=
0
,
function
=
DeltaFunctionCall
(
arguments
=
'{"location":"Vienna"}'
,
),
)
]
),
DeltaMessage
(
tool_calls
=
[
DeltaToolCall
(
id
=
"call_berlin"
,
type
=
"function"
,
index
=
1
,
function
=
DeltaFunctionCall
(
name
=
"get_weather"
,
arguments
=
'{"location":"Berlin"}'
,
),
)
]
),
]
events
=
await
self
.
_collect_events
(
delta_sequence
)
function_items
=
[
event
for
event
in
events
if
event
.
type
==
"response.output_item.added"
and
getattr
(
event
.
item
,
"type"
,
None
)
==
"function_call"
]
assert
len
(
function_items
)
==
2
assert
[
event
.
item
.
name
for
event
in
function_items
]
==
[
"get_weather"
,
"get_weather"
,
]
assert
[
event
.
output_index
for
event
in
function_items
]
==
[
0
,
1
]
argument_deltas
=
[
event
.
delta
for
event
in
events
if
event
.
type
==
"response.function_call_arguments.delta"
]
assert
argument_deltas
==
[
'{"location":"Vienna"}'
,
'{"location":"Berlin"}'
,
]
argument_done
=
[
event
for
event
in
events
if
event
.
type
==
"response.function_call_arguments.done"
]
assert
[
event
.
arguments
for
event
in
argument_done
]
==
[
'{"location":"Vienna"}'
,
'{"location":"Berlin"}'
,
]
assert
[
event
.
output_index
for
event
in
argument_done
]
==
[
0
,
1
]
function_done
=
[
event
for
event
in
events
if
event
.
type
==
"response.output_item.done"
and
getattr
(
event
.
item
,
"type"
,
None
)
==
"function_call"
]
assert
[
event
.
item
.
arguments
for
event
in
function_done
]
==
[
'{"location":"Vienna"}'
,
'{"location":"Berlin"}'
,
]
assert
[
event
.
output_index
for
event
in
function_done
]
==
[
0
,
1
]
@
pytest
.
mark
.
skip_global_cleanup
@
pytest
.
mark
.
asyncio
async
def
test_auto_tool_choice_first_delta_tool_call_does_not_duplicate_item
(
self
,
monkeypatch
):
monkeypatch
.
setattr
(
envs
,
"VLLM_USE_EXPERIMENTAL_PARSER_CONTEXT"
,
False
)
delta_sequence
=
[
DeltaMessage
(
tool_calls
=
[
DeltaToolCall
(
id
=
"call_test"
,
type
=
"function"
,
index
=
0
,
function
=
DeltaFunctionCall
(
name
=
"get_weather"
,
arguments
=
""
,
),
)
]
),
DeltaMessage
(
tool_calls
=
[
DeltaToolCall
(
index
=
0
,
function
=
DeltaFunctionCall
(
arguments
=
'{"location":"Berlin"}'
,
),
)
]
),
]
events
=
await
self
.
_collect_events
(
delta_sequence
)
function_items
=
[
event
for
event
in
events
if
event
.
type
==
"response.output_item.added"
and
getattr
(
event
.
item
,
"type"
,
None
)
==
"function_call"
]
assert
len
(
function_items
)
==
1
assert
function_items
[
0
].
item
.
name
==
"get_weather"
argument_deltas
=
[
event
.
delta
for
event
in
events
if
event
.
type
==
"response.function_call_arguments.delta"
]
assert
""
.
join
(
argument_deltas
)
==
'{"location":"Berlin"}'
vllm/entrypoints/openai/responses/serving.py
View file @
25b3242d
...
@@ -1341,6 +1341,7 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -1341,6 +1341,7 @@ class OpenAIServingResponses(OpenAIServing):
current_content_index
=
0
current_content_index
=
0
current_output_index
=
0
current_output_index
=
0
current_item_id
=
""
current_item_id
=
""
current_tool_call_index
:
int
|
None
=
None
parser
=
self
.
parser
(
tokenizer
,
request
.
tools
)
if
self
.
parser
else
None
parser
=
self
.
parser
(
tokenizer
,
request
.
tools
)
if
self
.
parser
else
None
first_delta_sent
=
False
first_delta_sent
=
False
previous_delta_messages
:
list
[
DeltaMessage
]
=
[]
previous_delta_messages
:
list
[
DeltaMessage
]
=
[]
...
@@ -1368,6 +1369,7 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -1368,6 +1369,7 @@ class OpenAIServingResponses(OpenAIServing):
)
)
if
not
delta_message
:
if
not
delta_message
:
continue
continue
tool_call_item_started
=
False
if
not
first_delta_sent
:
if
not
first_delta_sent
:
current_item_id
=
random_uuid
()
current_item_id
=
random_uuid
()
if
delta_message
.
tool_calls
:
if
delta_message
.
tool_calls
:
...
@@ -1384,6 +1386,7 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -1384,6 +1386,7 @@ class OpenAIServingResponses(OpenAIServing):
current_tool_call_name
=
delta_message
.
tool_calls
[
current_tool_call_name
=
delta_message
.
tool_calls
[
0
0
].
function
.
name
].
function
.
name
current_tool_call_index
=
delta_message
.
tool_calls
[
0
].
index
yield
_increment_sequence_number_and_return
(
yield
_increment_sequence_number_and_return
(
ResponseOutputItemAddedEvent
(
ResponseOutputItemAddedEvent
(
type
=
"response.output_item.added"
,
type
=
"response.output_item.added"
,
...
@@ -1394,13 +1397,12 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -1394,13 +1397,12 @@ class OpenAIServingResponses(OpenAIServing):
id
=
current_item_id
,
id
=
current_item_id
,
call_id
=
current_tool_call_id
,
call_id
=
current_tool_call_id
,
name
=
current_tool_call_name
,
name
=
current_tool_call_name
,
arguments
=
delta_message
.
tool_calls
[
arguments
=
""
,
0
].
function
.
arguments
,
status
=
"in_progress"
,
status
=
"in_progress"
,
),
),
)
)
)
)
tool_call_item_started
=
True
elif
delta_message
.
reasoning
:
elif
delta_message
.
reasoning
:
yield
_increment_sequence_number_and_return
(
yield
_increment_sequence_number_and_return
(
ResponseOutputItemAddedEvent
(
ResponseOutputItemAddedEvent
(
...
@@ -1572,6 +1574,79 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -1572,6 +1574,79 @@ class OpenAIServingResponses(OpenAIServing):
# reset previous delta messages
# reset previous delta messages
previous_delta_messages
=
[]
previous_delta_messages
=
[]
if
delta_message
.
tool_calls
and
delta_message
.
tool_calls
[
0
].
function
:
if
delta_message
.
tool_calls
and
delta_message
.
tool_calls
[
0
].
function
:
tool_call
=
delta_message
.
tool_calls
[
0
]
tool_call_function
=
tool_call
.
function
if
(
current_tool_call_index
is
not
None
and
tool_call
.
index
is
not
None
and
tool_call
.
index
!=
current_tool_call_index
and
tool_call_function
is
not
None
and
tool_call_function
.
name
is
not
None
):
# From one tool call to another, finalize the previous
# function-call item before opening the next one.
parts
=
[]
for
pm
in
previous_delta_messages
:
if
pm
.
tool_calls
:
previous_tool_call
=
pm
.
tool_calls
[
0
]
if
previous_tool_call
.
function
is
not
None
:
parts
.
append
(
previous_tool_call
.
function
.
arguments
or
""
)
tool_call_arguments
=
""
.
join
(
parts
)
yield
_increment_sequence_number_and_return
(
ResponseFunctionCallArgumentsDoneEvent
(
type
=
"response.function_call_arguments.done"
,
sequence_number
=-
1
,
output_index
=
current_output_index
,
item_id
=
current_item_id
,
arguments
=
tool_call_arguments
,
name
=
current_tool_call_name
,
)
)
function_call_item
=
ResponseFunctionToolCall
(
type
=
"function_call"
,
name
=
current_tool_call_name
,
arguments
=
tool_call_arguments
,
status
=
"completed"
,
id
=
current_item_id
,
call_id
=
current_tool_call_id
,
)
yield
_increment_sequence_number_and_return
(
ResponseOutputItemDoneEvent
(
type
=
"response.output_item.done"
,
sequence_number
=-
1
,
output_index
=
current_output_index
,
item
=
function_call_item
,
)
)
# Reset previous delta messages so the next tool call
# does not reuse arguments from the completed item.
previous_delta_messages
=
[]
current_output_index
+=
1
current_item_id
=
random_uuid
()
current_tool_call_name
=
tool_call_function
.
name
current_tool_call_id
=
f
"call_
{
random_uuid
()
}
"
current_tool_call_index
=
tool_call
.
index
yield
_increment_sequence_number_and_return
(
ResponseOutputItemAddedEvent
(
type
=
"response.output_item.added"
,
sequence_number
=-
1
,
output_index
=
current_output_index
,
item
=
ResponseFunctionToolCallItem
(
type
=
"function_call"
,
id
=
current_item_id
,
call_id
=
current_tool_call_id
,
name
=
current_tool_call_name
,
arguments
=
""
,
status
=
"in_progress"
,
),
)
)
current_content_index
=
0
tool_call_item_started
=
True
if
delta_message
.
tool_calls
[
0
].
function
.
arguments
:
if
delta_message
.
tool_calls
[
0
].
function
.
arguments
:
yield
_increment_sequence_number_and_return
(
yield
_increment_sequence_number_and_return
(
ResponseFunctionCallArgumentsDeltaEvent
(
ResponseFunctionCallArgumentsDeltaEvent
(
...
@@ -1583,7 +1658,10 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -1583,7 +1658,10 @@ class OpenAIServingResponses(OpenAIServing):
)
)
)
)
# tool call initiated with no arguments
# tool call initiated with no arguments
elif
delta_message
.
tool_calls
[
0
].
function
.
name
:
elif
(
delta_message
.
tool_calls
[
0
].
function
.
name
and
not
tool_call_item_started
):
# send done with current content part
# send done with current content part
# and add new function call item
# and add new function call item
yield
_increment_sequence_number_and_return
(
yield
_increment_sequence_number_and_return
(
...
@@ -1628,11 +1706,11 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -1628,11 +1706,11 @@ class OpenAIServingResponses(OpenAIServing):
)
)
current_output_index
+=
1
current_output_index
+=
1
current_item_id
=
random_uuid
()
current_item_id
=
random_uuid
()
assert
delta_message
.
tool_calls
[
0
].
function
is
not
None
current_tool_call_name
=
delta_message
.
tool_calls
[
current_tool_call_name
=
delta_message
.
tool_calls
[
0
0
].
function
.
name
].
function
.
name
current_tool_call_id
=
f
"call_
{
random_uuid
()
}
"
current_tool_call_id
=
f
"call_
{
random_uuid
()
}
"
current_tool_call_index
=
delta_message
.
tool_calls
[
0
].
index
yield
_increment_sequence_number_and_return
(
yield
_increment_sequence_number_and_return
(
ResponseOutputItemAddedEvent
(
ResponseOutputItemAddedEvent
(
type
=
"response.output_item.added"
,
type
=
"response.output_item.added"
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment