Unverified commit c6bb5b56, authored by Nick Hill and committed by GitHub
Browse files

[BugFix] Fix engine crash caused by chat tools + response_format (#32127)


Signed-off-by: Nick Hill <nickhill123@gmail.com>
parent 9273a427
......@@ -151,3 +151,45 @@ async def test_chat_completion_with_tools(
assert chunk.choices[0].finish_reason != "tool_calls"
assert len(chunks)
assert "".join(chunks) == output_text
# Regression test for https://github.com/vllm-project/vllm/issues/32006
# Engine crash when combining response_format: json_object with
# tool_choice: required
@pytest.mark.asyncio
@pytest.mark.timeout(120)
async def test_response_format_with_tool_choice_required(
    client: openai.AsyncOpenAI, server_config: ServerConfig
):
    """
    Verify that a chat request carrying both
    response_format={"type": "json_object"} and tool_choice="required"
    completes and yields tool calls.

    This request shape used to be rejected with the validation error
    "You can only use one kind of structured outputs constraint but multiple
    are specified": json_object (from response_format) and json (derived
    from the tool schema) were both populated in StructuredOutputsParams.
    """
    # Use whichever model the server under test reports first.
    available_models = await client.models.list()
    model_name: str = available_models.data[0].id

    prompt = ensure_system_prompt(
        [{"role": "user", "content": "What is the weather in Dallas, Texas?"}],
        server_config,
    )

    # Forced tool calling together with a JSON response format is the
    # combination that previously brought the engine down.
    response = await client.chat.completions.create(
        model=model_name,
        messages=prompt,
        tools=[WEATHER_TOOL],
        tool_choice="required",
        response_format={"type": "json_object"},
        temperature=0,
        max_completion_tokens=150,
    )

    # With response_format cleared in favour of the forced tool call, the
    # request is expected to finish normally and return tool calls.
    first_choice = response.choices[0]
    assert first_choice.finish_reason == "tool_calls"

    tool_calls = first_choice.message.tool_calls
    assert tool_calls is not None
    assert len(tool_calls) > 0
......@@ -67,6 +67,7 @@ class ToolParser:
# tool_choice: "Forced Function" or "required" will override
# structured output json settings to make tool calling work correctly
request.structured_outputs.json = json_schema_from_tool
request.response_format = None
if isinstance(request, ResponsesRequest):
request.text = ResponseTextConfig()
request.text.format = ResponseFormatTextJSONSchemaConfig(
......
......@@ -370,6 +370,10 @@ class InputProcessor:
# Remember that this backend was set automatically
params.structured_outputs._backend_was_auto = True
# Run post-init validation. This is also important to ensure subsequent
# roundtrip serialization/deserialization won't fail.
params.structured_outputs.__post_init__()
def _maybe_build_mm_uuids(
self,
request_id: str,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment