[BugFix] Fix engine crash caused by chat tools + response_format (#32127)

Signed-off-by: Nick Hill <nickhill123@gmail.com>

[BugFix] Fix engine crash caused by chat tools + response_format (#32127)
Signed-off-by: Nick Hill <nickhill123@gmail.com>
c6bb5b56 · Nick Hill · GitHub · 9273a427 · c6bb5b56 · c6bb5b56
Unverified Commit c6bb5b56 authored Jan 12, 2026 by Nick Hill Committed by GitHub Jan 13, 2026
3 changed files
--- a/tests/tool_use/test_chat_completions.py
+++ b/tests/tool_use/test_chat_completions.py
@@ -151,3 +151,45 @@ async def test_chat_completion_with_tools(
    assert chunk.choices[0].finish_reason != "tool_calls"
    assert len(chunks)
    assert "".join(chunks) == output_text
+
+
+# Regression test for https://github.com/vllm-project/vllm/issues/32006
+# Engine crash when combining response_format: json_object with
+# tool_choice: required
+@pytest.mark.asyncio
+@pytest.mark.timeout(120)
+async def test_response_format_with_tool_choice_required(
+    client: openai.AsyncOpenAI, server_config: ServerConfig
+):
+    """
+    Verify that response_format: json_object combined with tool_choice: required
+    completes with tool calls instead of crashing the engine.
+
+    Before the fix, this request failed structured-outputs validation with
+    "You can only use one kind of structured outputs constraint but multiple
+    are specified", because json_object (from response_format) and json (from
+    the tool schema) were both set in StructuredOutputsParams.
+    """
+    models = await client.models.list()
+    model_name: str = models.data[0].id
+
+    # tools + tool_choice="required" + response_format used to crash the engine
+    chat_completion = await client.chat.completions.create(
+        messages=ensure_system_prompt(
+            [{"role": "user", "content": "What is the weather in Dallas, Texas?"}],
+            server_config,
+        ),
+        temperature=0,
+        max_completion_tokens=150,
+        model=model_name,
+        tools=[WEATHER_TOOL],
+        tool_choice="required",
+        response_format={"type": "json_object"},
+    )
+
+    # With the fix, response_format is cleared whenever tool_choice forces tool
+    # calling, so the request must finish normally and return tool calls
+    choice = chat_completion.choices[0]
+    assert choice.finish_reason == "tool_calls"
+    assert choice.message.tool_calls is not None
+    assert len(choice.message.tool_calls) > 0
--- a/vllm/tool_parsers/abstract_tool_parser.py
+++ b/vllm/tool_parsers/abstract_tool_parser.py
@@ -67,6 +67,7 @@ class ToolParser:
                # tool_choice: "Forced Function" or "required" will override
                # structured output json settings to make tool calling work correctly
                request.structured_outputs.json = json_schema_from_tool
+                request.response_format = None
            if isinstance(request, ResponsesRequest):
                request.text = ResponseTextConfig()
                request.text.format = ResponseFormatTextJSONSchemaConfig(

--- a/vllm/v1/engine/input_processor.py
+++ b/vllm/v1/engine/input_processor.py
@@ -370,6 +370,10 @@ class InputProcessor:
            # Remember that this backend was set automatically
            params.structured_outputs._backend_was_auto = True

+        # Run post-init validation. This is also important to ensure subsequent
+        # roundtrip serialization/deserialization won't fail.
+        params.structured_outputs.__post_init__()
+
    def _maybe_build_mm_uuids(
        self,
        request_id: str,