[Bugfix] Fix crash when tool_choice=required exceeds max_tokens (#36841)

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>

[Bugfix] Fix crash when tool_choice=required exceeds max_tokens (#36841)
Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
5a71cdd7 · Chauncey · GitHub · f0d3658c · 5a71cdd7 · 5a71cdd7
Unverified Commit 5a71cdd7 authored Mar 12, 2026 by Chauncey Committed by GitHub Mar 12, 2026
3 changed files
--- a/tests/entrypoints/openai/test_completion_with_function_calling.py
+++ b/tests/entrypoints/openai/test_completion_with_function_calling.py
@@ -514,3 +514,27 @@ async def test_inconsistent_tool_choice_and_tools(
            ],
            tool_choice={},
        )
+@pytest.mark.asyncio
+async def test_max_tokens_with_tool_choice_required(client: openai.AsyncOpenAI):
+    """ """
+    models = await client.models.list()
+    model_name: str = models.data[0].id
+    # This combination previously crashed the engine
+    chat_completion = await client.chat.completions.create(
+        messages=messages,
+        temperature=0,
+        max_completion_tokens=1,
+        model=model_name,
+        tools=tools,
+        tool_choice="required",
+    )
+    # When `tool_choice="required"` and the tokens of `tools` exceed `max_tokens`,
+    # both `tool_calls` and `content` should be empty.
+    # This behavior should be consistent with OpenAI.
+    choice = chat_completion.choices[0]
+    assert choice.finish_reason == "length"
+    assert len(choice.message.tool_calls) == 0
+    assert choice.message.content == ""
--- a/vllm/entrypoints/openai/chat_completion/serving.py
+++ b/vllm/entrypoints/openai/chat_completion/serving.py
@@ -1507,7 +1507,7 @@ class OpenAIServingChat(OpenAIServing):
            elif request.tool_choice and request.tool_choice == "required":
                tool_call_class_items = []
-                assert tool_calls is not None and len(tool_calls) > 0
+                tool_calls = tool_calls or []
                for idx, tool_call in enumerate(tool_calls):
                    # Use native ID if available,
                    # otherwise generate ID with correct id_type

--- a/vllm/entrypoints/openai/engine/serving.py
+++ b/vllm/entrypoints/openai/engine/serving.py
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import asyncio
+import contextlib
 import json
 import time
 from collections.abc import AsyncGenerator, Callable, Mapping, Sequence
@@ -13,7 +14,7 @@ from fastapi import Request
 from openai.types.responses import (
    ToolChoiceFunction,
 )
-from pydantic import ConfigDict, TypeAdapter
+from pydantic import ConfigDict, TypeAdapter, ValidationError
 from starlette.datastructures import Headers
 import vllm.envs as envs
@@ -1125,16 +1126,18 @@ class OpenAIServing:
            )
            content = None  # Clear content since tool is called.
        elif request.tool_choice == "required":
-            assert content is not None
+            tool_calls = []
-            tool_calls = TypeAdapter(list[FunctionDefinition]).validate_json(content)
+            with contextlib.suppress(ValidationError):
-            function_calls.extend(
+                content = content or ""
-                [
+                tool_calls = TypeAdapter(list[FunctionDefinition]).validate_json(
+                    content
+                )
+            for tool_call in tool_calls:
+                function_calls.append(
                    FunctionCall(
                        name=tool_call.name,
                        arguments=json.dumps(tool_call.parameters, ensure_ascii=False),
                    )
-                    for tool_call in tool_calls
-                ]
                )
            content = None  # Clear content since tool is called.
        elif (