# test_tool_calls_serialization.py

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Unit tests for tool_calls Iterable → list materialisation.

Regression tests for https://github.com/vllm-project/vllm/issues/34792.

Setting VLLM_LOGGING_LEVEL=debug caused tool calling to break for Mistral
models because:
  1. The OpenAI Python SDK types tool_calls as Iterable[...] in
     ChatCompletionAssistantMessageParam.
  2. Pydantic v2, when validating from Python objects (not from raw JSON),
     wraps Iterable fields in a one-shot lazy iterator.
  3. Debug logging called model_dump_json() which consumed that iterator.
  4. The Mistral tokenizer then saw empty tool_calls and raised
     "ValueError: Unexpected tool call id ...".
"""

import pytest

from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest


def _make_tool_call(tc_id: str, name: str, args: str) -> dict:
    return {
        "id": tc_id,
        "type": "function",
        "function": {"name": name, "arguments": args},
    }


def _make_request(messages: list) -> ChatCompletionRequest:
    """Construct a ChatCompletionRequest for ``"test-model"`` from *messages*.

    Args:
        messages: Chat messages passed through as the ``messages`` field.

    Returns:
        The request object produced by the ChatCompletionRequest constructor.
    """
    request_kwargs = {"model": "test-model", "messages": messages}
    return ChatCompletionRequest(**request_kwargs)


def test_tool_calls_list_preserved_after_model_dump():
    """Assistant tool_calls must survive a call to model_dump_json().

    The request is built from plain Python dicts (as in the Anthropic → OpenAI
    conversion path).  Pydantic v2 used to wrap the Iterable tool_calls field
    in a one-shot iterator, so serialising the request exhausted it and later
    readers (e.g. the Mistral tokenizer) saw an empty sequence.
    """
    call_id = "call_abc123"
    weather_call = _make_tool_call(call_id, "get_weather", '{"city": "Paris"}')

    user_turn = {"role": "user", "content": "What is the weather in Paris?"}
    assistant_turn = {
        "role": "assistant",
        "content": None,
        "tool_calls": [weather_call],
    }
    tool_turn = {
        "role": "tool",
        "tool_call_id": call_id,
        "content": '{"temperature": 20}',
    }
    request = _make_request([user_turn, assistant_turn, tool_turn])

    # Stand-in for debug logging, which serialised the request and thereby
    # triggered the original bug; the JSON output itself is not needed.
    request.model_dump_json()

    # Index 1 is the assistant turn; its tool_calls must still be readable.
    assistant_msg = request.messages[1]
    assert isinstance(assistant_msg, dict)

    tool_calls = assistant_msg.get("tool_calls")
    assert tool_calls is not None, "tool_calls must not be None after model_dump_json"
    assert isinstance(tool_calls, list), "tool_calls must be a list"
    assert len(tool_calls) > 0, "tool_calls must not be empty after model_dump_json"


def test_tool_calls_from_generator_are_materialised():
    """A generator supplied as tool_calls must be re-readable after validation."""
    search_call = _make_tool_call("call_gen1", "search", '{"query": "vllm"}')

    # One-shot generator: it can be consumed only once unless validation
    # materialises it.
    one_shot = (call for call in (search_call,))

    user_turn = {"role": "user", "content": "Search for vllm"}
    assistant_turn = {
        "role": "assistant",
        "content": None,
        "tool_calls": one_shot,
    }
    request = _make_request([user_turn, assistant_turn])

    assistant_msg = request.messages[1]
    assert isinstance(assistant_msg, dict)

    # Read the field two times in a row; the second pass must neither raise
    # nor come back empty.
    first_read, second_read = (
        list(assistant_msg.get("tool_calls", [])) for _ in range(2)
    )

    assert len(first_read) == 1, "First read must return the tool call"
    assert len(second_read) == 1, "Second read must also return the tool call"


def test_tool_calls_list_passthrough():
    """A tool_calls value that is already a list must still be a list."""
    calc_call = _make_tool_call("call_list1", "calculate", '{"expr": "2+2"}')
    user_turn = {"role": "user", "content": "Calculate 2+2"}
    assistant_turn = {
        "role": "assistant",
        "content": None,
        "tool_calls": [calc_call],
    }

    request = _make_request([user_turn, assistant_turn])

    # Index 1 is the assistant turn.
    assistant_msg = request.messages[1]
    assert isinstance(assistant_msg, dict)

    stored_tool_calls = assistant_msg.get("tool_calls")
    assert isinstance(stored_tool_calls, list)


def test_messages_without_tool_calls_unaffected():
    """Messages that carry no tool_calls must not gain any during validation."""
    turns = (
        ("system", "You are a helpful assistant."),
        ("user", "Hello!"),
        ("assistant", "Hi there!"),
    )
    request = _make_request(
        [{"role": role, "content": text} for role, text in turns]
    )

    # Every message must end up with tool_calls either absent/None or empty.
    for msg in request.messages:
        assert isinstance(msg, dict)
        tool_calls = msg.get("tool_calls")
        assert tool_calls is None or tool_calls == []


@pytest.mark.parametrize("num_tool_calls", [1, 3])
def test_multiple_tool_calls_materialised(num_tool_calls: int):
    """Every tool call in one assistant message is kept, before and after a dump."""
    source_calls = [
        _make_tool_call(f"call_{i}", f"func_{i}", f'{{"arg": {i}}}')
        for i in range(num_tool_calls)
    ]

    # Hand the calls over as a plain iterator rather than the list itself, so
    # the request has to materialise them.
    assistant_turn = {
        "role": "assistant",
        "content": None,
        "tool_calls": iter(source_calls),
    }
    user_turn = {"role": "user", "content": "Do things"}
    request = _make_request([user_turn, assistant_turn])

    assistant_msg = request.messages[1]
    assert isinstance(assistant_msg, dict)

    materialised = assistant_msg.get("tool_calls")
    assert isinstance(materialised, list)
    assert len(materialised) == num_tool_calls

    # Serialising the request must not drain the stored tool calls.
    request.model_dump_json()
    calls_after_dump = assistant_msg.get("tool_calls", [])
    assert len(calls_after_dump) == num_tool_calls