test_tool_calls_serialization.py 5.42 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Unit tests for tool_calls Iterable → list materialisation.

Regression tests for https://github.com/vllm-project/vllm/issues/34792.

Setting VLLM_LOGGING_LEVEL=debug caused tool calling to break for Mistral
models because:
  1. The OpenAI Python SDK types tool_calls as Iterable[...] in
     ChatCompletionAssistantMessageParam.
  2. Pydantic v2, when validating from Python objects (not from raw JSON),
     wraps Iterable fields in a one-shot lazy iterator.
  3. Debug logging called model_dump_json() which consumed that iterator.
  4. The Mistral tokenizer then saw empty tool_calls and raised
     "ValueError: Unexpected tool call id ...".
"""

import pytest

from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest


def _make_tool_call(tc_id: str, name: str, args: str) -> dict:
    return {
        "id": tc_id,
        "type": "function",
        "function": {"name": name, "arguments": args},
    }


def _make_request(messages: list) -> ChatCompletionRequest:
    """Construct a ChatCompletionRequest around ``messages``.

    The model name is a fixed placeholder ("test-model"); only the messages
    vary between tests.
    """
    request_fields = {"model": "test-model", "messages": messages}
    return ChatCompletionRequest(**request_fields)


def test_tool_calls_list_preserved_after_model_dump():
    """Serialising a request must not drain the assistant's tool_calls.

    The request is assembled from plain Python dicts (the same shape as the
    Anthropic → OpenAI conversion path produces).  Before the fix, Pydantic v2
    stored the Iterable tool_calls as a one-shot iterator, so a single
    model_dump_json() call exhausted it and later readers (e.g. the Mistral
    tokenizer) found nothing.  This test serialises once and then checks that
    the assistant message still exposes a non-empty list.
    """
    call_id = "call_abc123"
    weather_call = _make_tool_call(call_id, "get_weather", '{"city": "Paris"}')

    user_turn = {"role": "user", "content": "What is the weather in Paris?"}
    assistant_turn = {
        "role": "assistant",
        "content": None,
        "tool_calls": [weather_call],
    }
    tool_turn = {
        "role": "tool",
        "tool_call_id": call_id,
        "content": '{"temperature": 20}',
    }

    request = _make_request([user_turn, assistant_turn, tool_turn])

    # Stand-in for debug logging, which is what triggered the original bug.
    request.model_dump_json()

    # Index 1 is the assistant turn; its tool_calls must have survived the dump.
    assistant_after_dump = request.messages[1]
    assert isinstance(assistant_after_dump, dict)
    surviving_calls = assistant_after_dump.get("tool_calls")
    assert surviving_calls is not None, (
        "tool_calls must not be None after model_dump_json"
    )
    assert isinstance(surviving_calls, list), "tool_calls must be a list"
    assert len(surviving_calls) > 0, (
        "tool_calls must not be empty after model_dump_json"
    )


def test_tool_calls_from_generator_are_materialised():
    """A generator passed as tool_calls must be readable more than once.

    The assistant message is given a one-shot generator.  After validation,
    reading tool_calls twice in a row must yield the single tool call both
    times, which only holds if the generator was materialised.
    """
    search_call = _make_tool_call("call_gen1", "search", '{"query": "vllm"}')
    # Generator expression: a one-shot generator, exhausted after one pass.
    one_shot_calls = (call for call in (search_call,))

    conversation = [
        {"role": "user", "content": "Search for vllm"},
        {"role": "assistant", "content": None, "tool_calls": one_shot_calls},
    ]

    request = _make_request(conversation)
    validated = request.messages[1]
    assert isinstance(validated, dict)

    # Two consecutive reads — the second must not come back empty or raise.
    reads = [list(validated.get("tool_calls", [])) for _ in range(2)]
    first_read, second_read = reads

    assert len(first_read) == 1, "First read must return the tool call"
    assert len(second_read) == 1, "Second read must also return the tool call"


def test_tool_calls_list_passthrough():
    """A tool_calls value supplied as a list is still a list after validation."""
    calc_call = _make_tool_call("call_list1", "calculate", '{"expr": "2+2"}')
    user_turn = {"role": "user", "content": "Calculate 2+2"}
    assistant_turn = {
        "role": "assistant",
        "content": None,
        "tool_calls": [calc_call],
    }

    request = _make_request([user_turn, assistant_turn])

    validated = request.messages[1]
    assert isinstance(validated, dict)
    stored_calls = validated.get("tool_calls")
    assert isinstance(stored_calls, list)


def test_messages_without_tool_calls_unaffected():
    """Plain system/user/assistant messages gain no tool_calls from validation."""
    system_turn = {"role": "system", "content": "You are a helpful assistant."}
    user_turn = {"role": "user", "content": "Hello!"}
    assistant_turn = {"role": "assistant", "content": "Hi there!"}

    request = _make_request([system_turn, user_turn, assistant_turn])

    # Every message must come back as a dict with tool_calls absent/None or empty.
    for message in request.messages:
        assert isinstance(message, dict)
        maybe_calls = message.get("tool_calls")
        assert maybe_calls is None or maybe_calls == []


@pytest.mark.parametrize("num_tool_calls", [1, 3])
def test_multiple_tool_calls_materialised(num_tool_calls: int):
    """Every tool call in one assistant message survives validation and dumping.

    The calls are handed over as a one-shot iterator; the validated message
    must hold a list of the same length, both before and after
    model_dump_json() is called on the request.
    """
    expected_calls = []
    for i in range(num_tool_calls):
        expected_calls.append(
            _make_tool_call(f"call_{i}", f"func_{i}", f'{{"arg": {i}}}')
        )

    user_turn = {"role": "user", "content": "Do things"}
    assistant_turn = {
        "role": "assistant",
        "content": None,
        # iter() gives validation a single-pass iterator instead of the list.
        "tool_calls": iter(expected_calls),
    }

    request = _make_request([user_turn, assistant_turn])
    validated = request.messages[1]
    assert isinstance(validated, dict)

    materialised = validated.get("tool_calls")
    assert isinstance(materialised, list)
    assert len(materialised) == num_tool_calls

    # Serialising the request must leave the count unchanged.
    request.model_dump_json()
    assert len(validated.get("tool_calls", [])) == num_tool_calls