test_function_call.py 13 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import json

import openai  # use the official client for correctness check
import pytest

# Model that the tests in this module send their requests to.
MODEL_NAME = "Qwen/Qwen3-1.7B"
# Module-level function-tool definitions passed as `tools=` to
# `client.responses.create(...)` by the tests that do not define their own
# local `tools` list. Each entry's `parameters` value is a JSON-schema object.
tools = [
    # Tool 1: current weather. `country` and `unit` are required; `city` has a
    # default and `options` points at a nested schema through `$ref`.
    {
        "type": "function",
        "name": "get_current_weather",
        "description": "Get the current weather in a given location",
        "parameters": {
            "type": "object",
            "properties": {
                "city": {
                    "type": "string",
                    "description": "The city to find the weather for, e.g. 'Vienna'",
                    "default": "Vienna",
                },
                "country": {
                    "type": "string",
                    "description": "The country that the city is in, e.g. 'Austria'",
                },
                "unit": {
                    "type": "string",
                    "description": "The unit to fetch the temperature in",
                    "enum": ["celsius", "fahrenheit"],
                },
                # Resolved against the `$defs` section of this same schema.
                "options": {
                    "$ref": "#/$defs/WeatherOptions",
                    "description": "Optional parameters for weather query",
                },
            },
            "required": ["country", "unit"],
            # Schema definitions referenced from `properties` via `$ref`.
            "$defs": {
                "WeatherOptions": {
                    "title": "WeatherOptions",
                    "type": "object",
                    "additionalProperties": False,
                    "properties": {
                        "unit": {
                            "type": "string",
                            "enum": ["celsius", "fahrenheit"],
                            "default": "celsius",
                            "description": "Temperature unit",
                            "title": "Temperature Unit",
                        },
                        "include_forecast": {
                            "type": "boolean",
                            "default": False,
                            "description": "Whether to include a 24-hour forecast",
                            "title": "Include Forecast",
                        },
                        "language": {
                            "type": "string",
                            "default": "zh-CN",
                            "description": "Language of the response",
                            "title": "Language",
                            "enum": ["zh-CN", "en-US", "ja-JP"],
                        },
                    },
                },
            },
        },
    },
    # Tool 2: multi-day forecast. `country`, `days` and `unit` are required;
    # `city` has a default.
    {
        "type": "function",
        "name": "get_forecast",
        "description": "Get the weather forecast for a given location",
        "parameters": {
            "type": "object",
            "properties": {
                "city": {
                    "type": "string",
                    "description": "The city to get the forecast for, e.g. 'Vienna'",
                    "default": "Vienna",
                },
                "country": {
                    "type": "string",
                    "description": "The country that the city is in, e.g. 'Austria'",
                },
                "days": {
                    "type": "integer",
                    "description": "Number of days to get the forecast for (1-7)",
                },
                "unit": {
                    "type": "string",
                    "description": "The unit to fetch the temperature in",
                    "enum": ["celsius", "fahrenheit"],
                },
            },
            "required": ["country", "days", "unit"],
        },
    },
]


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("tool_choice", ["auto", "required"])
async def test_function_tool_use(
    client: openai.AsyncOpenAI, model_name: str, tool_choice: str
):
    """Check that a non-streaming request with tools produces a function call.

    The request is sent once per ``tool_choice`` value ("auto" and
    "required") using the module-level ``tools`` list.

    Args:
        client: Async OpenAI client fixture.
        model_name: Name of the model to query.
        tool_choice: Tool-choice mode forwarded to ``responses.create``.
    """
    prompt = [
        {
            "role": "user",
            "content": "Can you tell me what the current weather is in Berlin and the "
            "forecast for the next 5 days, in fahrenheit?",
        },
    ]
    response = await client.responses.create(
        model=model_name,
        input=prompt,
        tools=tools,
        tool_choice=tool_choice,
        temperature=0.0,
    )
    assert len(response.output) >= 1

    # Keep the last output item of each kind we care about.
    tool_call = None
    reasoning = None
    for out in response.output:
        if out.type == "function_call":
            tool_call = out
        if out.type == "reasoning":
            reasoning = out

    if response.incomplete_details is None:
        # Complete response: expect a function call with JSON-decodable
        # arguments, plus a reasoning item.
        assert tool_call is not None
        assert tool_call.type == "function_call"
        assert json.loads(tool_call.arguments) is not None
        assert reasoning is not None
        assert reasoning.type == "reasoning"
    else:
        # Incomplete response: the only accepted reason is the token limit.
        # Dump the response so the cause is visible in the test log.
        print(response.model_dump_json(indent=2))
        assert response.incomplete_details.reason == "max_output_tokens"
138
139


140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_max_tokens_with_tool_choice_required(
    client: openai.AsyncOpenAI, model_name: str
):
    """Check the truncated-output case when ``tool_choice="required"``.

    The request caps ``max_output_tokens`` at 10 and asserts that no
    ``function_call`` item is returned and that the response is reported as
    incomplete because of the token limit.

    Args:
        client: Async OpenAI client fixture.
        model_name: Name of the model to query.
    """
    prompt = [
        {
            "role": "user",
            "content": "Can you tell me what the current weather is in Berlin and the "
            "forecast for the next 5 days, in fahrenheit?",
        },
    ]
    response = await client.responses.create(
        model=model_name,
        input=prompt,
        tools=tools,
        tool_choice="required",
        max_output_tokens=10,
    )
    assert len(response.output) >= 1
    for out in response.output:
        # When `tool_choice="required"` and the tokens of `tools`
        # exceed `max_output_tokens`, `function_call` should be empty.
        # This behavior should be consistent with OpenAI.
        assert out.type != "function_call"
    # Assert presence first so that a response which completed normally fails
    # with a readable assertion instead of an AttributeError on None.
    assert response.incomplete_details is not None
    assert response.incomplete_details.reason == "max_output_tokens"


168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
@pytest.mark.asyncio
async def test_named_tool_use(client: openai.AsyncOpenAI):
    """Check a full round trip when a specific function tool is named.

    The first request names ``get_weather`` in ``tool_choice``; the returned
    call is executed against a local mock and its result is sent back in a
    second request, which must produce non-empty output text.

    Args:
        client: Async OpenAI client fixture.
    """

    def get_weather(latitude: float, longitude: float) -> str:
        """
        Mock function to simulate getting weather data.
        In a real application, this would call an external weather API.
        """
        return f"Current temperature at ({latitude}, {longitude}) is 20°C."

    tools = [
        {
            "type": "function",
            "name": "get_weather",
            "description": (
                "Get current temperature for provided coordinates in celsius."
            ),
            "parameters": {
                "type": "object",
                "properties": {
                    "latitude": {"type": "number"},
                    "longitude": {"type": "number"},
                },
                "required": ["latitude", "longitude"],
                "additionalProperties": False,
            },
            "strict": True,
        }
    ]

    input_messages = [
        {"role": "user", "content": "What's the weather like in Paris today?"}
    ]

    response = await client.responses.create(
        model=MODEL_NAME,
        input=input_messages,
        tools=tools,
        tool_choice={"type": "function", "name": "get_weather"},
    )
    assert len(response.output) >= 1
    # Bind the name up front: without this, a response containing no
    # function_call item would raise UnboundLocalError at the assertion below
    # instead of failing it cleanly.
    tool_call = None
    for out in response.output:
        if out.type == "function_call":
            tool_call = out
    assert tool_call is not None
    assert tool_call.type == "function_call"
    assert tool_call.name == "get_weather"
    args = json.loads(tool_call.arguments)
    assert args["latitude"] is not None
    assert args["longitude"] is not None
    # call the tool
    result = get_weather(args["latitude"], args["longitude"])
    input_messages.append(tool_call)  # append model's function call message
    input_messages.append(
        {  # append result message
            "type": "function_call_output",
            "call_id": tool_call.call_id,
            "output": str(result),
        }
    )
    # create a new response with the tool call result
    response_2 = await client.responses.create(model=MODEL_NAME, input=input_messages)
    # check the output
    assert len(response_2.output_text) > 0
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_function_calling_with_streaming_expected_arguments(
    client: openai.AsyncOpenAI, model_name: str
):
    """Check that streamed function-call arguments are mutually consistent.

    For each function call in the stream, the arguments obtained by
    concatenating the ``delta`` events must equal the arguments carried by
    the ``function_call_arguments.done`` event and by the completed output
    item, and must decode to JSON containing a ``location`` value.

    Args:
        client: Async OpenAI client fixture.
        model_name: Name of the model to query.
    """
    tools = [
        {
            "type": "function",
            "name": "get_weather",
            "description": "Get current temperature for provided location in celsius.",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {"type": "string"},
                },
                "required": ["location"],
                "additionalProperties": False,
            },
            "strict": True,
        },
        {
            "type": "function",
            "name": "get_time",
            "description": "Get current local time for provided location.",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {"type": "string"},
                },
                "required": ["location"],
                "additionalProperties": False,
            },
            "strict": True,
        },
    ]

    stream_response = await client.responses.create(
        model=model_name,
        input=(
            "Use tools only. Call get_weather for Berlin and get_time for Tokyo. "
            "Do not answer directly."
        ),
        tools=tools,
        stream=True,
    )

    # All three maps are keyed by the event's output_index so that events
    # belonging to the same function call can be matched up afterwards.
    tool_call_items = {}
    arguments_done_events = {}
    completed_events = {}
    async for event in stream_response:
        if (
            event.type == "response.output_item.added"
            and event.item.type == "function_call"
        ):
            tool_call_items[event.output_index] = event.item
        elif event.type == "response.function_call_arguments.delta":
            # Accumulate the streamed fragments onto the item recorded when it
            # was added; the result is compared with the final values below.
            tool_call_item = tool_call_items[event.output_index]
            tool_call_item.arguments += event.delta
        elif event.type == "response.function_call_arguments.done":
            arguments_done_events[event.output_index] = event
        elif (
            event.type == "response.output_item.done"
            and event.item.type == "function_call"
        ):
            completed_events[event.output_index] = event
    assert len(tool_call_items) >= 2
    assert len(arguments_done_events) >= 2
    assert len(completed_events) >= 2

    # Function name -> (added item with accumulated arguments,
    #                   arguments.done event, completed item).
    tool_calls_by_name = {
        event.item.name: (
            tool_call_items[output_index],
            arguments_done_events[output_index],
            event.item,
        )
        for output_index, event in completed_events.items()
    }
    assert {"get_weather", "get_time"}.issubset(tool_calls_by_name)
    for added_item, arguments_done_event, completed_item in tool_calls_by_name.values():
        assert added_item.type == "function_call"
        assert added_item.arguments == arguments_done_event.arguments
        assert added_item.arguments == completed_item.arguments
        assert added_item.name == arguments_done_event.name
        assert added_item.name == completed_item.name
        args = json.loads(added_item.arguments)
        assert "location" in args
        assert args["location"] is not None
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_function_calling_with_streaming_types(
    client: openai.AsyncOpenAI, model_name: str
):
    """Check that streamed event types open and close in a balanced way.

    Opening events are pushed on a stack; each closing event must match the
    opener currently on top of the stack, and the stack must be empty once
    the stream has been consumed.

    Args:
        client: Async OpenAI client fixture.
        model_name: Name of the model to query.
    """
    # Maps each closing ("done"/"completed") event type to the opening event
    # type that must be on top of the stack when it arrives.
    opener_for = {
        "response.completed": "response.created",
        "response.output_item.done": "response.output_item.added",
        "response.output_text.done": "response.output_text.delta",
        "response.content_part.done": "response.content_part.added",
        "response.reasoning_text.done": "response.reasoning_text.delta",
        "response.reasoning_part.done": "response.reasoning_part.added",
        "response.function_call_arguments.done": "response.function_call_arguments.delta",  # noqa
    }

    events = await client.responses.create(
        model=model_name,
        input=[
            {
                "role": "user",
                "content": "Can you tell me what the current weather is in Berlin?",
            }
        ],
        tools=tools,
        stream=True,
    )

    open_events = []
    async for event in events:
        kind = event.type
        if kind == "response.created" or kind.endswith("added"):
            # Opening event: remember it until its closer shows up.
            open_events.append(kind)
        elif kind == "response.completed" or kind.endswith("done"):
            # Closing event: it must close the most recently opened block.
            assert open_events[-1] == opener_for[kind]
            open_events.pop()
        elif kind.endswith("delta") and open_events[-1] != kind:
            # Only the first delta of a run opens a block; repeats are skipped.
            open_events.append(kind)
    assert not open_events