test_chat_completions.py 5.01 KB
Newer Older
1
2
3
4
5
from typing import List

import openai
import pytest

6
7
from .utils import (MESSAGES_WITHOUT_TOOLS, WEATHER_TOOL, ServerConfig,
                    ensure_system_prompt)
8
9
10
11
12
13


# test: make sure chat completions without tools provided work even when tools
# are enabled. This makes sure tool call chat templates work, AND that the tool
# parser stream processing doesn't change the output of the model.
@pytest.mark.asyncio
async def test_chat_completion_without_tools(client: openai.AsyncOpenAI,
                                             server_config: ServerConfig):
    """Check a chat completion made without a ``tools`` argument.

    The same request is sent twice, first non-streaming and then streaming.
    The non-streaming response must contain non-empty text, must not finish
    with ``"tool_calls"`` and must not carry tool calls. The streamed
    response must send the assistant role exactly once, exactly one finish
    reason (equal to the non-streaming one), no tool-call deltas, and content
    that concatenates to the non-streaming text.

    Args:
        client: async OpenAI client used to issue the requests.
        server_config: passed to ``ensure_system_prompt`` when building the
            message list.
    """
    models = await client.models.list()
    model_name: str = models.data[0].id

    chat_completion = await client.chat.completions.create(
        messages=ensure_system_prompt(MESSAGES_WITHOUT_TOOLS, server_config),
        temperature=0,
        max_completion_tokens=150,
        model=model_name,
        logprobs=False)
    choice = chat_completion.choices[0]
    stop_reason = choice.finish_reason
    output_text = choice.message.content

    # check to make sure we got text
    assert output_text is not None
    assert len(output_text) > 0
    assert stop_reason != "tool_calls"

    # check to make sure no tool calls were returned (None or empty list)
    assert not choice.message.tool_calls

    # make the same request, streaming
    stream = await client.chat.completions.create(
        messages=ensure_system_prompt(MESSAGES_WITHOUT_TOOLS, server_config),
        temperature=0,
        max_completion_tokens=150,
        model=model_name,
        logprobs=False,
        stream=True,
    )
    chunks: List[str] = []
    finish_reason_count = 0
    role_sent: bool = False

    # assemble streamed chunks
    async for chunk in stream:
        streamed_choice = chunk.choices[0]
        delta = streamed_choice.delta

        # make sure the role is assistant, and that it is only sent once
        if delta.role:
            assert not role_sent
            assert delta.role == 'assistant'
            role_sent = True

        if delta.content:
            chunks.append(delta.content)

        # every finish reason seen must match the non-streaming one
        if streamed_choice.finish_reason is not None:
            finish_reason_count += 1
            assert streamed_choice.finish_reason == stop_reason

        # make sure tool call chunks aren't being streamed
        assert not delta.tool_calls

    # make sure the role was sent, only 1 finish reason was sent, that chunks
    # were in fact sent, and that the chunks match non-streaming
    assert role_sent
    assert finish_reason_count == 1
    assert chunks
    assert "".join(chunks) == output_text


# test: conversation with tools enabled and provided that should not invoke
# tools, to make sure we can still get normal chat completion responses
# and that they won't be parsed as tools
@pytest.mark.asyncio
async def test_chat_completion_with_tools(client: openai.AsyncOpenAI,
                                          server_config: ServerConfig):
    """Check a tool-free conversation sent with ``tools=[WEATHER_TOOL]``.

    The same request is sent twice, first non-streaming and then streaming.
    The non-streaming response must contain non-empty text, must not finish
    with ``"tool_calls"`` and must not carry tool calls. The streamed
    response must send the assistant role exactly once, exactly one finish
    reason, no tool-call deltas, and content that concatenates to the
    non-streaming text; the final chunk's finish reason must equal the
    non-streaming one.

    Args:
        client: async OpenAI client used to issue the requests.
        server_config: passed to ``ensure_system_prompt`` when building the
            message list.
    """
    models = await client.models.list()
    model_name: str = models.data[0].id

    chat_completion = await client.chat.completions.create(
        messages=ensure_system_prompt(MESSAGES_WITHOUT_TOOLS, server_config),
        temperature=0,
        max_completion_tokens=150,
        model=model_name,
        tools=[WEATHER_TOOL],
        logprobs=False)
    choice = chat_completion.choices[0]
    stop_reason = choice.finish_reason
    output_text = choice.message.content

    # check to make sure we got text
    assert output_text is not None
    assert stop_reason != 'tool_calls'
    assert len(output_text) > 0

    # check to make sure no tool calls were returned (None or empty list)
    assert not choice.message.tool_calls

    # make the same request, streaming
    stream = await client.chat.completions.create(
        messages=ensure_system_prompt(MESSAGES_WITHOUT_TOOLS, server_config),
        temperature=0,
        max_completion_tokens=150,
        model=model_name,
        logprobs=False,
        tools=[WEATHER_TOOL],
        stream=True,
    )

    chunks: List[str] = []
    finish_reason_count = 0
    role_sent: bool = False
    # finish reason carried by the most recent chunk; after the loop this is
    # the final chunk's value (kept explicitly instead of reading the loop
    # variable once the loop has ended)
    final_finish_reason = None

    # assemble streamed chunks
    async for chunk in stream:
        streamed_choice = chunk.choices[0]
        delta = streamed_choice.delta

        # make sure the role is assistant, and that it is only sent once
        # (same check as the test without tools)
        if delta.role:
            assert not role_sent
            assert delta.role == 'assistant'
            role_sent = True

        if delta.content:
            chunks.append(delta.content)

        final_finish_reason = streamed_choice.finish_reason
        if final_finish_reason is not None:
            finish_reason_count += 1

        # make sure tool call chunks aren't being streamed
        assert not delta.tool_calls

    # make sure the role was sent, only 1 finish reason was sent, that chunks
    # were in fact sent, and that the chunks match non-streaming
    assert role_sent
    assert finish_reason_count == 1
    assert final_finish_reason == stop_reason
    assert final_finish_reason != 'tool_calls'
    assert chunks
    assert "".join(chunks) == output_text