# test_stateful.py
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import asyncio

import openai
import pytest


@pytest.mark.asyncio
async def test_store(client: openai.AsyncOpenAI):
    """Check that responses can be retrieved unless created with store=False.

    A response created with default arguments is retrieved by id afterwards;
    a response created with ``store=False`` must raise ``NotFoundError`` on
    retrieval.
    """
    # By default, store is True.
    response = await client.responses.create(input="Hello!")
    assert response.status == "completed"

    # Retrieve the response.
    response = await client.responses.retrieve(response.id)
    assert response.status == "completed"

    # Test store=False.
    response = await client.responses.create(
        input="Hello!",
        store=False,
    )
    assert response.status == "completed"

    # The response should not be found.
    with pytest.raises(openai.NotFoundError, match="Response with id .* not found."):
        await client.responses.retrieve(response.id)


@pytest.mark.asyncio
async def test_background(client: openai.AsyncOpenAI):
    """Check that a background response starts queued and eventually completes.

    The response is polled once per second, up to ``max_retries`` times,
    until it reaches a status other than "queued" or "in_progress".
    """
    # NOTE: This query should be easy enough for the model to answer
    # within the 10 seconds.
    response = await client.responses.create(
        input="Hello!",
        background=True,
    )
    assert response.status == "queued"

    max_retries = 10
    for _ in range(max_retries):
        await asyncio.sleep(1)
        response = await client.responses.retrieve(response.id)
        # Keep polling while the response is still pending. Stopping as soon
        # as the status leaves "queued" would make the final assertion fail
        # whenever a poll happens to observe the "in_progress" state.
        if response.status not in ("queued", "in_progress"):
            break
    print(response)

    assert response.status == "completed"


@pytest.mark.asyncio
async def test_background_error(client: openai.AsyncOpenAI):
    """Check that background=True combined with store=False is rejected.

    The request is expected to fail with ``BadRequestError`` carrying the
    message matched below.
    """
    with pytest.raises(
        openai.BadRequestError, match="background can only be used when `store` is true"
    ):
        _ = await client.responses.create(
            input="What is 13 * 24?",
            background=True,
            store=False,
        )


@pytest.mark.asyncio
async def test_background_cancel(client: openai.AsyncOpenAI):
    """Check that a background response can be cancelled and stays cancelled.

    Steps:
      1. Create a background response and check it starts as "queued".
      2. Poll (for at most ``max_wait_seconds``) until it leaves "queued".
      3. Cancel it and check the returned status is "cancelled".
      4. Re-retrieve it several times to check the status does not change.
    """
    response = await client.responses.create(
        input="Write a long story about a cat.",
        background=True,
    )
    assert response.status == "queued"

    # Poll until the response is no longer queued (started processing) or timeout
    loop = asyncio.get_running_loop()
    start_time = loop.time()
    max_wait_seconds = 5.0
    poll_interval = 0.1
    while loop.time() - start_time < max_wait_seconds:
        response = await client.responses.retrieve(response.id)
        if response.status != "queued":
            # Started processing or completed - try to cancel
            break
        await asyncio.sleep(poll_interval)

    # Cancel the response before it is completed.
    response = await client.responses.cancel(response.id)
    assert response.status == "cancelled"

    # Make sure the response status remains unchanged after some time.
    max_retries = 10
    for _ in range(max_retries):
        await asyncio.sleep(0.5)
        response = await client.responses.retrieve(response.id)
        # Verify status is still cancelled
        assert response.status == "cancelled"


@pytest.mark.asyncio
async def test_cancel_completed(client: openai.AsyncOpenAI):
    """Check that cancelling a non-background response is rejected.

    The response is created without ``background=True`` and is already
    "completed" when ``create`` returns; cancelling it must raise
    ``BadRequestError`` with the message matched below.
    """
    response = await client.responses.create(input="Hello")
    assert response.status == "completed"

    with pytest.raises(
        openai.BadRequestError, match="Cannot cancel a synchronous response."
    ):
        await client.responses.cancel(response.id)


@pytest.mark.asyncio
async def test_previous_response_id(client: openai.AsyncOpenAI):
    """Check that a chain of responses linked by previous_response_id is used.

    Three requests are chained (1 -> 2 -> 3). The name given in the first
    request is corrected in the second, and the third asks for the name; the
    answer must contain the corrected name and not the original one.
    """
    response1 = await client.responses.create(
        instructions="You are tested on your ability to retrieve the correct "
        "information from the previous response.",
        input="Hello, my name is John.",
    )

    response2 = await client.responses.create(
        input="Actually, my name is not John. My real name is Mark.",
        previous_response_id=response1.id,
    )

    response3 = await client.responses.create(
        input="What is my real name again? Answer in one word.",
        previous_response_id=response2.id,
    )
    print(response3)
    # The text of the last output item is taken as the model's answer.
    assert "Mark" in response3.output[-1].content[0].text
    assert "John" not in response3.output[-1].content[0].text


@pytest.mark.asyncio
async def test_two_responses_with_same_prev_id(client: openai.AsyncOpenAI):
    """Check that two responses branching from the same parent are independent.

    Responses 2 and 3 both pass response 1 as ``previous_response_id``.
    Response 2 corrects the name to "Mark"; response 3 asks for the name and
    must answer with the name from response 1 ("John"), not the correction
    made in the sibling response 2.
    """
    response1 = await client.responses.create(
        instructions="You are tested on your ability to retrieve the correct "
        "information from the previous response.",
        input="Hello, my name is John.",
    )

    # Both response 2 and 3 use response 1 as the previous response.
    response2 = client.responses.create(
        input="Actually, my name is not John. My name is Mark.",
        previous_response_id=response1.id,
    )
    response3 = client.responses.create(
        input="What is my name again? Answer in one word.",
        previous_response_id=response1.id,
    )

    # NOTE(review): the two awaitables are awaited one after the other, so
    # response 2 is awaited to completion before response 3 is awaited.
    _ = await response2
    response3_result = await response3
    print(response3_result)
    assert "John" in response3_result.output[-1].content[0].text
    assert "Mark" not in response3_result.output[-1].content[0].text