# test_client.py: tests for the text_generation Client and AsyncClient.
import pytest

from text_generation import Client, AsyncClient
from text_generation.errors import NotFoundError, ValidationError
from text_generation.types import FinishReason, PrefillToken, Token


8
9
def test_generate(flan_t5_xxl_url, hf_headers):
    """Check a one-token synchronous generation and the details it reports.

    Sends the prompt ``"test"`` with ``max_new_tokens=1`` through ``Client``
    and asserts on the generated text, finish reason, token count, seed,
    prefill tokens and generated tokens.

    Args:
        flan_t5_xxl_url: Endpoint passed to ``Client`` (pytest fixture,
            presumably defined in conftest; verify there).
        hf_headers: Headers passed to ``Client`` (pytest fixture).
    """
    client = Client(flan_t5_xxl_url, hf_headers)
    response = client.generate("test", max_new_tokens=1)

    assert response.generated_text == ""

    details = response.details
    assert details.finish_reason == FinishReason.Length
    assert details.generated_tokens == 1
    assert details.seed is None

    assert len(details.prefill) == 1
    assert details.prefill[0] == PrefillToken(id=0, text="<pad>", logprob=None)

    assert len(details.tokens) == 1
    assert details.tokens[0] == Token(
        id=3, text="", logprob=-1.984375, special=False
    )


24
25
26
27
28
29
30
31
32
33
def test_generate_best_of(flan_t5_xxl_url, hf_headers):
    """Sampled generation with ``best_of=2`` reports seeds and one extra sequence.

    Both the top-level details and the single entry of ``best_of_sequences``
    must carry a seed.
    """
    result = Client(flan_t5_xxl_url, hf_headers).generate(
        "test", max_new_tokens=1, best_of=2, do_sample=True
    )
    details = result.details

    assert details.seed is not None

    alternatives = details.best_of_sequences
    assert alternatives is not None
    assert len(alternatives) == 1
    assert alternatives[0].seed is not None


34
35
36
37
38
39
40
41
42
43
44
45
def test_generate_not_found(fake_url, hf_headers):
    """``Client.generate`` against ``fake_url`` must raise ``NotFoundError``."""
    unreachable_client = Client(fake_url, hf_headers)
    prompt = "test"

    with pytest.raises(NotFoundError):
        unreachable_client.generate(prompt)


def test_generate_validation_error(flan_t5_xxl_url, hf_headers):
    """Requesting 10_000 new tokens from ``generate`` must raise ``ValidationError``."""
    sync_client = Client(flan_t5_xxl_url, hf_headers)
    requested_tokens = 10_000

    with pytest.raises(ValidationError):
        sync_client.generate("test", max_new_tokens=requested_tokens)


46
47
def test_generate_stream(flan_t5_xxl_url, hf_headers):
    """Check that a one-token synchronous stream yields one final response.

    Exhausts ``Client.generate_stream`` for the prompt ``"test"`` with
    ``max_new_tokens=1`` and asserts that exactly one item is produced and
    that it carries the generated text and the generation details.

    Args:
        flan_t5_xxl_url: Endpoint passed to ``Client`` (pytest fixture,
            presumably defined in conftest; verify there).
        hf_headers: Headers passed to ``Client`` (pytest fixture).
    """
    client = Client(flan_t5_xxl_url, hf_headers)
    # list() drains the stream directly; no identity comprehension needed.
    responses = list(client.generate_stream("test", max_new_tokens=1))

    assert len(responses) == 1
    response = responses[0]

    assert response.generated_text == ""
    assert response.details.finish_reason == FinishReason.Length
    assert response.details.generated_tokens == 1
    assert response.details.seed is None


def test_generate_stream_not_found(fake_url, hf_headers):
    """Consuming ``generate_stream`` against ``fake_url`` must raise ``NotFoundError``."""
    unreachable_client = Client(fake_url, hf_headers)

    with pytest.raises(NotFoundError):
        # The stream has to be iterated to completion, as the original did.
        for _ in unreachable_client.generate_stream("test"):
            pass


def test_generate_stream_validation_error(flan_t5_xxl_url, hf_headers):
    """Streaming with ``max_new_tokens=10_000`` must raise ``ValidationError``."""
    sync_client = Client(flan_t5_xxl_url, hf_headers)
    requested_tokens = 10_000

    with pytest.raises(ValidationError):
        # The stream has to be iterated to completion, as the original did.
        for _ in sync_client.generate_stream("test", max_new_tokens=requested_tokens):
            pass


@pytest.mark.asyncio
async def test_generate_async(flan_t5_xxl_url, hf_headers):
    """Check a one-token asynchronous generation and the details it reports.

    Awaits ``AsyncClient.generate`` for the prompt ``"test"`` with
    ``max_new_tokens=1`` and asserts on the generated text, finish reason,
    token count, seed, prefill tokens and generated tokens.

    Args:
        flan_t5_xxl_url: Endpoint passed to ``AsyncClient`` (pytest fixture,
            presumably defined in conftest; verify there).
        hf_headers: Headers passed to ``AsyncClient`` (pytest fixture).
    """
    client = AsyncClient(flan_t5_xxl_url, hf_headers)
    response = await client.generate("test", max_new_tokens=1)

    assert response.generated_text == ""

    details = response.details
    assert details.finish_reason == FinishReason.Length
    assert details.generated_tokens == 1
    assert details.seed is None

    assert len(details.prefill) == 1
    assert details.prefill[0] == PrefillToken(id=0, text="<pad>", logprob=None)

    assert len(details.tokens) == 1
    assert details.tokens[0] == Token(
        id=3, text="", logprob=-1.984375, special=False
    )


@pytest.mark.asyncio
async def test_generate_async_not_found(fake_url, hf_headers):
    """Awaiting ``AsyncClient.generate`` against ``fake_url`` must raise ``NotFoundError``."""
    unreachable_client = AsyncClient(fake_url, hf_headers)
    prompt = "test"

    with pytest.raises(NotFoundError):
        await unreachable_client.generate(prompt)


@pytest.mark.asyncio
async def test_generate_async_validation_error(flan_t5_xxl_url, hf_headers):
    """Awaiting ``generate`` with ``max_new_tokens=10_000`` must raise ``ValidationError``."""
    async_client = AsyncClient(flan_t5_xxl_url, hf_headers)
    requested_tokens = 10_000

    with pytest.raises(ValidationError):
        await async_client.generate("test", max_new_tokens=requested_tokens)


@pytest.mark.asyncio
async def test_generate_stream_async(flan_t5_xxl_url, hf_headers):
    """Check that a one-token asynchronous stream yields one final response.

    Exhausts ``AsyncClient.generate_stream`` for the prompt ``"test"`` with
    ``max_new_tokens=1`` and asserts that exactly one item is produced and
    that it carries the generated text and the generation details.

    Args:
        flan_t5_xxl_url: Endpoint passed to ``AsyncClient`` (pytest fixture,
            presumably defined in conftest; verify there).
        hf_headers: Headers passed to ``AsyncClient`` (pytest fixture).
    """
    client = AsyncClient(flan_t5_xxl_url, hf_headers)
    # An async comprehension is required here: list() cannot drain an
    # asynchronous iterator.
    responses = [
        response async for response in client.generate_stream("test", max_new_tokens=1)
    ]

    assert len(responses) == 1
    response = responses[0]

    assert response.generated_text == ""
    assert response.details.finish_reason == FinishReason.Length
    assert response.details.generated_tokens == 1
    assert response.details.seed is None


@pytest.mark.asyncio
async def test_generate_stream_async_not_found(fake_url, hf_headers):
    """Consuming the async stream against ``fake_url`` must raise ``NotFoundError``."""
    unreachable_client = AsyncClient(fake_url, hf_headers)

    with pytest.raises(NotFoundError):
        # Drain the stream; the collected items themselves are not inspected.
        [chunk async for chunk in unreachable_client.generate_stream("test")]


@pytest.mark.asyncio
async def test_generate_stream_async_validation_error(flan_t5_xxl_url, hf_headers):
    """Async streaming with ``max_new_tokens=10_000`` must raise ``ValidationError``."""
    async_client = AsyncClient(flan_t5_xxl_url, hf_headers)
    requested_tokens = 10_000

    with pytest.raises(ValidationError):
        # Drain the stream; the collected items themselves are not inspected.
        [
            chunk
            async for chunk in async_client.generate_stream(
                "test", max_new_tokens=requested_tokens
            )
        ]