# test_client.py: tests for the text_generation Client and AsyncClient
import pytest

from text_generation import Client, AsyncClient
from text_generation.errors import NotFoundError, ValidationError
5
from text_generation.types import FinishReason, InputToken
6
7


8
9
def test_generate(llama_7b_url, hf_headers):
    """Generate one token synchronously and check the returned details.

    Sends the prompt "test" with ``max_new_tokens=1`` and
    ``decoder_input_details=True``, then checks the generated text, the
    finish reason, the prefill tokens and the single generated token.
    """
    client = Client(llama_7b_url, hf_headers)
    response = client.generate("test", max_new_tokens=1, decoder_input_details=True)

    assert response.generated_text == "_"
    assert response.details.finish_reason == FinishReason.Length
    assert response.details.generated_tokens == 1
    assert response.details.seed is None

    # Prefill tokens; the request passed decoder_input_details=True.
    assert len(response.details.prefill) == 2
    assert response.details.prefill[0] == InputToken(id=1, text="<s>", logprob=None)
    # Same second prefill token that test_generate_async expects; only id and
    # text are compared here, the logprob is not.
    assert response.details.prefill[1].id == 1243
    assert response.details.prefill[1].text == "test"

    assert len(response.details.tokens) == 1
    assert response.details.tokens[0].id == 29918
    assert response.details.tokens[0].text == "_"
    assert not response.details.tokens[0].special
22
23


24
25
def test_generate_best_of(llama_7b_url, hf_headers):
    """Generate with ``best_of=2`` and sampling, then check seeds and sequences.

    Asserts that the response carries a seed, that exactly one entry is
    reported in ``best_of_sequences``, and that this entry has a seed too.
    """
    client = Client(llama_7b_url, hf_headers)
    response = client.generate(
        "test", max_new_tokens=1, best_of=2, do_sample=True, decoder_input_details=True
    )

    assert response.details.seed is not None
    assert response.details.best_of_sequences is not None
    assert len(response.details.best_of_sequences) == 1
    assert response.details.best_of_sequences[0].seed is not None


36
37
38
39
40
41
def test_generate_not_found(fake_url, hf_headers):
    """Calling ``generate`` on a client built from ``fake_url`` raises NotFoundError."""
    unreachable = Client(fake_url, hf_headers)

    with pytest.raises(NotFoundError):
        unreachable.generate("test")


42
43
def test_generate_validation_error(llama_7b_url, hf_headers):
    """Calling ``generate`` with ``max_new_tokens=10_000`` raises ValidationError."""
    client = Client(llama_7b_url, hf_headers)
    with pytest.raises(ValidationError):
        client.generate("test", max_new_tokens=10_000)


48
49
def test_generate_stream(llama_7b_url, hf_headers):
    """Stream one token synchronously and check the single streamed response.

    With ``max_new_tokens=1`` the test expects exactly one item from
    ``generate_stream`` and checks its generated text and details.
    """
    client = Client(llama_7b_url, hf_headers)
    # list() drains the stream directly, as test_generate_stream_not_found does.
    responses = list(client.generate_stream("test", max_new_tokens=1))

    assert len(responses) == 1
    response = responses[0]

    assert response.generated_text == "_"
    assert response.details.finish_reason == FinishReason.Length
    assert response.details.generated_tokens == 1
    assert response.details.seed is None


def test_generate_stream_not_found(fake_url, hf_headers):
    """Consuming ``generate_stream`` on a client built from ``fake_url`` raises NotFoundError."""
    unreachable = Client(fake_url, hf_headers)

    with pytest.raises(NotFoundError):
        # Iterate to exhaustion; the items themselves are not inspected.
        for _ in unreachable.generate_stream("test"):
            pass


69
70
def test_generate_stream_validation_error(llama_7b_url, hf_headers):
    """Consuming ``generate_stream`` with ``max_new_tokens=10_000`` raises ValidationError."""
    client = Client(llama_7b_url, hf_headers)
    with pytest.raises(ValidationError):
        # list() forces the stream to be consumed inside the raises block.
        list(client.generate_stream("test", max_new_tokens=10_000))


@pytest.mark.asyncio
async def test_generate_async(llama_7b_url, hf_headers):
    """Generate one token with AsyncClient and check the returned details.

    Sends the prompt "test" with ``max_new_tokens=1`` and
    ``decoder_input_details=True``, then checks the generated text, the
    finish reason, both prefill tokens and the single generated token.
    """
    client = AsyncClient(llama_7b_url, hf_headers)
    response = await client.generate(
        "test", max_new_tokens=1, decoder_input_details=True
    )

    assert response.generated_text == "_"
    assert response.details.finish_reason == FinishReason.Length
    assert response.details.generated_tokens == 1
    assert response.details.seed is None

    # Prefill tokens; the request passed decoder_input_details=True.
    assert len(response.details.prefill) == 2
    assert response.details.prefill[0] == InputToken(id=1, text="<s>", logprob=None)
    assert response.details.prefill[1] == InputToken(
        id=1243, text="test", logprob=-10.96875
    )

    assert len(response.details.tokens) == 1
    assert response.details.tokens[0].id == 29918
    assert response.details.tokens[0].text == "_"
    assert not response.details.tokens[0].special
95
96


97
@pytest.mark.asyncio
async def test_generate_async_best_of(llama_7b_url, hf_headers):
    """Generate with ``best_of=2`` and sampling via AsyncClient, then check seeds.

    Asserts that the response carries a seed, that exactly one entry is
    reported in ``best_of_sequences``, and that this entry has a seed too.
    """
    client = AsyncClient(llama_7b_url, hf_headers)
    response = await client.generate(
        "test", max_new_tokens=1, best_of=2, do_sample=True, decoder_input_details=True
    )

    assert response.details.seed is not None
    assert response.details.best_of_sequences is not None
    assert len(response.details.best_of_sequences) == 1
    assert response.details.best_of_sequences[0].seed is not None


110
111
112
113
114
115
116
117
@pytest.mark.asyncio
async def test_generate_async_not_found(fake_url, hf_headers):
    """Awaiting ``generate`` on an AsyncClient built from ``fake_url`` raises NotFoundError."""
    unreachable = AsyncClient(fake_url, hf_headers)

    with pytest.raises(NotFoundError):
        await unreachable.generate("test")


@pytest.mark.asyncio
async def test_generate_async_validation_error(llama_7b_url, hf_headers):
    """Awaiting ``generate`` with ``max_new_tokens=10_000`` raises ValidationError."""
    client = AsyncClient(llama_7b_url, hf_headers)
    with pytest.raises(ValidationError):
        await client.generate("test", max_new_tokens=10_000)


@pytest.mark.asyncio
async def test_generate_stream_async(llama_7b_url, hf_headers):
    """Stream one token with AsyncClient and check the single streamed response.

    With ``max_new_tokens=1`` the test expects exactly one item from
    ``generate_stream`` and checks its generated text and details.
    """
    client = AsyncClient(llama_7b_url, hf_headers)
    # Collect the async stream with an async comprehension.
    responses = [
        response async for response in client.generate_stream("test", max_new_tokens=1)
    ]

    assert len(responses) == 1
    response = responses[0]

    assert response.generated_text == "_"
    assert response.details.finish_reason == FinishReason.Length
    assert response.details.generated_tokens == 1
    assert response.details.seed is None


@pytest.mark.asyncio
async def test_generate_stream_async_not_found(fake_url, hf_headers):
    """Consuming ``generate_stream`` on an AsyncClient built from ``fake_url`` raises NotFoundError."""
    unreachable = AsyncClient(fake_url, hf_headers)

    with pytest.raises(NotFoundError):
        # Iterate to exhaustion; the streamed items are not inspected.
        async for _chunk in unreachable.generate_stream("test"):
            continue


@pytest.mark.asyncio
async def test_generate_stream_async_validation_error(llama_7b_url, hf_headers):
    """Consuming ``generate_stream`` with ``max_new_tokens=10_000`` raises ValidationError."""
    client = AsyncClient(llama_7b_url, hf_headers)
    with pytest.raises(ValidationError):
        # Iterate inside the raises block; the items themselves are not inspected.
        async for _ in client.generate_stream("test", max_new_tokens=10_000):
            pass