test_santacoder.py
import pytest

from text_generation.pb import generate_pb2
from text_generation.models.causal_lm import CausalLMBatch
from text_generation.models.santacoder import SantaCoder


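# Loads the real bigcode/santacoder checkpoint once for the whole test session.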
@pytest.fixture(scope="session")
def default_santacoder():
    return SantaCoder("bigcode/santacoder")


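# default_pb_parameters and default_pb_stop_parameters are expected to be provided
# by the shared pytest conftest.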
@pytest.fixture
def default_pb_request(default_pb_parameters, default_pb_stop_parameters):
    return generate_pb2.Request(
        id=0,
        inputs="def",
        input_length=1,
        parameters=default_pb_parameters,
        stopping_parameters=default_pb_stop_parameters,
    )


@pytest.fixture
def default_pb_batch(default_pb_request):
    return generate_pb2.Batch(id=0, requests=[default_pb_request], size=1)


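# Fill-in-the-middle request: the prompt wraps a prefix ("def") and a suffix
# ("world") in SantaCoder's FIM sentinel tokens.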
@pytest.fixture
def default_fim_pb_request(default_pb_parameters, default_pb_stop_parameters):
    return generate_pb2.Request(
        id=0,
        inputs="<fim-prefix>def<fim-suffix>world<fim-middle>",
        input_length=5,
        parameters=default_pb_parameters,
        stopping_parameters=default_pb_stop_parameters,
    )


@pytest.fixture
def default_fim_pb_batch(default_fim_pb_request):
    return generate_pb2.Batch(id=0, requests=[default_fim_pb_request], size=1)


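# Greedy generation from a plain prompt: the model should produce exactly
# max_new_tokens tokens and decode to the expected completion.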
@pytest.mark.skip
def test_santacoder_generate_token_completion(default_santacoder, default_pb_batch):
    batch = CausalLMBatch.from_pb(
        default_pb_batch, default_santacoder.tokenizer, default_santacoder.device
    )
    next_batch = batch

    for _ in range(batch.stopping_criterias[0].max_new_tokens - 1):
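        # No request should finish before max_new_tokens is reached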
        generated_texts, next_batch = default_santacoder.generate_token(next_batch)
        assert generated_texts == []

    generated_texts, next_batch = default_santacoder.generate_token(next_batch)
    assert next_batch is None

    assert len(generated_texts) == 1
    assert generated_texts[0].output_text == "def test_get_all_users_with_"
    assert generated_texts[0].request == batch.requests[0]
    assert len(generated_texts[0].tokens) == len(generated_texts[0].logprobs)
    assert (
        generated_texts[0].generated_tokens
        == batch.stopping_criterias[0].max_new_tokens
    )


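# Same flow with a fill-in-the-middle prompt: the generated text echoes the FIM
# prompt followed by the infilled completion.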
@pytest.mark.skip
def test_fim_santacoder_generate_token_completion(
    default_santacoder, default_fim_pb_batch
):
    batch = CausalLMBatch.from_pb(
        default_fim_pb_batch, default_santacoder.tokenizer, default_santacoder.device
    )
    next_batch = batch

    for _ in range(batch.stopping_criterias[0].max_new_tokens - 1):
        generated_texts, next_batch = default_santacoder.generate_token(next_batch)
        assert generated_texts == []

    generated_texts, next_batch = default_santacoder.generate_token(next_batch)
    assert next_batch is None

    assert len(generated_texts) == 1
    assert (
        generated_texts[0].output_text
        == """<fim-prefix>def<fim-suffix>world<fim-middle>ineProperty(exports, "__esModule", { value"""
    )
    assert generated_texts[0].request == batch.requests[0]
    assert len(generated_texts[0].tokens) == len(generated_texts[0].logprobs)
    assert (
        generated_texts[0].generated_tokens
        == batch.stopping_criterias[0].max_new_tokens
    )