"""Shared pytest fixtures: default protobuf generation parameters and tokenizers."""

import pytest

from transformers import AutoTokenizer

from text_generation.pb import generate_pb2


@pytest.fixture
def default_pb_parameters():
    """Provide a ``NextTokenChooserParameters`` message with fixed defaults.

    Returns:
        A ``generate_pb2.NextTokenChooserParameters`` built with
        ``temperature=1.0``, ``top_k=0``, ``top_p=1.0`` and
        ``do_sample=False``.
    """
    return generate_pb2.NextTokenChooserParameters(
        temperature=1.0,
        top_k=0,
        top_p=1.0,
        do_sample=False,
    )


# Stopping-criteria fixture.
@pytest.fixture
def default_pb_stop_parameters():
    """Provide a ``StoppingCriteriaParameters`` message with fixed defaults.

    Returns:
        A ``generate_pb2.StoppingCriteriaParameters`` built with an empty
        ``stop_sequences`` list and ``max_new_tokens=10``.
    """
    stopping_kwargs = {"stop_sequences": [], "max_new_tokens": 10}
    return generate_pb2.StoppingCriteriaParameters(**stopping_kwargs)


# Tokenizer fixtures (session-scoped).
@pytest.fixture(scope="session")
def bloom_560m_tokenizer():
    """Load the ``bigscience/bloom-560m`` tokenizer with left-side padding.

    The fixture is session-scoped, so pytest builds it once per test session.
    """
    bloom_tokenizer = AutoTokenizer.from_pretrained(
        "bigscience/bloom-560m",
        padding_side="left",
    )
    return bloom_tokenizer


@pytest.fixture(scope="session")
def gpt2_tokenizer():
    """Load the ``gpt2`` tokenizer with left-side padding and a fixed pad id.

    The fixture is session-scoped, so pytest builds it once per test session.
    """
    load_options = {"padding_side": "left"}
    gpt2 = AutoTokenizer.from_pretrained("gpt2", **load_options)
    # Pad id is pinned explicitly after loading.
    # NOTE(review): 50256 is presumably GPT-2's end-of-text id -- confirm.
    gpt2.pad_token_id = 50256
    return gpt2


@pytest.fixture(scope="session")
def mt0_small_tokenizer():
    """Load the ``bigscience/mt0-small`` tokenizer with left-side padding.

    The fixture is session-scoped, so pytest builds it once per test session.
    The ``bos_token_id`` is overridden to 0 after loading.
    """
    load_options = {"padding_side": "left"}
    mt0 = AutoTokenizer.from_pretrained("bigscience/mt0-small", **load_options)
    # NOTE(review): presumably set because tests need a BOS id of 0 -- confirm.
    mt0.bos_token_id = 0
    return mt0