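"""Tests for vLLM's Sampler layer: greedy, random, beam-search, mixed-batch,
logits-processor, and top-k/top-p sampling."""
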
import random
from typing import Tuple
from unittest.mock import patch

import pytest
import torch
from transformers import GenerationConfig, GenerationMixin

from vllm.model_executor.layers.sampler import Sampler
from vllm.model_executor.utils import set_random_seed
from vllm.sequence import SamplingParams, SequenceData, SequenceGroupMetadata
from vllm.worker.model_runner import ModelRunner


class MockLogitsSampler(Sampler):
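    """Sampler whose hidden-state pruning and logit computation are patched
    out so that forward() always scores tokens with the supplied fake logits."""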

    def __init__(self, vocab_size: int, fake_logits: torch.Tensor):
        super().__init__(vocab_size=vocab_size)
        self.fake_logits = fake_logits

    def forward(self, *args, **kwargs):
        with patch(
                "vllm.model_executor.layers.sampler._prune_hidden_states",
                lambda x, y: x), patch(
                    "vllm.model_executor.layers.sampler.Sampler._get_logits",
                    lambda *args, **kwargs: self.fake_logits):
            return super().forward(*args, **kwargs)


def _prepare_test(
    batch_size: int
) -> Tuple[torch.Tensor, torch.Tensor, MockLogitsSampler, ModelRunner]:
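    """Build a random fp16 CUDA input batch, uniform fake logits, a
    MockLogitsSampler, and a bare-bones ModelRunner for `batch_size`."""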
    vocab_size = 32000
    input_tensor = torch.rand((batch_size, 1024),
                              device="cuda",
                              dtype=torch.float16)
    fake_logits = torch.full((batch_size, vocab_size),
                             1e-2,
                             device=input_tensor.device,
                             dtype=input_tensor.dtype)
    sampler = MockLogitsSampler(vocab_size, fake_logits)
    model_runner = ModelRunner(None, None, None, None)
    return input_tensor, fake_logits, sampler, model_runner


RANDOM_SEEDS = list(range(128))


@pytest.mark.parametrize("seed", RANDOM_SEEDS)
def test_sampler_all_greedy(seed: int):
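    """With temperature=0 (greedy), every sampled token must be the argmax
    of the logits."""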
    set_random_seed(seed)
    batch_size = random.randint(1, 256)
    input_tensor, fake_logits, sampler, model_runner = _prepare_test(
        batch_size)

    seq_group_metadata_list = []
    prompt_lens = []
    for i in range(batch_size):
        seq_group_metadata_list.append(
            SequenceGroupMetadata(
                request_id=f"test_{i}",
                is_prompt=True,
                seq_data={0: SequenceData([1, 2, 3])},
                sampling_params=SamplingParams(temperature=0),
                block_tables={0: [1]},
            ))
        prompt_lens.append(seq_group_metadata_list[-1].seq_data[0].get_len())

    sampling_metadata = model_runner._prepare_sample(seq_group_metadata_list,
                                                     prompt_lens,
                                                     subquery_lens=prompt_lens)
    sampler_output = sampler(embedding=None,
                             hidden_states=input_tensor,
                             sampling_metadata=sampling_metadata)
    expected = torch.argmax(fake_logits, dim=-1)
    for i, sequence_output in enumerate(sampler_output):
        for nth_output in sequence_output.samples:
            assert nth_output.output_token == expected[i].item()

    del model_runner


@pytest.mark.parametrize("seed", RANDOM_SEEDS)
def test_sampler_all_random(seed: int):
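    """With temperature=1.0 and one dominant logit per row, sampling must
    return that dominant token."""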
    set_random_seed(seed)
    batch_size = random.randint(1, 256)
    input_tensor, fake_logits, sampler, model_runner = _prepare_test(
        batch_size)

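    # Make token i the overwhelming argmax of row i so that random sampling
    # is effectively forced to pick it.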
    for i in range(batch_size):
        fake_logits[i, i] = 1e2

    seq_group_metadata_list = []
    prompt_lens = []
    for i in range(batch_size):
        seq_group_metadata_list.append(
            SequenceGroupMetadata(
                request_id=f"test_{i}",
                is_prompt=True,
                seq_data={0: SequenceData([1, 2, 3])},
                sampling_params=SamplingParams(
                    temperature=1.0,
                    n=random.randint(1, 10),
                ),
                block_tables={0: [1]},
            ))
        prompt_lens.append(seq_group_metadata_list[-1].seq_data[0].get_len())

    sampling_metadata = model_runner._prepare_sample(seq_group_metadata_list,
                                                     prompt_lens,
                                                     subquery_lens=prompt_lens)
    sampler_output = sampler(embedding=None,
                             hidden_states=input_tensor,
                             sampling_metadata=sampling_metadata)
    for i, sequence_output in enumerate(sampler_output):
        for nth_output in sequence_output.samples:
            assert nth_output.output_token == i

    del model_runner


@pytest.mark.parametrize("seed", RANDOM_SEEDS)
def test_sampler_all_beam(seed: int):
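    """Run an all-beam-search batch through the sampler; this is a smoke
    test (see the comment after the sampler call)."""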
    set_random_seed(seed)
    batch_size = random.randint(1, 256)
    input_tensor, _, sampler, model_runner = _prepare_test(batch_size)

    seq_group_metadata_list = []
    prompt_lens = []
    for i in range(batch_size):
        seq_group_metadata_list.append(
            SequenceGroupMetadata(
                request_id=f"test_{i}",
                is_prompt=True,
                seq_data={0: SequenceData([1, 2, 3])},
                sampling_params=SamplingParams(
                    temperature=0,
                    best_of=2,
                    use_beam_search=True,
                ),
                block_tables={0: [1]},
            ))
        prompt_lens.append(seq_group_metadata_list[-1].seq_data[0].get_len())

    sampling_metadata = model_runner._prepare_sample(seq_group_metadata_list,
                                                     prompt_lens,
                                                     subquery_lens=prompt_lens)
    sampler(embedding=None,
            hidden_states=input_tensor,
            sampling_metadata=sampling_metadata)
    # No assertion here: beam-search outputs are hard to verify
    # deterministically, so this test only checks that the sampler raises
    # no exceptions when every sequence group uses beam search.
    del model_runner


@pytest.mark.parametrize("seed", RANDOM_SEEDS)
def test_sampler_mixed(seed: int):
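    """Mix greedy, random-sampling, and beam-search groups in one batch and
    check that every non-beam output comes from the boosted token set."""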
    set_random_seed(seed)
    batch_size = random.randint(1, 256)
    input_tensor, fake_logits, sampler, model_runner = _prepare_test(
        batch_size)

    seq_group_metadata_list = []
    expected_tokens = []
    prompt_lens = []
    for i in range(batch_size):
        n = 1
        sampling_type = random.randint(0, 2)
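        # 0: greedy, 1: random sampling with randomized parameters,
        # 2: beam search.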
        if sampling_type == 0:
            sampling_params = SamplingParams(temperature=0)
        elif sampling_type == 1:
            n = random.randint(1, 10)
            sampling_params = SamplingParams(
                temperature=random.random() + 0.1,
                top_p=min(random.random() + 0.1, 1),
                top_k=random.randint(0, 10) or -1,
                n=n,
                presence_penalty=random.randint(0, 1),
            )
        else:
            sampling_params = SamplingParams(temperature=0,
                                             use_beam_search=True,
                                             best_of=2)
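        # Boost n consecutive tokens for row i; any of them is an acceptable
        # output for the non-beam groups checked below.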
        for idx in range(n):
            fake_logits[i, i + idx] = 1e2
            expected_tokens.append(i + idx)
        seq_group_metadata_list.append(
            SequenceGroupMetadata(
                request_id=f"test_{i}",
                is_prompt=True,
                seq_data={0: SequenceData([1, 2, 3])},
                sampling_params=sampling_params,
                block_tables={0: [1]},
            ))
        prompt_lens.append(seq_group_metadata_list[-1].seq_data[0].get_len())

    sampling_metadata = model_runner._prepare_sample(seq_group_metadata_list,
                                                     prompt_lens,
                                                     subquery_lens=prompt_lens)
    sampler_output = sampler(embedding=None,
                             hidden_states=input_tensor,
                             sampling_metadata=sampling_metadata)
    for i, sequence_output in enumerate(sampler_output):
        if seq_group_metadata_list[i].sampling_params.use_beam_search:
            continue
        for nth_output in sequence_output.samples:
            assert nth_output.output_token in expected_tokens

    del model_runner


@pytest.mark.parametrize("seed", RANDOM_SEEDS)
def test_sampler_logits_processors(seed: int):
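    """Logits processors attached via SamplingParams must be applied to the
    logits before sampling."""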
    set_random_seed(seed)
    batch_size = random.randint(1, 256)
    input_tensor, _, sampler, model_runner = _prepare_test(batch_size)

    # This sample logits processor gives an infinite score to the i-th token,
    # where i is the length of the token_ids list it receives.
    # We therefore expect the sampled token sequence to be [0, 1, 2, ...].
    def pick_ith(token_ids, logits):
        logits[len(token_ids)] = float("inf")
        return logits

    seq_group_metadata_list = []
    prompt_lens = []
    for i in range(batch_size):
        seq_group_metadata_list.append(
            SequenceGroupMetadata(
                request_id=f"test_{i}",
                is_prompt=True,
                seq_data={0: SequenceData([1, 2, 3])},
                sampling_params=SamplingParams(temperature=0,
                                               logits_processors=[pick_ith]),
                block_tables={0: [1]},
            ))
        prompt_lens.append(seq_group_metadata_list[-1].seq_data[0].get_len())

    sampling_metadata = model_runner._prepare_sample(seq_group_metadata_list,
                                                     prompt_lens,
                                                     subquery_lens=prompt_lens)
    sampler_output = sampler(embedding=None,
                             hidden_states=input_tensor,
                             sampling_metadata=sampling_metadata)
    for sequence_output in sampler_output:
        for idx, nth_output in enumerate(sequence_output.samples):
            assert nth_output.output_token == idx

    del model_runner


@pytest.mark.parametrize("seed", RANDOM_SEEDS)
def test_sampler_top_k_top_p(seed: int):
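    """Compare the sampler's top-k/top-p filtering against HuggingFace's
    logits warpers on identical logits."""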
    set_random_seed(seed)
    batch_size = random.randint(1, 256)
    top_k = random.randint(100, 500)
    top_p = random.random() * 0.1
    vocab_size = 32000
    input_tensor = torch.rand((batch_size, 1024),
                              device="cuda",
                              dtype=torch.float16)
    fake_logits = torch.normal(0,
                               5,
                               size=(batch_size, vocab_size),
                               device=input_tensor.device,
                               dtype=input_tensor.dtype)
    sampler = MockLogitsSampler(vocab_size, fake_logits)
    model_runner = ModelRunner(None, None, None, None)

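    # HuggingFace's top-k/top-p logits warpers serve as the reference
    # implementation to compare against.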
    generation_model = GenerationMixin()
    generation_config = GenerationConfig(top_k=top_k,
                                         top_p=top_p,
                                         do_sample=True)
    warpers = generation_model._get_logits_warper(generation_config)
    assert len(warpers) == 2  # top_p and top_k

    seq_group_metadata_list = []
    prompt_lens = []
    for i in range(batch_size):
        seq_group_metadata_list.append(
            SequenceGroupMetadata(
                request_id=f"test_{i}",
                is_prompt=True,
                seq_data={0: SequenceData([1, 2, 3])},
                sampling_params=SamplingParams(
                    temperature=1,
                    top_k=top_k,
                    top_p=top_p,
                ),
                block_tables={0: [1]},
            ))
        prompt_lens.append(seq_group_metadata_list[-1].seq_data[0].get_len())

    sampling_metadata = model_runner._prepare_sample(seq_group_metadata_list,
                                                     prompt_lens,
                                                     subquery_lens=prompt_lens)

    sample_probs = None

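    # Stand-in for _sample that records the post-filtering probabilities
    # the sampler would draw from.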
    def mock_sample(probs, logprobs, sampling_metadata):
        nonlocal sample_probs
        sample_probs = probs
        return [[prob.topk(1, dim=-1).indices.tolist(), [0]] for prob in probs]

    with patch("vllm.model_executor.layers.sampler._sample", mock_sample):
        sampler(embedding=None,
                hidden_states=input_tensor,
                sampling_metadata=sampling_metadata)
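    # The sampler's probabilities must match HF's warped logits after
    # softmax, including exactly which entries were zeroed out.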
    hf_probs = warpers(torch.zeros_like(fake_logits), fake_logits.clone())
    hf_probs = torch.softmax(hf_probs, dim=-1, dtype=torch.float)
    assert torch.allclose(hf_probs, sample_probs, atol=1e-5)
    assert torch.equal(hf_probs.eq(0), sample_probs.eq(0))

    del model_runner