# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import random

5
6
import pytest

7
from vllm import LLM
8
9
10
11
12
from vllm.platforms import current_platform
from vllm.sampling_params import SamplingParams


@pytest.mark.parametrize("model_name", ["Qwen/Qwen2.5-1.5B-Instruct"])
@pytest.mark.skipif(not current_platform.is_tpu(), reason="This test needs a TPU")
def test_sampler_different(model_name: str) -> None:
    """
    Test significantly different sampling params to assert the model produces
    different results.

    Also checks that a per-request `seed` makes `generate` raise a ValueError,
    and that in a batch with varying top-k/top-p values two requests sharing
    the same sampling params start with the same text.
    """
    llm = LLM(
        model_name,
        enforce_eager=False,
        max_num_seqs=1,
        max_model_len=512,
        max_num_batched_tokens=256,
    )
    prompts = ["Write a short story about a robot that dreams for the first time."]

    sampling_params = SamplingParams(temperature=0.9, min_p=0.2, max_tokens=64)
    output = llm.generate(prompts, sampling_params)

    sampling_params = SamplingParams(temperature=0.1, min_p=0.8, max_tokens=64)
    output2 = llm.generate(prompts, sampling_params)
    assert output[0].outputs[0].text != output2[0].outputs[0].text

    # Unsupported `seed` param. The params are built outside the `raises`
    # block so that only the `generate` call is expected to raise.
    seeded_params = SamplingParams(temperature=0.3, seed=42)
    with pytest.raises(ValueError):
        llm.generate(prompts, seeded_params)

    # Batch-case with TopK/P
    # A locally seeded generator keeps the drawn K/P values identical across
    # runs, so a failure can be reproduced.
    rng = random.Random(0)
    for batch_size in [4, 16]:
        batch_prompts = prompts * batch_size
        sampling_params = [
            SamplingParams(
                temperature=0.1,
                min_p=0.8,
                max_tokens=64,
                # Vary number of ks
                top_k=rng.randint(4, 12),
                # `random()` is in [0, 1), so `1.0 - random()` is in (0, 1]:
                # top_p can never be 0.0 (vLLM documents top_p as (0, 1]).
                top_p=1.0 - rng.random(),
            )
            for _ in range(batch_size)
        ]
        # Make sure first two reqs have the same K/P
        sampling_params[0] = sampling_params[1]
        output = llm.generate(batch_prompts, sampling_params)
        # There are natural numerical instabilities that make it difficult
        # to have deterministic results over many tokens, so only the first
        # 20 characters of the generated text are required to match.
        assert output[0].outputs[0].text[:20] == output[1].outputs[0].text[:20]
60
61
62
63
64


@pytest.mark.parametrize("model_name", ["Qwen/Qwen2.5-1.5B-Instruct"])
# TODO TPU will appear busy if we fan-out test params here
@pytest.mark.parametrize("n_prompts", [1])
@pytest.mark.skipif(not current_platform.is_tpu(), reason="This test needs a TPU")
def test_logprobs(model_name: str, n_prompts: int) -> None:
    """
    Request top logprobs with different sampling settings and check that
    every generation step contains the requested number of entries, ranked
    from 1 upwards with non-increasing logprob values.
    """

    def check_num_logprobs(logprobs, expected_num: int) -> None:
        """Validate the size, ranks and ordering of each step's logprobs."""
        for step in logprobs:
            # Can contain the sampled token
            assert len(step) == expected_num or len(step) == expected_num + 1

            # order by rank
            ranked = sorted(step.values(), key=lambda entry: entry.rank)

            # Check results are ordered by prob value
            prev_logp = 1.0
            for position, entry in enumerate(ranked, start=1):
                assert entry.logprob <= prev_logp
                prev_logp = entry.logprob
                if position <= expected_num:
                    # The requested top entries are ranked 1..expected_num.
                    assert entry.rank == position
                else:
                    # The extra entry is the sampled token when it is not
                    # among the top ones.
                    # NOTE(review): assumes `rank` is the token's rank over
                    # the whole vocabulary (per vLLM's `Logprob` docs), so it
                    # may be larger than expected_num + 1 — confirm.
                    assert entry.rank >= position

    llm = LLM(
        model_name,
        enforce_eager=False,
        max_num_seqs=1,
        max_model_len=128,
        max_num_batched_tokens=128,
    )
    prompts = [
        "Write a short story about a robot that dreams for the first time."
    ] * n_prompts
    greedy_sampling_params = SamplingParams(temperature=0.0, max_tokens=64, logprobs=4)
    regular_sampling_params = SamplingParams(temperature=0.4, max_tokens=64, logprobs=4)
    topkp_sampling_params = SamplingParams(
        temperature=0.4, max_tokens=64, logprobs=4, top_k=12, top_p=0.5
    )

    for sp in [greedy_sampling_params, regular_sampling_params, topkp_sampling_params]:
        output = llm.generate(prompts, sp)
        for o in output:
            check_num_logprobs(o.outputs[0].logprobs, 4)