# test_speculative_sampling.py: tests for sgl_kernel's tree_speculative_sampling_target_only.
import pytest
import torch
import torch.nn.functional as F
from sgl_kernel import tree_speculative_sampling_target_only

# Parametrization table for the speculative-sampling test. Every case is a
# tuple laid out as:
#   (threshold_single, threshold_acc, expected_predicts,
#    expected_accept_index, expected_accept_token_num)
_CASE_THRESHOLDS_ONE = (
    1,  # threshold_single
    1,  # threshold_acc
    [3, -1, -1, 4, 5, 18, 11, -1, -1, -1, 12, 18],  # expected_predicts
    [[0, 3, 4, 5], [6, 10, 11, -1]],  # expected_accept_index
    [3, 2],  # expected_accept_token_num
)

_CASE_THRESHOLDS_ZERO = (
    0,  # threshold_single
    0,  # threshold_acc
    [1, 2, 18, -1, -1, -1, 11, -1, -1, -1, 12, 18],  # expected_predicts
    [[0, 1, 2, -1], [6, 10, 11, -1]],  # expected_accept_index
    [2, 2],  # expected_accept_token_num
)

test_cases = [_CASE_THRESHOLDS_ONE, _CASE_THRESHOLDS_ZERO]


@pytest.mark.parametrize(
    "threshold_single, threshold_acc, expected_predicts, expected_accept_index, expected_accept_token_num",
    test_cases,
)
def test_tree_speculative_sampling_target_only(
    threshold_single,
    threshold_acc,
    expected_predicts,
    expected_accept_index,
    expected_accept_token_num,
) -> None:
    """Check tree_speculative_sampling_target_only against fixed expected outputs.

    A batch of two draft trees with six draft tokens each is built on the GPU,
    together with target logits over a 20-token vocabulary in which every
    (batch, position) row has exactly one dominant entry. The kernel is invoked
    with the parametrized thresholds and the three output buffers it receives
    are compared with the expected values.

    Args:
        threshold_single: Forwarded to the kernel as ``threshold_single``.
        threshold_acc: Forwarded to the kernel as ``threshold_acc``.
        expected_predicts: Expected contents of the flat ``predicts`` buffer;
            its length also sizes that buffer.
        expected_accept_index: Expected contents of ``accept_index``, one row
            per batch element; the row length sizes the buffer's second
            dimension.
        expected_accept_token_num: Expected contents of ``accept_token_num``,
            one entry per batch element.
    """
    device = "cuda"

    # Draft token ids, one row of six candidates per batch element.
    candidates = torch.tensor(
        [
            [0, 1, 2, 3, 4, 5],
            [7, 8, 9, 10, 11, 12],
        ],
        dtype=torch.int64,
        device=device,
    )
    # NOTE(review): presumably maps each tree node to its slot in the flat
    # `predicts` buffer (values run 0..11, matching its length) - confirm
    # against the kernel.
    retrive_index = torch.tensor(
        [
            [0, 1, 2, 3, 4, 5],
            [6, 7, 8, 9, 10, 11],
        ],
        dtype=torch.int64,
        device=device,
    )
    # NOTE(review): the next two tensors presumably encode the draft tree as
    # child / sibling links with -1 meaning "none" - confirm against the kernel.
    retrive_next_token = torch.tensor(
        [
            [1, 2, -1, 4, 5, -1],
            [4, 2, 3, -1, 5, -1],
        ],
        dtype=torch.int64,
        device=device,
    )
    retrive_next_sibling = torch.tensor(
        [
            [-1, 3, -1, -1, -1, -1],
            [-1, -1, -1, -1, 1, -1],
        ],
        dtype=torch.int64,
        device=device,
    )

    # Uniform logits of 1, then a single logit of 10 at hand-picked
    # (batch, position, token) entries.
    target_logits = torch.full((2, 6, 20), 1, dtype=torch.float32, device=device)
    target_logits[0, 0, 3] = 10
    target_logits[0, 3, 4] = 10
    target_logits[0, 4, 5] = 10
    target_logits[1, 0, 11] = 10
    target_logits[1, 4, 12] = 10

    # Every row that did not get a peak above falls back to token 18 as its
    # dominant token. A masked assignment replaces the per-row Python loop,
    # which forced a host/device sync on each comparison.
    rows_without_peak = target_logits.amax(dim=-1) < 10
    target_logits[rows_without_peak, 18] = 10

    temperatures = torch.tensor([0.01, 0.01], dtype=torch.float32, device=device)
    bs, num_draft_tokens = candidates.shape
    num_spec_step = len(expected_accept_index[0])
    predict_shape = (len(expected_predicts),)

    # Output buffers, pre-filled with sentinel values; they are handed to the
    # kernel and inspected afterwards.
    predicts = torch.full(predict_shape, -1, dtype=torch.int32, device=device)
    accept_index = torch.full((bs, num_spec_step), -1, dtype=torch.int32, device=device)
    accept_token_num = torch.full((bs,), 0, dtype=torch.int32, device=device)

    # With a temperature of 0.01 the logit gap of 9 becomes 900 before the
    # softmax, so each row of target_probs is effectively one-hot.
    expanded_temperature = temperatures.unsqueeze(1).unsqueeze(1)
    target_probs = F.softmax(target_logits / expanded_temperature, dim=-1)
    draft_probs = torch.full_like(target_probs, 0, dtype=torch.float32, device=device)
    # NOTE(review): the coins are unseeded random draws, yet the expected
    # outputs are fixed; presumably the near one-hot target distribution makes
    # the result independent of the coin values - confirm against the kernel.
    coins = torch.rand(bs, num_draft_tokens, device=device, dtype=torch.float32)
    coins_for_final_sampling = torch.rand(bs, device=device).to(torch.float32)

    tree_speculative_sampling_target_only(
        predicts=predicts,
        accept_index=accept_index,
        accept_token_num=accept_token_num,
        candidates=candidates,
        retrive_index=retrive_index,
        retrive_next_token=retrive_next_token,
        retrive_next_sibling=retrive_next_sibling,
        uniform_samples=coins,
        uniform_samples_for_final_sampling=coins_for_final_sampling,
        target_probs=target_probs,
        draft_probs=draft_probs,
        threshold_single=threshold_single,
        threshold_acc=threshold_acc,
        deterministic=True,
    )

    assert (
        predicts.tolist() == expected_predicts
    ), f"Predicts mismatch for thresholds ({threshold_single}, {threshold_acc})"
    assert (
        accept_index.tolist() == expected_accept_index
    ), f"Accept index mismatch for thresholds ({threshold_single}, {threshold_acc})"
    assert (
        accept_token_num.tolist() == expected_accept_token_num
    ), f"Accept token num mismatch for thresholds ({threshold_single}, {threshold_acc})"


if __name__ == "__main__":
    # Run only this module's tests when the file is executed directly, and
    # surface pytest's exit status so failures produce a non-zero exit code.
    raise SystemExit(pytest.main([__file__]))