import torch
from sgl_kernel import moe_align_block_size


def test_moe_align_block_size():
    # For DeepSeek V3, we have 256 experts
    num_experts = 256

    # Test different combinations of block_size, num_tokens and topk
    for block_size in [32, 64, 128, 256]:
        print(f"\nTesting block_size={block_size}")
        for num_tokens in [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096]:
            for topk in [1, 2, 4, 8, 16, 32, 64]:
                print(
                    f"Testing block_size={block_size}, num_tokens={num_tokens}, topk={topk}"
                )

                # Create random topk_ids with shape [num_tokens, topk]
                topk_ids = torch.randint(
                    0, num_experts, (num_tokens, topk), dtype=torch.int32, device="cuda"
                )

                # Worst case: every expert's token count gets padded by up to
                # block_size - 1 entries to reach a multiple of block_size
                max_num_tokens_padded = topk_ids.numel() + num_experts * (
                    block_size - 1
                )
                # Padding slots are filled with topk_ids.numel() as a sentinel
                sorted_ids = torch.empty(
                    (max_num_tokens_padded,), dtype=torch.int32, device=topk_ids.device
                )
                sorted_ids.fill_(topk_ids.numel())
                # One expert id per block of sorted token indices
                max_num_m_blocks = max_num_tokens_padded // block_size
                expert_ids = torch.empty(
                    (max_num_m_blocks,), dtype=torch.int32, device=topk_ids.device
                )
                num_tokens_post_pad = torch.empty(
                    (1,), dtype=torch.int32, device=topk_ids.device
                )

                # Scratch buffers used by the kernel: per-expert token counts
                # and the cumulative sum over experts
                token_cnts_buffer = torch.empty(
                    (num_experts + 1) * num_experts,
                    dtype=torch.int32,
                    device=topk_ids.device,
                )
                cumsum_buffer = torch.empty(
                    num_experts + 1, dtype=torch.int32, device=topk_ids.device
                )

                try:
                    moe_align_block_size(
                        topk_ids,
                        num_experts,
                        block_size,
                        sorted_ids,
                        expert_ids,
                        num_tokens_post_pad,
                        token_cnts_buffer,
                        cumsum_buffer,
                    )
                except Exception as e:
                    print(
                        f"Error occurred with block_size={block_size}, num_tokens={num_tokens}, topk={topk}"
                    )
                    print(f"Error message: {str(e)}")
                    raise
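
                # Light sanity check on the outputs (a sketch, assuming the
                # kernel pads each expert's token count up to a multiple of
                # block_size; the padded total then cannot exceed the
                # worst-case allocation computed above)
                num_post_pad = num_tokens_post_pad.item()
                assert num_post_pad % block_size == 0
                assert topk_ids.numel() <= num_post_pad <= max_num_tokens_padded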


if __name__ == "__main__":
    test_moe_align_block_size()