test_compress_utils.py 1.32 KB
Newer Older
root's avatar
init  
root committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import torch
import tilelang
import tilelang.testing

from tilelang.utils.sparse import compress_sm90, randn_semi_sparse


def _test_compress_sm90(M, K, block_k, dtype):
    """Run ``compress_sm90`` once on a freshly generated input.

    The input is produced by ``randn_semi_sparse(M, K, dtype=dtype,
    device='cuda')`` and handed to ``compress_sm90`` together with
    ``block_k``. The result is unpacked into two values that are not
    inspected any further, so this helper only checks that the call
    completes and yields exactly two items.

    NOTE(review): the meaning of the trailing positional ``False`` is not
    visible from this file -- confirm against ``compress_sm90``'s signature.
    """
    dense_input = randn_semi_sparse(M, K, dtype=dtype, device='cuda')
    # The two-way unpacking is kept on purpose: it raises if anything other
    # than exactly two values comes back from the call.
    _sparse_out, _e_out = compress_sm90(dense_input, block_k, False)


@tilelang.testing.requires_cuda
@tilelang.testing.requires_cuda_compute_version(9, 0)
def test_compress_sm90():
    """Exercise ``_test_compress_sm90`` over a grid of dtypes and block sizes.

    Every case uses a 1024 x 1024 problem size. Each dtype is paired with
    three ``block_k`` values, and the cases run in the order listed below.
    The test is gated by the ``tilelang.testing`` decorators
    ``requires_cuda`` and ``requires_cuda_compute_version(9, 0)``.
    """
    size_m = size_k = 1024

    # Built inside the function so the torch dtype attributes are only
    # resolved when the test actually runs, not when the module is imported.
    block_sizes_by_dtype = (
        (torch.float16, (128, 64, 32)),
        (torch.bfloat16, (128, 64, 32)),
        (torch.float32, (64, 32, 16)),
        (torch.float8_e4m3fn, (256, 128, 64)),
        (torch.float8_e5m2, (256, 128, 64)),
    )

    for dtype, block_sizes in block_sizes_by_dtype:
        for block_k in block_sizes:
            _test_compress_sm90(size_m, size_k, block_k, dtype)


# Script entry point: when this file is executed directly (rather than
# imported), call the test function once and print a success message.
# The message is only reached if the call returns without raising.
if __name__ == "__main__":
    test_compress_sm90()
    print("All tests passed.")