test_example_blocksparse_attention.py 1.39 KB
Newer Older
root's avatar
init  
root committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import tilelang.testing
import block_sparse_attn_triton
import example_tilelang_block_sparse_attn
import example_tilelang_sparse_gqa_decode_varlen_indice
import example_tilelang_sparse_gqa_decode_varlen_mask
import example_triton_sparse_gqa_decode_varlen_indice
import example_triton_sparse_gqa_decode_varlen_mask


def test_block_sparse_attn_triton():
    """Smoke-test the ``block_sparse_attn_triton`` example.

    Calls the example module's ``main()`` with no arguments, so the
    module's own defaults apply. The test fails if that call raises.
    """
    run_example = block_sparse_attn_triton.main
    run_example()


def test_example_tilelang_block_sparse_attn():
    """Smoke-test the ``example_tilelang_block_sparse_attn`` example.

    Calls the example module's ``main()`` with no arguments, so the
    module's own defaults apply. The test fails if that call raises.
    """
    run_example = example_tilelang_block_sparse_attn.main
    run_example()


def test_example_tilelang_sparse_gqa_decode_varlen_indice():
    """Run the tilelang sparse-GQA decode (varlen, index variant) example.

    Only ``batch`` and ``max_cache_seqlen`` are overridden; every other
    argument of the example's ``main()`` keeps its own default.
    """
    overrides = {"batch": 1, "max_cache_seqlen": 2048}
    example_tilelang_sparse_gqa_decode_varlen_indice.main(**overrides)


def test_example_tilelang_sparse_gqa_decode_varlen_mask():
    """Run the tilelang sparse-GQA decode (varlen, mask variant) example.

    Only ``batch`` and ``max_cache_seqlen`` are overridden; every other
    argument of the example's ``main()`` keeps its own default.
    """
    overrides = dict(batch=1, max_cache_seqlen=2048)
    example_tilelang_sparse_gqa_decode_varlen_mask.main(**overrides)


def test_example_triton_sparse_gqa_decode_varlen_indice():
    """Run the Triton sparse-GQA decode (varlen, index variant) example.

    All arguments are passed to the example's ``main()`` by keyword, using
    the explicit configuration below. The test fails if that call raises.
    """
    config = {
        "batch": 16,
        "heads": 16,
        "heads_kv": 8,
        "max_cache_seqlen": 4096,
        "dim": 128,
        "dim_v": 128,
        "sparse_ratio": 0.8,
        "block_size": 32,
    }
    example_triton_sparse_gqa_decode_varlen_indice.main(**config)


def test_example_triton_sparse_gqa_decode_varlen_mask():
    """Run the Triton sparse-GQA decode (varlen, mask variant) example.

    All arguments are passed to the example's ``main()`` by keyword, using
    the explicit configuration below. The test fails if that call raises.
    """
    config = {
        "batch": 16,
        "heads": 16,
        "heads_kv": 8,
        "max_cache_seqlen": 4096,
        "dim": 128,
        "dim_v": 128,
        "sparse_ratio": 0.8,
        "block_size": 32,
    }
    example_triton_sparse_gqa_decode_varlen_mask.main(**config)


# When this file is executed directly (rather than imported), hand control
# to tilelang's test entry point.
if __name__ == "__main__":
    tilelang.testing.main()