test_overlap_schedule.py 1000 Bytes
Newer Older
Lianmin Zheng's avatar
Lianmin Zheng committed
1
2
"""
Usage:
3
4
python3 -m unittest test_overlap_schedule.TestOverlapSchedule.test_radix_attention_chunked_prefill
python3 test_overlap_schedule.py
Lianmin Zheng's avatar
Lianmin Zheng committed
5
6
7
8
"""

import unittest

9
from sglang.test.test_utils import run_mmlu_test
Lianmin Zheng's avatar
Lianmin Zheng committed
10
11
12
13


class TestOverlapSchedule(unittest.TestCase):
    def test_no_radix_attention_chunked_prefill(self):
14
15
16
        run_mmlu_test(
            disable_radix_cache=True, chunked_prefill_size=32, enable_overlap=True
        )
Lianmin Zheng's avatar
Lianmin Zheng committed
17
18

    def test_no_radix_attention_no_chunked_prefill(self):
19
20
21
        run_mmlu_test(
            disable_radix_cache=True, chunked_prefill_size=-1, enable_overlap=True
        )
Lianmin Zheng's avatar
Lianmin Zheng committed
22
23

    def test_radix_attention_chunked_prefill(self):
24
25
26
        run_mmlu_test(
            disable_radix_cache=False, chunked_prefill_size=32, enable_overlap=True
        )
Lianmin Zheng's avatar
Lianmin Zheng committed
27
28

    def test_radix_attention_no_chunked_prefill(self):
29
30
31
        run_mmlu_test(
            disable_radix_cache=False, chunked_prefill_size=-1, enable_overlap=True
        )
Lianmin Zheng's avatar
Lianmin Zheng committed
32
33
34
35


if __name__ == "__main__":
    unittest.main()