# test_bench_latency.py
import unittest

from sglang.test.test_utils import (
    DEFAULT_MODEL_NAME_FOR_TEST,
    DEFAULT_MOE_MODEL_NAME_FOR_TEST,
    is_in_ci,
    run_bench_latency,
)


class TestBenchLatency(unittest.TestCase):
    """Throughput checks built on ``run_bench_latency``.

    Each test runs the benchmark helper for one model and, only when
    ``is_in_ci()`` is true, requires the returned output throughput to
    exceed a fixed floor. Outside CI the benchmark still runs, but no
    assertion is made on its result.
    """

    def test_default(self):
        """Benchmark the default test model with no extra arguments."""
        output_throughput = run_bench_latency(DEFAULT_MODEL_NAME_FOR_TEST, [])

        # The floor is enforced only in CI.
        # NOTE(review): presumably the number is tuned to the CI hardware,
        # and its units are whatever run_bench_latency reports -- confirm.
        if is_in_ci():
            self.assertGreater(output_throughput, 135)

    def test_moe_default(self):
        """Benchmark the default MoE test model with ``--tp 2``."""
        # NOTE(review): "--tp 2" presumably selects tensor parallelism across
        # two devices -- confirm against run_bench_latency's argument handling.
        output_throughput = run_bench_latency(
            DEFAULT_MOE_MODEL_NAME_FOR_TEST, ["--tp", "2"]
        )

        # Same CI-only gating as test_default, with a separate floor.
        if is_in_ci():
            self.assertGreater(output_throughput, 125)


# When this file is executed as a script (rather than imported), hand control
# to unittest's command-line entry point.
if __name__ == "__main__":
    unittest.main()