# test_bench_latency.py
import unittest

from sglang.test.test_utils import (
    DEFAULT_MODEL_NAME_FOR_TEST,
    DEFAULT_MOE_MODEL_NAME_FOR_TEST,
    is_in_ci,
    run_bench_latency,
)


class TestBenchLatency(unittest.TestCase):
    """Throughput checks built on ``run_bench_latency``.

    Each test calls the benchmark helper and, only when ``is_in_ci()`` is
    true, requires the returned value to exceed a fixed floor. Outside CI
    the benchmark still runs but no threshold is enforced.
    """

    def test_default(self):
        """Run the benchmark on the default test model with no extra args."""
        output_throughput = run_bench_latency(DEFAULT_MODEL_NAME_FOR_TEST, [])

        if is_in_ci():
            # Use the TestCase assertion rather than a bare ``assert`` so the
            # check is not stripped when Python runs with ``-O``.
            self.assertGreater(output_throughput, 130, f"{output_throughput=}")

    def test_moe_default(self):
        """Run the benchmark on the default MoE test model with ``--tp 2``."""
        output_throughput = run_bench_latency(
            DEFAULT_MOE_MODEL_NAME_FOR_TEST, ["--tp", "2"]
        )

        if is_in_ci():
            # Same rationale as above: keep the gate active under ``-O``.
            self.assertGreater(output_throughput, 125, f"{output_throughput=}")


# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()