# test_bench_latency.py
import subprocess
import unittest

from sglang.srt.utils import kill_child_process
from sglang.test.test_utils import (
    DEFAULT_MODEL_NAME_FOR_TEST,
    DEFAULT_MOE_MODEL_NAME_FOR_TEST,
    is_in_ci,
    run_bench_latency,
)


class TestBenchLatency(unittest.TestCase):
    """Throughput checks built on ``run_bench_latency``.

    Each test runs the benchmark helper for one model and, only when
    ``is_in_ci()`` is true, requires the returned output throughput to exceed
    a fixed floor. Outside CI the benchmark still runs but no threshold is
    enforced.

    NOTE(review): the thresholds are presumably tokens per second and tuned
    for the CI hardware -- confirm against ``run_bench_latency``.
    """

    def test_default(self):
        """Benchmark the default test model with no extra arguments."""
        output_throughput = run_bench_latency(DEFAULT_MODEL_NAME_FOR_TEST, [])

        if is_in_ci():
            # Use a unittest assertion rather than a bare ``assert`` so the
            # check is not stripped when Python runs with ``-O``.
            self.assertGreater(output_throughput, 130, f"{output_throughput=}")

    def test_moe_default(self):
        """Benchmark the default MoE test model, passing ``--tp 2``."""
        # NOTE(review): ``--tp 2`` presumably selects tensor parallelism
        # across two devices -- confirm against the benchmark's CLI.
        output_throughput = run_bench_latency(
            DEFAULT_MOE_MODEL_NAME_FOR_TEST, ["--tp", "2"]
        )

        if is_in_ci():
            # Same rationale as in test_default: survives ``python -O``.
            self.assertGreater(output_throughput, 125, f"{output_throughput=}")


# Allow running this file directly as a script: hand control to unittest's
# command-line runner, which discovers and runs the tests in this module.
if __name__ == "__main__":
    unittest.main()