test_bench_latency.py 2.12 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import os
import subprocess
import unittest

from sglang.srt.utils import kill_child_process
from sglang.test.test_utils import (
    DEFAULT_MODEL_NAME_FOR_TEST,
    DEFAULT_MOE_MODEL_NAME_FOR_TEST,
)


class TestBenchLatency(unittest.TestCase):
    """Run the ``sglang.bench_latency`` CLI and check the number it prints.

    Each test launches the benchmark in a subprocess with ``--batch-size 1``,
    ``--input 128`` and ``--output 8``, echoes the captured stdout/stderr, and
    parses a float out of the benchmark's stdout.  The minimum-value check is
    only enforced when the environment variable ``SGLANG_IS_IN_CI`` is exactly
    ``"true"``; elsewhere the tests only verify that the benchmark ran and
    produced parseable output.
    """

    def _run_bench_latency(self, model, extra_args=()):
        """Run the benchmark for ``model`` and return the parsed value.

        Args:
            model: Value passed to the CLI's ``--model-path`` option.
            extra_args: Additional CLI arguments appended after the common
                ones (for example ``("--tp", "2")``).

        Returns:
            The float found in the second-to-last space-separated field of
            the third-from-last line of stdout (presumably the reported
            throughput -- confirm against ``sglang.bench_latency`` output).

        Raises:
            AssertionError: (via ``self.fail``) if stdout does not have the
                expected shape, e.g. because the benchmark crashed.
        """
        command = [
            "python3",
            "-m",
            "sglang.bench_latency",
            "--model-path",
            model,
            "--batch-size",
            "1",
            "--input",
            "128",
            "--output",
            "8",
            *extra_args,
        ]
        process = subprocess.Popen(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )

        try:
            stdout, stderr = process.communicate()
            output = stdout.decode()
            error = stderr.decode()
            # Echo everything so CI logs contain the full benchmark report.
            print(f"Output: {output}")
            print(f"Error: {error}")

            try:
                # The value of interest sits on the third-from-last line
                # (splitting on "\n"), as the second-to-last token.
                result_line = output.split("\n")[-3]
                return float(result_line.split(" ")[-2])
            except (IndexError, ValueError) as exc:
                # Turn an opaque parsing error into a readable test failure;
                # the full stdout/stderr were already printed above.
                self.fail(
                    "Could not parse the result from sglang.bench_latency "
                    f"output (exit code {process.returncode}): {exc!r}"
                )
        finally:
            # communicate() has already waited for the launched process; this
            # project helper is still called on its pid for cleanup
            # (presumably to reap lingering child workers -- confirm).
            kill_child_process(process.pid)

    def _assert_min_value_in_ci(self, value, minimum):
        """Assert ``value > minimum``, but only when running in CI."""
        if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
            self.assertGreater(value, minimum)

    def test_default(self):
        """Benchmark the default test model; CI requires a value above 130."""
        value = self._run_bench_latency(DEFAULT_MODEL_NAME_FOR_TEST)
        self._assert_min_value_in_ci(value, 130)

    def test_moe_default(self):
        """Benchmark the default MoE test model with ``--tp 2``.

        CI requires a value above 125.
        """
        value = self._run_bench_latency(
            DEFAULT_MOE_MODEL_NAME_FOR_TEST, extra_args=("--tp", "2")
        )
        self._assert_min_value_in_ci(value, 125)


# Allow running this file directly as a script: unittest.main() runs the
# tests defined in this module.
if __name__ == "__main__":
    unittest.main()