import os
import subprocess
import unittest

from sglang.srt.utils import kill_child_process
from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST


class TestServingLatency(unittest.TestCase):
    """Smoke test for ``python -m sglang.bench_latency``.

    Runs the latency benchmark as a subprocess with a small fixed workload
    (batch size 1, 128 input tokens, 8 output tokens), parses the throughput
    figure from its stdout, and — only when running in CI — asserts it
    exceeds a minimum threshold.
    """

    def test_default(self):
        command = [
            "python3",
            "-m",
            "sglang.bench_latency",
            "--model",
            DEFAULT_MODEL_NAME_FOR_TEST,
            "--batch-size",
            "1",
            "--input",
            "128",
            "--output",
            "8",
        ]
        process = subprocess.Popen(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
        try:
            stdout, stderr = process.communicate()
            output = stdout.decode()
            error = stderr.decode()
            print(f"Output: {output}")
            print(f"Error: {error}")

            # The benchmark's summary is expected near the end of stdout:
            # the third-to-last line carries the metric, whose second-to-last
            # whitespace-separated token is the numeric value (the last token
            # presumably being the unit) — TODO confirm against
            # sglang.bench_latency's output format.
            lastline = output.split("\n")[-3]
            value = float(lastline.split(" ")[-2])

            if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
                # Minimum acceptable throughput in CI; assertGreater (rather
                # than a bare assert) survives `python -O` and reports the
                # measured value on failure.
                self.assertGreater(value, 130)
        finally:
            # Always reap the benchmark's child processes, even when parsing
            # or the assertion fails, so a failing test does not leak them.
            kill_child_process(process.pid)


# Allow running this test file directly: `python3 test_serving_latency.py`.
if __name__ == "__main__":
    unittest.main()