Improve profiler and integrate profiler in bench_one_batch_server (#6787)

2d72fc47 · Lianmin Zheng · GitHub · b520d028 · 2d72fc47 · 2d72fc47
Unverified commit 2d72fc47, authored May 31, 2025 by Lianmin Zheng; committed by GitHub on May 31, 2025.
5 changed files
--- a/test/srt/test_radix_attention.py
+++ b/test/srt/test_radix_attention.py
@@ -9,6 +9,7 @@ from sglang.test.test_utils import (
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
    CustomTestCase,
+    is_in_ci,
    kill_process_tree,
    popen_launch_server,
 )
@@ -88,6 +89,7 @@ class TestRadixCacheFCFS(CustomTestCase):
        run_test(self.base_url, nodes)
+@unittest.skipIf(is_in_ci(), "To reduce the CI execution time.")
 class TestRadixCacheLPM(TestRadixCacheFCFS):
    @classmethod
    def setUpClass(cls):

--- a/test/srt/test_torch_compile.py
+++ b/test/srt/test_torch_compile.py
@@ -11,6 +11,7 @@ from sglang.test.test_utils import (
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
    CustomTestCase,
+    is_in_amd_ci,
    popen_launch_server,
 )
@@ -68,7 +69,11 @@ class TestTorchCompile(CustomTestCase):
        print(f"{res=}")
        throughput = max_tokens / (tok - tic)
        print(f"Throughput: {throughput} tokens/s")
-        self.assertGreaterEqual(throughput, 152)
+        if is_in_amd_ci():
+            self.assertGreaterEqual(throughput, 145)
+        else:
+            self.assertGreaterEqual(throughput, 152)
 if __name__ == "__main__":

--- a/test/srt/test_vision_openai_server_a.py
+++ b/test/srt/test_vision_openai_server_a.py
@@ -4,6 +4,8 @@ python3 -m unittest test_vision_openai_server.TestOpenAIVisionServer.test_mixed_
 python3 -m unittest test_vision_openai_server.TestOpenAIVisionServer.test_multi_images_chat_completion
 """
+import unittest
 from test_vision_openai_server_common import *
 from sglang.srt.utils import kill_process_tree

--- a/test/srt/test_vision_openai_server_b.py
+++ b/test/srt/test_vision_openai_server_b.py
+import unittest
 from test_vision_openai_server_common import *
-from sglang.srt.utils import kill_process_tree
 from sglang.test.test_utils import (
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
-    CustomTestCase,
    popen_launch_server,
 )
@@ -75,28 +75,6 @@ class TestDeepseekVL2Server(TestOpenAIVisionServer):
        pass
-class TestDeepseekVL2TinyServer(TestOpenAIVisionServer):
-    @classmethod
-    def setUpClass(cls):
-        cls.model = "deepseek-ai/deepseek-vl2-tiny"
-        cls.base_url = DEFAULT_URL_FOR_TEST
-        cls.api_key = "sk-123456"
-        cls.process = popen_launch_server(
-            cls.model,
-            cls.base_url,
-            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
-            other_args=[
-                "--trust-remote-code",
-                "--context-length",
-                "4096",
-            ],
-        )
-        cls.base_url += "/v1"
-    def test_video_chat_completion(self):
-        pass
 class TestJanusProServer(TestOpenAIVisionServer):
    @classmethod
    def setUpClass(cls):

--- a/test/srt/test_vision_openai_server_common.py
+++ b/test/srt/test_vision_openai_server_common.py
@@ -2,7 +2,6 @@ import base64
 import io
 import json
 import os
-import unittest
 from concurrent.futures import ThreadPoolExecutor
 import numpy as np