Unverified commit 2d72fc47, authored by Lianmin Zheng and committed by GitHub
Browse files

Improve profiler and integrate profiler in bench_one_batch_server (#6787)

parent b520d028
......@@ -9,6 +9,7 @@ from sglang.test.test_utils import (
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
is_in_ci,
kill_process_tree,
popen_launch_server,
)
......@@ -88,6 +89,7 @@ class TestRadixCacheFCFS(CustomTestCase):
run_test(self.base_url, nodes)
@unittest.skipIf(is_in_ci(), "To reduce the CI execution time.")
class TestRadixCacheLPM(TestRadixCacheFCFS):
@classmethod
def setUpClass(cls):
......
......@@ -11,6 +11,7 @@ from sglang.test.test_utils import (
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
is_in_amd_ci,
popen_launch_server,
)
......@@ -68,7 +69,11 @@ class TestTorchCompile(CustomTestCase):
print(f"{res=}")
throughput = max_tokens / (tok - tic)
print(f"Throughput: {throughput} tokens/s")
self.assertGreaterEqual(throughput, 152)
if is_in_amd_ci():
self.assertGreaterEqual(throughput, 145)
else:
self.assertGreaterEqual(throughput, 152)
if __name__ == "__main__":
......
......@@ -4,6 +4,8 @@ python3 -m unittest test_vision_openai_server.TestOpenAIVisionServer.test_mixed_
python3 -m unittest test_vision_openai_server.TestOpenAIVisionServer.test_multi_images_chat_completion
"""
import unittest
from test_vision_openai_server_common import *
from sglang.srt.utils import kill_process_tree
......
import unittest
from test_vision_openai_server_common import *
from sglang.srt.utils import kill_process_tree
from sglang.test.test_utils import (
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server,
)
......@@ -75,28 +75,6 @@ class TestDeepseekVL2Server(TestOpenAIVisionServer):
pass
class TestDeepseekVL2TinyServer(TestOpenAIVisionServer):
    """Vision-server test case configured for the deepseek-vl2-tiny model."""

    @classmethod
    def setUpClass(cls):
        """Launch the server once for the class and record its settings.

        The server is started against the bare default URL; the "/v1"
        suffix is appended to ``cls.base_url`` only after the launch call.
        """
        launch_flags = ["--trust-remote-code", "--context-length", "4096"]

        cls.model = "deepseek-ai/deepseek-vl2-tiny"
        cls.base_url = DEFAULT_URL_FOR_TEST
        cls.api_key = "sk-123456"
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            other_args=launch_flags,
        )
        cls.base_url = f"{cls.base_url}/v1"

    def test_video_chat_completion(self):
        # Intentionally a no-op; presumably this disables a video test
        # inherited from the base class -- confirm against
        # TestOpenAIVisionServer.
        pass
class TestJanusProServer(TestOpenAIVisionServer):
@classmethod
def setUpClass(cls):
......
......@@ -2,7 +2,6 @@ import base64
import io
import json
import os
import unittest
from concurrent.futures import ThreadPoolExecutor
import numpy as np
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment