Unverified commit 158e8f1e, authored by Mingyi, committed by GitHub
Browse files

improve the threshold and ports in tests (#1215)

parent d3efcb39
...@@ -23,18 +23,14 @@ from sglang.utils import get_exception_traceback ...@@ -23,18 +23,14 @@ from sglang.utils import get_exception_traceback
DEFAULT_MODEL_NAME_FOR_TEST = "meta-llama/Meta-Llama-3.1-8B-Instruct" DEFAULT_MODEL_NAME_FOR_TEST = "meta-llama/Meta-Llama-3.1-8B-Instruct"
DEFAULT_MOE_MODEL_NAME_FOR_TEST = "mistralai/Mixtral-8x7B-Instruct-v0.1" DEFAULT_MOE_MODEL_NAME_FOR_TEST = "mistralai/Mixtral-8x7B-Instruct-v0.1"
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH = 600
if os.getenv("SGLANG_IS_IN_CI", "false") == "true": if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
DEFAULT_PORT_FOR_SRT_TEST_RUNNER = 5157 DEFAULT_PORT_FOR_SRT_TEST_RUNNER = 5157
DEFAULT_URL_FOR_MOE_TEST = "http://127.0.0.1:6157" DEFAULT_URL_FOR_TEST = "http://127.0.0.1:6157"
DEFAULT_URL_FOR_ACCURACY_TEST = "http://127.0.0.1:7157"
DEFAULT_URL_FOR_UNIT_TEST = "http://127.0.0.1:8157"
DEFAULT_URL_FOR_E2E_TEST = "http://127.0.0.1:9157"
else: else:
DEFAULT_URL_FOR_MOE_TEST = "http://127.0.0.1:1157" DEFAULT_PORT_FOR_SRT_TEST_RUNNER = 1157
DEFAULT_URL_FOR_ACCURACY_TEST = "http://127.0.0.1:1257" DEFAULT_URL_FOR_TEST = "http://127.0.0.1:2157"
DEFAULT_URL_FOR_UNIT_TEST = "http://127.0.0.1:1357"
DEFAULT_URL_FOR_E2E_TEST = "http://127.0.0.1:1457"
def call_generate_lightllm(prompt, temperature, max_tokens, stop=None, url=None): def call_generate_lightllm(prompt, temperature, max_tokens, stop=None, url=None):
......
...@@ -7,7 +7,8 @@ import requests ...@@ -7,7 +7,8 @@ import requests
from sglang.srt.utils import kill_child_process from sglang.srt.utils import kill_child_process
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_UNIT_TEST, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
popen_launch_server, popen_launch_server,
) )
...@@ -17,11 +18,11 @@ class TestBatchPenalizerE2E(unittest.TestCase): ...@@ -17,11 +18,11 @@ class TestBatchPenalizerE2E(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MODEL_NAME_FOR_TEST
cls.base_url = DEFAULT_URL_FOR_UNIT_TEST cls.base_url = DEFAULT_URL_FOR_TEST
cls.process = popen_launch_server( cls.process = popen_launch_server(
cls.model, cls.model,
cls.base_url, cls.base_url,
timeout=300, timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
other_args=( other_args=(
"--random-seed", "--random-seed",
"0", "0",
......
...@@ -5,7 +5,8 @@ from sglang.srt.utils import kill_child_process ...@@ -5,7 +5,8 @@ from sglang.srt.utils import kill_child_process
from sglang.test.run_eval import run_eval from sglang.test.run_eval import run_eval
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_UNIT_TEST, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
popen_launch_server, popen_launch_server,
) )
...@@ -20,11 +21,11 @@ class TestChunkedPrefill(unittest.TestCase): ...@@ -20,11 +21,11 @@ class TestChunkedPrefill(unittest.TestCase):
other_args += ["--enable-mixed-chunk"] other_args += ["--enable-mixed-chunk"]
model = DEFAULT_MODEL_NAME_FOR_TEST model = DEFAULT_MODEL_NAME_FOR_TEST
base_url = DEFAULT_URL_FOR_UNIT_TEST base_url = DEFAULT_URL_FOR_TEST
process = popen_launch_server( process = popen_launch_server(
model, model,
base_url, base_url,
timeout=300, timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
other_args=other_args, other_args=other_args,
) )
......
...@@ -4,17 +4,24 @@ import openai ...@@ -4,17 +4,24 @@ import openai
from sglang.srt.hf_transformers_utils import get_tokenizer from sglang.srt.hf_transformers_utils import get_tokenizer
from sglang.srt.utils import kill_child_process from sglang.srt.utils import kill_child_process
from sglang.test.test_utils import DEFAULT_URL_FOR_UNIT_TEST, popen_launch_server from sglang.test.test_utils import (
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
popen_launch_server,
)
class TestOpenAIServer(unittest.TestCase): class TestOpenAIServer(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = "intfloat/e5-mistral-7b-instruct" cls.model = "intfloat/e5-mistral-7b-instruct"
cls.base_url = DEFAULT_URL_FOR_UNIT_TEST cls.base_url = DEFAULT_URL_FOR_TEST
cls.api_key = "sk-123456" cls.api_key = "sk-123456"
cls.process = popen_launch_server( cls.process = popen_launch_server(
cls.model, cls.base_url, timeout=300, api_key=cls.api_key cls.model,
cls.base_url,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
api_key=cls.api_key,
) )
cls.base_url += "/v1" cls.base_url += "/v1"
cls.tokenizer = get_tokenizer(cls.model) cls.tokenizer = get_tokenizer(cls.model)
......
...@@ -5,8 +5,8 @@ from sglang.srt.utils import kill_child_process ...@@ -5,8 +5,8 @@ from sglang.srt.utils import kill_child_process
from sglang.test.run_eval import run_eval from sglang.test.run_eval import run_eval
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_ACCURACY_TEST, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_UNIT_TEST, DEFAULT_URL_FOR_TEST,
popen_launch_server, popen_launch_server,
) )
...@@ -15,11 +15,11 @@ class TestEvalAccuracyLarge(unittest.TestCase): ...@@ -15,11 +15,11 @@ class TestEvalAccuracyLarge(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MODEL_NAME_FOR_TEST
cls.base_url = DEFAULT_URL_FOR_ACCURACY_TEST cls.base_url = DEFAULT_URL_FOR_TEST
cls.process = popen_launch_server( cls.process = popen_launch_server(
cls.model, cls.model,
cls.base_url, cls.base_url,
timeout=300, timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
other_args=["--log-level-http", "warning"], other_args=["--log-level-http", "warning"],
) )
......
...@@ -5,7 +5,8 @@ from sglang.srt.utils import kill_child_process ...@@ -5,7 +5,8 @@ from sglang.srt.utils import kill_child_process
from sglang.test.run_eval import run_eval from sglang.test.run_eval import run_eval
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_ACCURACY_TEST, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
popen_launch_server, popen_launch_server,
) )
...@@ -14,11 +15,11 @@ class TestEvalAccuracyLargeChunkedPrefill(unittest.TestCase): ...@@ -14,11 +15,11 @@ class TestEvalAccuracyLargeChunkedPrefill(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MODEL_NAME_FOR_TEST
cls.base_url = DEFAULT_URL_FOR_ACCURACY_TEST cls.base_url = DEFAULT_URL_FOR_TEST
cls.process = popen_launch_server( cls.process = popen_launch_server(
cls.model, cls.model,
cls.base_url, cls.base_url,
timeout=300, timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
other_args=["--log-level-http", "warning", "--chunked-prefill-size", "256"], other_args=["--log-level-http", "warning", "--chunked-prefill-size", "256"],
) )
......
...@@ -5,7 +5,8 @@ from sglang.srt.utils import kill_child_process ...@@ -5,7 +5,8 @@ from sglang.srt.utils import kill_child_process
from sglang.test.run_eval import run_eval from sglang.test.run_eval import run_eval
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_ACCURACY_TEST, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
popen_launch_server, popen_launch_server,
) )
...@@ -14,11 +15,11 @@ class TestEvalAccuracyLargeChunkedPrefill(unittest.TestCase): ...@@ -14,11 +15,11 @@ class TestEvalAccuracyLargeChunkedPrefill(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MODEL_NAME_FOR_TEST
cls.base_url = DEFAULT_URL_FOR_ACCURACY_TEST cls.base_url = DEFAULT_URL_FOR_TEST
cls.process = popen_launch_server( cls.process = popen_launch_server(
cls.model, cls.model,
cls.base_url, cls.base_url,
timeout=300, timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
other_args=[ other_args=[
"--log-level-http", "--log-level-http",
"warning", "warning",
......
...@@ -5,7 +5,8 @@ from sglang.srt.utils import kill_child_process ...@@ -5,7 +5,8 @@ from sglang.srt.utils import kill_child_process
from sglang.test.run_eval import run_eval from sglang.test.run_eval import run_eval
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_UNIT_TEST, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
popen_launch_server, popen_launch_server,
) )
...@@ -14,8 +15,10 @@ class TestEvalAccuracyMini(unittest.TestCase): ...@@ -14,8 +15,10 @@ class TestEvalAccuracyMini(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MODEL_NAME_FOR_TEST
cls.base_url = DEFAULT_URL_FOR_UNIT_TEST cls.base_url = DEFAULT_URL_FOR_TEST
cls.process = popen_launch_server(cls.model, cls.base_url, timeout=300) cls.process = popen_launch_server(
cls.model, cls.base_url, timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
)
@classmethod @classmethod
def tearDownClass(cls): def tearDownClass(cls):
......
...@@ -10,7 +10,8 @@ from sglang.srt.hf_transformers_utils import get_tokenizer ...@@ -10,7 +10,8 @@ from sglang.srt.hf_transformers_utils import get_tokenizer
from sglang.srt.utils import kill_child_process from sglang.srt.utils import kill_child_process
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_UNIT_TEST, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
popen_launch_server, popen_launch_server,
) )
...@@ -19,12 +20,12 @@ class TestOpenAIServer(unittest.TestCase): ...@@ -19,12 +20,12 @@ class TestOpenAIServer(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MODEL_NAME_FOR_TEST
cls.base_url = DEFAULT_URL_FOR_UNIT_TEST cls.base_url = DEFAULT_URL_FOR_TEST
cls.api_key = "sk-123456" cls.api_key = "sk-123456"
cls.process = popen_launch_server( cls.process = popen_launch_server(
cls.model, cls.model,
cls.base_url, cls.base_url,
timeout=300, timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
api_key=cls.api_key, api_key=cls.api_key,
other_args=("--max-total-token", "1024"), other_args=("--max-total-token", "1024"),
env={"SGLANG_CLIP_MAX_NEW_TOKENS": "256", **os.environ}, env={"SGLANG_CLIP_MAX_NEW_TOKENS": "256", **os.environ},
......
...@@ -7,7 +7,8 @@ from sglang.srt.server_args import ServerArgs ...@@ -7,7 +7,8 @@ from sglang.srt.server_args import ServerArgs
from sglang.srt.utils import kill_child_process from sglang.srt.utils import kill_child_process
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_MOE_MODEL_NAME_FOR_TEST, DEFAULT_MOE_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_MOE_TEST, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
popen_launch_server, popen_launch_server,
) )
...@@ -25,9 +26,12 @@ class TestServingThroughput(unittest.TestCase): ...@@ -25,9 +26,12 @@ class TestServingThroughput(unittest.TestCase):
other_args.append("--enable-p2p-check") other_args.append("--enable-p2p-check")
model = DEFAULT_MOE_MODEL_NAME_FOR_TEST model = DEFAULT_MOE_MODEL_NAME_FOR_TEST
base_url = DEFAULT_URL_FOR_MOE_TEST base_url = DEFAULT_URL_FOR_TEST
process = popen_launch_server( process = popen_launch_server(
model, base_url, timeout=300, other_args=other_args model,
base_url,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
other_args=other_args,
) )
# Run benchmark # Run benchmark
...@@ -72,8 +76,8 @@ class TestServingThroughput(unittest.TestCase): ...@@ -72,8 +76,8 @@ class TestServingThroughput(unittest.TestCase):
) )
if os.getenv("SGLANG_IS_IN_CI", "false") == "true": if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
# A100 (PCIE) performance # A100 (PCIE): 950, H100 (SMX): 1800
assert res["output_throughput"] > 910 assert res["output_throughput"] > 1750
def test_default_without_radix_cache(self): def test_default_without_radix_cache(self):
res = self.run_test( res = self.run_test(
...@@ -83,19 +87,8 @@ class TestServingThroughput(unittest.TestCase): ...@@ -83,19 +87,8 @@ class TestServingThroughput(unittest.TestCase):
) )
if os.getenv("SGLANG_IS_IN_CI", "false") == "true": if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
# A100 (PCIE) performance # A100 (PCIE): 950, H100 (SMX): 1900
assert res["output_throughput"] > 910 assert res["output_throughput"] > 1850
def test_default_without_chunked_prefill(self):
res = self.run_test(
disable_radix_cache=ServerArgs.disable_radix_cache,
disable_flashinfer=ServerArgs.disable_flashinfer,
chunked_prefill_size=-1,
)
if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
# A100 (PCIE) performance
print(res["output_throughput"])
def test_all_cases(self): def test_all_cases(self):
for disable_radix_cache in [False, True]: for disable_radix_cache in [False, True]:
......
...@@ -8,7 +8,8 @@ from sglang.srt.hf_transformers_utils import get_tokenizer ...@@ -8,7 +8,8 @@ from sglang.srt.hf_transformers_utils import get_tokenizer
from sglang.srt.utils import kill_child_process from sglang.srt.utils import kill_child_process
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_UNIT_TEST, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
popen_launch_server, popen_launch_server,
) )
...@@ -17,10 +18,13 @@ class TestOpenAIServer(unittest.TestCase): ...@@ -17,10 +18,13 @@ class TestOpenAIServer(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MODEL_NAME_FOR_TEST
cls.base_url = DEFAULT_URL_FOR_UNIT_TEST cls.base_url = DEFAULT_URL_FOR_TEST
cls.api_key = "sk-123456" cls.api_key = "sk-123456"
cls.process = popen_launch_server( cls.process = popen_launch_server(
cls.model, cls.base_url, timeout=300, api_key=cls.api_key cls.model,
cls.base_url,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
api_key=cls.api_key,
) )
cls.base_url += "/v1" cls.base_url += "/v1"
cls.tokenizer = get_tokenizer(DEFAULT_MODEL_NAME_FOR_TEST) cls.tokenizer = get_tokenizer(DEFAULT_MODEL_NAME_FOR_TEST)
......
...@@ -7,7 +7,8 @@ from sglang.srt.server_args import ServerArgs ...@@ -7,7 +7,8 @@ from sglang.srt.server_args import ServerArgs
from sglang.srt.utils import kill_child_process from sglang.srt.utils import kill_child_process
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_E2E_TEST, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
popen_launch_server, popen_launch_server,
) )
...@@ -23,9 +24,12 @@ class TestServingThroughput(unittest.TestCase): ...@@ -23,9 +24,12 @@ class TestServingThroughput(unittest.TestCase):
other_args.extend(["--chunked-prefill-size", str(chunked_prefill_size)]) other_args.extend(["--chunked-prefill-size", str(chunked_prefill_size)])
model = DEFAULT_MODEL_NAME_FOR_TEST model = DEFAULT_MODEL_NAME_FOR_TEST
base_url = DEFAULT_URL_FOR_E2E_TEST base_url = DEFAULT_URL_FOR_TEST
process = popen_launch_server( process = popen_launch_server(
model, base_url, timeout=300, other_args=other_args model,
base_url,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
other_args=other_args,
) )
# Run benchmark # Run benchmark
...@@ -70,8 +74,8 @@ class TestServingThroughput(unittest.TestCase): ...@@ -70,8 +74,8 @@ class TestServingThroughput(unittest.TestCase):
) )
if os.getenv("SGLANG_IS_IN_CI", "false") == "true": if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
# A100 (PCIE) performance # A100 (PCIE): 1450, H100 (SMX): 2550
assert res["output_throughput"] > 1400 assert res["output_throughput"] > 2500
def test_default_without_radix_cache(self): def test_default_without_radix_cache(self):
res = self.run_test( res = self.run_test(
...@@ -81,8 +85,8 @@ class TestServingThroughput(unittest.TestCase): ...@@ -81,8 +85,8 @@ class TestServingThroughput(unittest.TestCase):
) )
if os.getenv("SGLANG_IS_IN_CI", "false") == "true": if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
# A100 (PCIE) performance # A100 (PCIE): 1500, H100 (SMX): 2850
assert res["output_throughput"] > 1450 assert res["output_throughput"] > 2800
def test_default_without_chunked_prefill(self): def test_default_without_chunked_prefill(self):
res = self.run_test( res = self.run_test(
...@@ -92,8 +96,8 @@ class TestServingThroughput(unittest.TestCase): ...@@ -92,8 +96,8 @@ class TestServingThroughput(unittest.TestCase):
) )
if os.getenv("SGLANG_IS_IN_CI", "false") == "true": if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
# A100 (PCIE) performance # A100 (PCIE): 1450, H100 (SMX): 2550
assert res["output_throughput"] > 1400 assert res["output_throughput"] > 2500
def test_all_cases(self): def test_all_cases(self):
for disable_radix_cache in [False, True]: for disable_radix_cache in [False, True]:
......
...@@ -6,7 +6,8 @@ import requests ...@@ -6,7 +6,8 @@ import requests
from sglang.srt.utils import kill_child_process from sglang.srt.utils import kill_child_process
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_UNIT_TEST, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
popen_launch_server, popen_launch_server,
) )
...@@ -15,9 +16,12 @@ class TestSkipTokenizerInit(unittest.TestCase): ...@@ -15,9 +16,12 @@ class TestSkipTokenizerInit(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MODEL_NAME_FOR_TEST
cls.base_url = DEFAULT_URL_FOR_UNIT_TEST cls.base_url = DEFAULT_URL_FOR_TEST
cls.process = popen_launch_server( cls.process = popen_launch_server(
cls.model, cls.base_url, timeout=300, other_args=["--skip-tokenizer-init"] cls.model,
cls.base_url,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
other_args=["--skip-tokenizer-init"],
) )
@classmethod @classmethod
......
...@@ -6,7 +6,8 @@ import requests ...@@ -6,7 +6,8 @@ import requests
from sglang.srt.utils import kill_child_process from sglang.srt.utils import kill_child_process
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_UNIT_TEST, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
popen_launch_server, popen_launch_server,
) )
...@@ -15,8 +16,10 @@ class TestSRTEndpoint(unittest.TestCase): ...@@ -15,8 +16,10 @@ class TestSRTEndpoint(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MODEL_NAME_FOR_TEST
cls.base_url = DEFAULT_URL_FOR_UNIT_TEST cls.base_url = DEFAULT_URL_FOR_TEST
cls.process = popen_launch_server(cls.model, cls.base_url, timeout=300) cls.process = popen_launch_server(
cls.model, cls.base_url, timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
)
@classmethod @classmethod
def tearDownClass(cls): def tearDownClass(cls):
......
...@@ -5,7 +5,8 @@ from sglang.srt.utils import kill_child_process ...@@ -5,7 +5,8 @@ from sglang.srt.utils import kill_child_process
from sglang.test.run_eval import run_eval from sglang.test.run_eval import run_eval
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_UNIT_TEST, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
popen_launch_server, popen_launch_server,
) )
...@@ -14,9 +15,12 @@ class TestTorchCompile(unittest.TestCase): ...@@ -14,9 +15,12 @@ class TestTorchCompile(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MODEL_NAME_FOR_TEST
cls.base_url = DEFAULT_URL_FOR_UNIT_TEST cls.base_url = DEFAULT_URL_FOR_TEST
cls.process = popen_launch_server( cls.process = popen_launch_server(
cls.model, cls.base_url, timeout=300, other_args=["--enable-torch-compile"] cls.model,
cls.base_url,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
other_args=["--enable-torch-compile"],
) )
@classmethod @classmethod
......
...@@ -5,7 +5,8 @@ from sglang.srt.utils import kill_child_process ...@@ -5,7 +5,8 @@ from sglang.srt.utils import kill_child_process
from sglang.test.run_eval import run_eval from sglang.test.run_eval import run_eval
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_UNIT_TEST, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
popen_launch_server, popen_launch_server,
) )
...@@ -14,9 +15,12 @@ class TestTritonAttnBackend(unittest.TestCase): ...@@ -14,9 +15,12 @@ class TestTritonAttnBackend(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MODEL_NAME_FOR_TEST
cls.base_url = DEFAULT_URL_FOR_UNIT_TEST cls.base_url = DEFAULT_URL_FOR_TEST
cls.process = popen_launch_server( cls.process = popen_launch_server(
cls.model, cls.base_url, timeout=300, other_args=["--disable-flashinfer"] cls.model,
cls.base_url,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
other_args=["--disable-flashinfer"],
) )
@classmethod @classmethod
......
...@@ -6,7 +6,8 @@ import requests ...@@ -6,7 +6,8 @@ import requests
from sglang.srt.utils import kill_child_process from sglang.srt.utils import kill_child_process
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_UNIT_TEST, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
popen_launch_server, popen_launch_server,
) )
...@@ -15,8 +16,10 @@ class TestReplaceWeights(unittest.TestCase): ...@@ -15,8 +16,10 @@ class TestReplaceWeights(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MODEL_NAME_FOR_TEST
cls.base_url = DEFAULT_URL_FOR_UNIT_TEST cls.base_url = DEFAULT_URL_FOR_TEST
cls.process = popen_launch_server(cls.model, cls.base_url, timeout=300) cls.process = popen_launch_server(
cls.model, cls.base_url, timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
)
@classmethod @classmethod
def tearDownClass(cls): def tearDownClass(cls):
......
...@@ -11,19 +11,23 @@ from decord import VideoReader, cpu ...@@ -11,19 +11,23 @@ from decord import VideoReader, cpu
from PIL import Image from PIL import Image
from sglang.srt.utils import kill_child_process from sglang.srt.utils import kill_child_process
from sglang.test.test_utils import DEFAULT_URL_FOR_UNIT_TEST, popen_launch_server from sglang.test.test_utils import (
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
popen_launch_server,
)
class TestOpenAIVisionServer(unittest.TestCase): class TestOpenAIVisionServer(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = "lmms-lab/llava-onevision-qwen2-0.5b-ov" cls.model = "lmms-lab/llava-onevision-qwen2-0.5b-ov"
cls.base_url = DEFAULT_URL_FOR_UNIT_TEST cls.base_url = DEFAULT_URL_FOR_TEST
cls.api_key = "sk-123456" cls.api_key = "sk-123456"
cls.process = popen_launch_server( cls.process = popen_launch_server(
cls.model, cls.model,
cls.base_url, cls.base_url,
timeout=300, timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
api_key=cls.api_key, api_key=cls.api_key,
other_args=[ other_args=[
"--chat-template", "--chat-template",
...@@ -67,7 +71,7 @@ class TestOpenAIVisionServer(unittest.TestCase): ...@@ -67,7 +71,7 @@ class TestOpenAIVisionServer(unittest.TestCase):
assert response.choices[0].message.role == "assistant" assert response.choices[0].message.role == "assistant"
text = response.choices[0].message.content text = response.choices[0].message.content
assert isinstance(text, str) assert isinstance(text, str)
assert "logo" in text, text assert "man" in text or "cab" in text, text
assert response.id assert response.id
assert response.created assert response.created
assert response.usage.prompt_tokens > 0 assert response.usage.prompt_tokens > 0
...@@ -86,18 +90,19 @@ class TestOpenAIVisionServer(unittest.TestCase): ...@@ -86,18 +90,19 @@ class TestOpenAIVisionServer(unittest.TestCase):
{ {
"type": "image_url", "type": "image_url",
"image_url": { "image_url": {
"url": "https://raw.githubusercontent.com/sgl-project/sglang/main/assets/logo.png" "url": "https://raw.githubusercontent.com/sgl-project/sglang/main/test/lang/example_image.png"
}, },
}, },
{ {
"type": "image_url", "type": "image_url",
"image_url": { "image_url": {
"url": "https://raw.githubusercontent.com/sgl-project/sglang/main/test/lang/example_image.png" "url": "https://raw.githubusercontent.com/sgl-project/sglang/main/assets/logo.png"
}, },
}, },
{ {
"type": "text", "type": "text",
"text": "I have shown you two images. Please describe the two images to me.", "text": "I have two very different images. They are not related at all. "
"Please describe the first image in one sentence, and then describe the second image in another sentence.",
}, },
], ],
}, },
...@@ -108,8 +113,9 @@ class TestOpenAIVisionServer(unittest.TestCase): ...@@ -108,8 +113,9 @@ class TestOpenAIVisionServer(unittest.TestCase):
assert response.choices[0].message.role == "assistant" assert response.choices[0].message.role == "assistant"
text = response.choices[0].message.content text = response.choices[0].message.content
assert isinstance(text, str) assert isinstance(text, str)
print(text)
assert "man" in text or "cab" in text, text assert "man" in text or "cab" in text, text
assert "logo" in text, text # assert "logo" in text, text
assert response.id assert response.id
assert response.created assert response.created
assert response.usage.prompt_tokens > 0 assert response.usage.prompt_tokens > 0
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment