Unverified commit b7a2df0a, authored by Chang Su and committed by GitHub

refactor(test): reorganize OpenAI test file structure (#7408)

parent 1998ce40
import unittest
import openai
from sglang.srt.utils import kill_process_tree
from sglang.test.test_utils import (
DEFAULT_SMALL_EMBEDDING_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server,
)
class TestOpenAIEmbedding(CustomTestCase):
@classmethod
def setUpClass(cls):
cls.model = DEFAULT_SMALL_EMBEDDING_MODEL_NAME_FOR_TEST
cls.base_url = DEFAULT_URL_FOR_TEST
cls.api_key = "sk-123456"
# Configure embedding-specific args
other_args = ["--is-embedding", "--enable-metrics"]
cls.process = popen_launch_server(
cls.model,
cls.base_url,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
api_key=cls.api_key,
other_args=other_args,
)
cls.base_url += "/v1"
@classmethod
def tearDownClass(cls):
kill_process_tree(cls.process.pid)
def test_embedding_single(self):
"""Test single embedding request"""
client = openai.Client(api_key=self.api_key, base_url=self.base_url)
response = client.embeddings.create(model=self.model, input="Hello world")
self.assertEqual(len(response.data), 1)
self.assertTrue(len(response.data[0].embedding) > 0)
def test_embedding_batch(self):
"""Test batch embedding request"""
client = openai.Client(api_key=self.api_key, base_url=self.base_url)
response = client.embeddings.create(
model=self.model, input=["Hello world", "Test text"]
)
self.assertEqual(len(response.data), 2)
self.assertTrue(len(response.data[0].embedding) > 0)
self.assertTrue(len(response.data[1].embedding) > 0)
def test_embedding_single_batch_str(self):
"""Test embedding with a List[str] and length equals to 1"""
client = openai.Client(api_key=self.api_key, base_url=self.base_url)
response = client.embeddings.create(model=self.model, input=["Hello world"])
self.assertEqual(len(response.data), 1)
self.assertTrue(len(response.data[0].embedding) > 0)
def test_embedding_single_int_list(self):
"""Test embedding with a List[int] or List[List[int]]]"""
client = openai.Client(api_key=self.api_key, base_url=self.base_url)
response = client.embeddings.create(
model=self.model,
input=[[15339, 314, 703, 284, 612, 262, 10658, 10188, 286, 2061]],
)
self.assertEqual(len(response.data), 1)
self.assertTrue(len(response.data[0].embedding) > 0)
client = openai.Client(api_key=self.api_key, base_url=self.base_url)
response = client.embeddings.create(
model=self.model,
input=[15339, 314, 703, 284, 612, 262, 10658, 10188, 286, 2061],
)
self.assertEqual(len(response.data), 1)
self.assertTrue(len(response.data[0].embedding) > 0)
def test_empty_string_embedding(self):
"""Test embedding an empty string."""
client = openai.Client(api_key=self.api_key, base_url=self.base_url)
# Text embedding example with empty string
text = ""
# Expect a BadRequestError for empty input
with self.assertRaises(openai.BadRequestError) as cm:
client.embeddings.create(
model=self.model,
input=text,
)
# check the status code
self.assertEqual(cm.exception.status_code, 400)
if __name__ == "__main__":
unittest.main()
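# Assuming the new file lives under openai_server/basic/ as
# test_openai_embedding.py (its path isn't shown in this diff), the tests
# above can be run individually in the same style as the other suites:
#
#   python3 -m unittest openai_server.basic.test_openai_embedding.TestOpenAIEmbedding.test_embedding_single
#   python3 -m unittest openai_server.basic.test_openai_embedding.TestOpenAIEmbedding.test_embedding_batch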
"""
python3 -m unittest test_openai_server.TestOpenAIServer.test_batch
python3 -m unittest test_openai_server.TestOpenAIServer.test_completion
python3 -m unittest test_openai_server.TestOpenAIServer.test_completion_stream
python3 -m unittest test_openai_server.TestOpenAIServer.test_chat_completion
python3 -m unittest test_openai_server.TestOpenAIServer.test_chat_completion_stream
python3 -m unittest openai_server.basic.test_openai_server.TestOpenAIServer.test_completion
python3 -m unittest openai_server.basic.test_openai_server.TestOpenAIServer.test_completion_stream
python3 -m unittest openai_server.basic.test_openai_server.TestOpenAIServer.test_chat_completion
python3 -m unittest openai_server.basic.test_openai_server.TestOpenAIServer.test_chat_completion_stream
"""
import json
import re
import time
import unittest
import numpy as np
......@@ -20,7 +18,6 @@ from sglang.srt.utils import kill_process_tree
from sglang.test.runners import TEST_RERANK_QUERY_DOCS
from sglang.test.test_utils import (
DEFAULT_SMALL_CROSS_ENCODER_MODEL_NAME_FOR_TEST,
DEFAULT_SMALL_EMBEDDING_MODEL_NAME_FOR_TEST,
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
......@@ -508,87 +505,6 @@ class TestOpenAIServerEBNF(CustomTestCase):
)
class TestOpenAIEmbedding(CustomTestCase):
@classmethod
def setUpClass(cls):
cls.model = DEFAULT_SMALL_EMBEDDING_MODEL_NAME_FOR_TEST
cls.base_url = DEFAULT_URL_FOR_TEST
cls.api_key = "sk-123456"
# Configure embedding-specific args
other_args = ["--is-embedding", "--enable-metrics"]
cls.process = popen_launch_server(
cls.model,
cls.base_url,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
api_key=cls.api_key,
other_args=other_args,
)
cls.base_url += "/v1"
@classmethod
def tearDownClass(cls):
kill_process_tree(cls.process.pid)
def test_embedding_single(self):
"""Test single embedding request"""
client = openai.Client(api_key=self.api_key, base_url=self.base_url)
response = client.embeddings.create(model=self.model, input="Hello world")
self.assertEqual(len(response.data), 1)
self.assertTrue(len(response.data[0].embedding) > 0)
def test_embedding_batch(self):
"""Test batch embedding request"""
client = openai.Client(api_key=self.api_key, base_url=self.base_url)
response = client.embeddings.create(
model=self.model, input=["Hello world", "Test text"]
)
self.assertEqual(len(response.data), 2)
self.assertTrue(len(response.data[0].embedding) > 0)
self.assertTrue(len(response.data[1].embedding) > 0)
def test_embedding_single_batch_str(self):
"""Test embedding with a List[str] and length equals to 1"""
client = openai.Client(api_key=self.api_key, base_url=self.base_url)
response = client.embeddings.create(model=self.model, input=["Hello world"])
self.assertEqual(len(response.data), 1)
self.assertTrue(len(response.data[0].embedding) > 0)
def test_embedding_single_int_list(self):
"""Test embedding with a List[int] or List[List[int]]]"""
client = openai.Client(api_key=self.api_key, base_url=self.base_url)
response = client.embeddings.create(
model=self.model,
input=[[15339, 314, 703, 284, 612, 262, 10658, 10188, 286, 2061]],
)
self.assertEqual(len(response.data), 1)
self.assertTrue(len(response.data[0].embedding) > 0)
client = openai.Client(api_key=self.api_key, base_url=self.base_url)
response = client.embeddings.create(
model=self.model,
input=[15339, 314, 703, 284, 612, 262, 10658, 10188, 286, 2061],
)
self.assertEqual(len(response.data), 1)
self.assertTrue(len(response.data[0].embedding) > 0)
def test_empty_string_embedding(self):
"""Test embedding an empty string."""
client = openai.Client(api_key=self.api_key, base_url=self.base_url)
# Text embedding example with empty string
text = ""
# Expect a BadRequestError for empty input
with self.assertRaises(openai.BadRequestError) as cm:
client.embeddings.create(
model=self.model,
input=text,
)
# check the status code
self.assertEqual(cm.exception.status_code, 400)
class TestOpenAIV1Rerank(CustomTestCase):
@classmethod
def setUpClass(cls):
......@@ -660,79 +576,6 @@ class TestOpenAIV1Rerank(CustomTestCase):
self.assertTrue(isinstance(response[1]["index"], int))
class TestOpenAIServerIgnoreEOS(CustomTestCase):
@classmethod
def setUpClass(cls):
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
cls.base_url = DEFAULT_URL_FOR_TEST
cls.api_key = "sk-123456"
cls.process = popen_launch_server(
cls.model,
cls.base_url,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
api_key=cls.api_key,
)
cls.base_url += "/v1"
cls.tokenizer = get_tokenizer(DEFAULT_SMALL_MODEL_NAME_FOR_TEST)
@classmethod
def tearDownClass(cls):
kill_process_tree(cls.process.pid)
def test_ignore_eos(self):
"""
Test that ignore_eos=True allows generation to continue beyond EOS token
and reach the max_tokens limit.
"""
client = openai.Client(api_key=self.api_key, base_url=self.base_url)
max_tokens = 200
response_default = client.chat.completions.create(
model=self.model,
messages=[
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Count from 1 to 20."},
],
temperature=0,
max_tokens=max_tokens,
extra_body={"ignore_eos": False},
)
response_ignore_eos = client.chat.completions.create(
model=self.model,
messages=[
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Count from 1 to 20."},
],
temperature=0,
max_tokens=max_tokens,
extra_body={"ignore_eos": True},
)
default_tokens = len(
self.tokenizer.encode(response_default.choices[0].message.content)
)
ignore_eos_tokens = len(
self.tokenizer.encode(response_ignore_eos.choices[0].message.content)
)
# Check if ignore_eos resulted in more tokens or exactly max_tokens
# The ignore_eos response should either:
# 1. Have more tokens than the default response (if default stopped at EOS before max_tokens)
# 2. Have exactly max_tokens (if it reached the max_tokens limit)
self.assertTrue(
ignore_eos_tokens > default_tokens or ignore_eos_tokens >= max_tokens,
f"ignore_eos did not generate more tokens: {ignore_eos_tokens} vs {default_tokens}",
)
self.assertEqual(
response_ignore_eos.choices[0].finish_reason,
"length",
f"Expected finish_reason='length' for ignore_eos=True, got {response_ignore_eos.choices[0].finish_reason}",
)
class TestOpenAIV1Score(CustomTestCase):
@classmethod
def setUpClass(cls):
......
"""
Unit tests for the OpenAIServingEmbedding class from serving_embedding.py.
These tests ensure that the embedding serving implementation maintains compatibility
with the original adapter.py functionality and follows OpenAI API specifications.
"""
import unittest
......
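# The real unit tests are elided above. As a rough sketch of the
# response-shape checks this file performs, the generic example below
# validates an OpenAI-style embedding payload. `serve_embedding` is a
# hypothetical stub standing in for the real OpenAIServingEmbedding handler;
# it is not the actual API of serving_embedding.py.
def serve_embedding(inputs):
    # Hypothetical stand-in: return an OpenAI-shaped embedding response.
    return {
        "object": "list",
        "data": [
            {"object": "embedding", "index": i, "embedding": [0.0, 0.1]}
            for i, _ in enumerate(inputs)
        ],
        "model": "stub-model",
        "usage": {"prompt_tokens": 0, "total_tokens": 0},
    }


class TestEmbeddingResponseShape(unittest.TestCase):
    def test_openai_shape(self):
        response = serve_embedding(["Hello world", "Test text"])
        self.assertEqual(response["object"], "list")
        self.assertEqual(len(response["data"]), 2)
        for i, item in enumerate(response["data"]):
            self.assertEqual(item["index"], i)
            self.assertTrue(len(item["embedding"]) > 0)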
......@@ -97,7 +97,7 @@ class TestCacheReport(CustomTestCase):
)
first_cached_tokens = int(response.usage.prompt_tokens_details.cached_tokens)
# assert int(response.usage.cached_tokens) == 0
assert first_cached_tokens < self.min_cached
assert first_cached_tokens <= self.min_cached
response = self.run_openai(message)
cached_tokens = int(response.usage.prompt_tokens_details.cached_tokens)
print(f"openai second request cached_tokens: {cached_tokens}")
......
"""
Usage:
python3 -m unittest test_enable_thinking.TestEnableThinking.test_chat_completion_with_reasoning
python3 -m unittest test_enable_thinking.TestEnableThinking.test_chat_completion_without_reasoning
python3 -m unittest test_enable_thinking.TestEnableThinking.test_stream_chat_completion_with_reasoning
python3 -m unittest test_enable_thinking.TestEnableThinking.test_stream_chat_completion_without_reasoning
python3 -m unittest openai_server.features.test_enable_thinking.TestEnableThinking.test_chat_completion_with_reasoning
python3 -m unittest openai_server.features.test_enable_thinking.TestEnableThinking.test_chat_completion_without_reasoning
python3 -m unittest openai_server.features.test_enable_thinking.TestEnableThinking.test_stream_chat_completion_with_reasoning
python3 -m unittest openai_server.features.test_enable_thinking.TestEnableThinking.test_stream_chat_completion_without_reasoning
"""
import asyncio
......@@ -13,8 +13,10 @@ import sys
import time
import unittest
import openai
import requests
from sglang.srt.hf_transformers_utils import get_tokenizer
from sglang.srt.utils import kill_process_tree
from sglang.test.test_utils import (
DEFAULT_ENABLE_THINKING_MODEL_NAME_FOR_TEST,
......
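# The test bodies are elided. For orientation, the requests these tests make
# follow the pattern sketched below; the exact extra_body keys are an
# assumption inferred from the test names, not confirmed by this diff:
#
#   client = openai.Client(api_key="sk-123456", base_url=base_url)
#   response = client.chat.completions.create(
#       model=model,
#       messages=[{"role": "user", "content": "What is 2 + 2?"}],
#       extra_body={"chat_template_kwargs": {"enable_thinking": True}},
#   )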
"""
python3 -m unittest test_json_constrained.TestJSONConstrainedOutlinesBackend.test_json_generate
python3 -m unittest test_json_constrained.TestJSONConstrainedXGrammarBackend.test_json_generate
python3 -m unittest test_json_constrained.TestJSONConstrainedLLGuidanceBackend.test_json_generate
python3 -m unittest openai_server.features.test_json_constrained.TestJSONConstrainedOutlinesBackend.test_json_generate
python3 -m unittest openai_server.features.test_json_constrained.TestJSONConstrainedXGrammarBackend.test_json_generate
python3 -m unittest openai_server.features.test_json_constrained.TestJSONConstrainedLLGuidanceBackend.test_json_generate
"""
import json
......
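# Elided test body. A schema-constrained request through the OpenAI client is
# sketched below using the standard structured-output response_format; the
# model name and schema are placeholders, not the ones these tests use:
#
#   response = client.chat.completions.create(
#       model="placeholder-model",
#       messages=[{"role": "user", "content": "Describe the capital of France as JSON."}],
#       response_format={
#           "type": "json_schema",
#           "json_schema": {
#               "name": "capital",
#               "schema": {
#                   "type": "object",
#                   "properties": {"name": {"type": "string"}},
#                   "required": ["name"],
#               },
#           },
#       },
#   )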
"""
python3 -m unittest test_json_mode.TestJSONModeOutlines.test_json_mode_response
python3 -m unittest test_json_mode.TestJSONModeOutlines.test_json_mode_with_streaming
python3 -m unittest openai_server.features.test_json_mode.TestJSONModeOutlines.test_json_mode_response
python3 -m unittest openai_server.features.test_json_mode.TestJSONModeOutlines.test_json_mode_with_streaming
python3 -m unittest test_json_mode.TestJSONModeXGrammar.test_json_mode_response
python3 -m unittest test_json_mode.TestJSONModeXGrammar.test_json_mode_with_streaming
python3 -m unittest openai_server.features.test_json_mode.TestJSONModeXGrammar.test_json_mode_response
python3 -m unittest openai_server.features.test_json_mode.TestJSONModeXGrammar.test_json_mode_with_streaming
python3 -m unittest test_json_mode.TestJSONModeLLGuidance.test_json_mode_response
python3 -m unittest test_json_mode.TestJSONModeLLGuidance.test_json_mode_with_streaming
python3 -m unittest openai_server.features.test_json_mode.TestJSONModeLLGuidance.test_json_mode_response
python3 -m unittest openai_server.features.test_json_mode.TestJSONModeLLGuidance.test_json_mode_with_streaming
"""
import json
......
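# Elided test body. JSON mode differs from schema-constrained output in that
# it only guarantees syntactically valid JSON; a sketch of the request shape
# (placeholder model name):
#
#   response = client.chat.completions.create(
#       model="placeholder-model",
#       messages=[{"role": "user", "content": "Return a JSON object with a 'name' key."}],
#       response_format={"type": "json_object"},
#   )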
import re
import openai
from sglang.srt.hf_transformers_utils import get_tokenizer
from sglang.srt.utils import kill_process_tree
from sglang.test.test_utils import (
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server,
)
# -------------------------------------------------------------------------
# EBNF Test Class: TestOpenAIServerEBNF
# Launches the server with the xgrammar backend and contains only the EBNF tests
# -------------------------------------------------------------------------
class TestOpenAIServerEBNF(CustomTestCase):
@classmethod
def setUpClass(cls):
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
cls.base_url = DEFAULT_URL_FOR_TEST
cls.api_key = "sk-123456"
# passing xgrammar specifically
other_args = ["--grammar-backend", "xgrammar"]
cls.process = popen_launch_server(
cls.model,
cls.base_url,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
api_key=cls.api_key,
other_args=other_args,
)
cls.base_url += "/v1"
cls.tokenizer = get_tokenizer(DEFAULT_SMALL_MODEL_NAME_FOR_TEST)
@classmethod
def tearDownClass(cls):
kill_process_tree(cls.process.pid)
def test_ebnf(self):
"""
Ensure we can pass `ebnf` to the local openai server
and that it enforces the grammar.
"""
client = openai.Client(api_key=self.api_key, base_url=self.base_url)
ebnf_grammar = r"""
root ::= "Hello" | "Hi" | "Hey"
"""
pattern = re.compile(r"^(Hello|Hi|Hey)[.!?]*\s*$")
response = client.chat.completions.create(
model=self.model,
messages=[
{"role": "system", "content": "You are a helpful EBNF test bot."},
{"role": "user", "content": "Say a greeting (Hello, Hi, or Hey)."},
],
temperature=0,
max_tokens=32,
extra_body={"ebnf": ebnf_grammar},
)
text = response.choices[0].message.content.strip()
self.assertTrue(len(text) > 0, "Got empty text from EBNF generation")
self.assertRegex(text, pattern, f"Text '{text}' doesn't match EBNF choices")
def test_ebnf_strict_json(self):
"""
A stricter EBNF grammar that produces output of exactly the {"name":"Alice"} shape,
with no trailing punctuation or extra fields.
"""
client = openai.Client(api_key=self.api_key, base_url=self.base_url)
ebnf_grammar = r"""
root ::= "{" pair "}"
pair ::= "\"name\"" ":" string
string ::= "\"" [A-Za-z]+ "\""
"""
pattern = re.compile(r'^\{"name":"[A-Za-z]+"\}$')
response = client.chat.completions.create(
model=self.model,
messages=[
{"role": "system", "content": "EBNF mini-JSON generator."},
{
"role": "user",
"content": "Generate single key JSON with only letters.",
},
],
temperature=0,
max_tokens=64,
extra_body={"ebnf": ebnf_grammar},
)
text = response.choices[0].message.content.strip()
self.assertTrue(len(text) > 0, "Got empty text from EBNF strict JSON test")
self.assertRegex(
text, pattern, f"Text '{text}' not matching the EBNF strict JSON shape"
)
"""
Usage:
python3 -m unittest test_reasoning_content.TestReasoningContentAPI.test_streaming_separate_reasoning_false
python3 -m unittest test_reasoning_content.TestReasoningContentAPI.test_streaming_separate_reasoning_true
python3 -m unittest test_reasoning_content.TestReasoningContentAPI.test_streaming_separate_reasoning_true_stream_reasoning_false
python3 -m unittest test_reasoning_content.TestReasoningContentAPI.test_nonstreaming_separate_reasoning_false
python3 -m unittest test_reasoning_content.TestReasoningContentAPI.test_nonstreaming_separate_reasoning_true
python3 -m unittest test_reasoning_content.TestReasoningContentStartup.test_nonstreaming
python3 -m unittest test_reasoning_content.TestReasoningContentStartup.test_streaming
python3 -m unittest openai_server.features.test_reasoning_content.TestReasoningContentAPI.test_streaming_separate_reasoning_false
python3 -m unittest openai_server.features.test_reasoning_content.TestReasoningContentAPI.test_streaming_separate_reasoning_true
python3 -m unittest openai_server.features.test_reasoning_content.TestReasoningContentAPI.test_streaming_separate_reasoning_true_stream_reasoning_false
python3 -m unittest openai_server.features.test_reasoning_content.TestReasoningContentAPI.test_nonstreaming_separate_reasoning_false
python3 -m unittest openai_server.features.test_reasoning_content.TestReasoningContentAPI.test_nonstreaming_separate_reasoning_true
python3 -m unittest openai_server.features.test_reasoning_content.TestReasoningContentStartup.test_nonstreaming
python3 -m unittest openai_server.features.test_reasoning_content.TestReasoningContentStartup.test_streaming
"""
import json
......
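# Elided test body. The separate_reasoning flag these tests exercise is an
# sglang extra_body passthrough; a sketch, with the field names assumed from
# the test names rather than confirmed by this diff:
#
#   response = client.chat.completions.create(
#       model=model,
#       messages=[{"role": "user", "content": "Solve 17 * 23."}],
#       extra_body={"separate_reasoning": True},
#   )
#   print(response.choices[0].message.reasoning_content)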
......@@ -2,9 +2,12 @@
Test script for tool_choice functionality in SGLang
Tests: required, auto, and specific function choices in both streaming and non-streaming modes
python3 -m unittest test_tool_choice.TestToolChoice
# To run the tests, use the following command:
#
# python3 -m unittest openai_server.function_call.test_tool_choice
"""
import json
import unittest
import openai
......
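# Elided test body. For orientation, a minimal tool_choice request through
# the OpenAI client looks like the sketch below; the tool definition and
# model name are placeholders, not the ones these tests use:
#
#   tools = [{
#       "type": "function",
#       "function": {
#           "name": "get_weather",
#           "description": "Get the weather for a city.",
#           "parameters": {
#               "type": "object",
#               "properties": {"city": {"type": "string"}},
#               "required": ["city"],
#           },
#       },
#   }]
#   response = client.chat.completions.create(
#       model="placeholder-model",
#       messages=[{"role": "user", "content": "What's the weather in Paris?"}],
#       tools=tools,
#       tool_choice="required",  # or "auto", or {"type": "function", "function": {"name": "get_weather"}}
#   )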