Unverified commit b7a2df0a, authored by Chang Su and committed by GitHub

refactor(test): reorganize OpenAI test file structure (#7408)

parent 1998ce40
import unittest
import openai
from sglang.srt.utils import kill_process_tree
from sglang.test.test_utils import (
DEFAULT_SMALL_EMBEDDING_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server,
)
class TestOpenAIEmbedding(CustomTestCase):
@classmethod
def setUpClass(cls):
cls.model = DEFAULT_SMALL_EMBEDDING_MODEL_NAME_FOR_TEST
cls.base_url = DEFAULT_URL_FOR_TEST
cls.api_key = "sk-123456"
# Configure embedding-specific args
other_args = ["--is-embedding", "--enable-metrics"]
cls.process = popen_launch_server(
cls.model,
cls.base_url,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
api_key=cls.api_key,
other_args=other_args,
)
cls.base_url += "/v1"
@classmethod
def tearDownClass(cls):
kill_process_tree(cls.process.pid)
def test_embedding_single(self):
"""Test single embedding request"""
client = openai.Client(api_key=self.api_key, base_url=self.base_url)
response = client.embeddings.create(model=self.model, input="Hello world")
self.assertEqual(len(response.data), 1)
self.assertTrue(len(response.data[0].embedding) > 0)
def test_embedding_batch(self):
"""Test batch embedding request"""
client = openai.Client(api_key=self.api_key, base_url=self.base_url)
response = client.embeddings.create(
model=self.model, input=["Hello world", "Test text"]
)
self.assertEqual(len(response.data), 2)
self.assertTrue(len(response.data[0].embedding) > 0)
self.assertTrue(len(response.data[1].embedding) > 0)
def test_embedding_single_batch_str(self):
"""Test embedding with a List[str] and length equals to 1"""
client = openai.Client(api_key=self.api_key, base_url=self.base_url)
response = client.embeddings.create(model=self.model, input=["Hello world"])
self.assertEqual(len(response.data), 1)
self.assertTrue(len(response.data[0].embedding) > 0)
def test_embedding_single_int_list(self):
"""Test embedding with a List[int] or List[List[int]]]"""
client = openai.Client(api_key=self.api_key, base_url=self.base_url)
response = client.embeddings.create(
model=self.model,
input=[[15339, 314, 703, 284, 612, 262, 10658, 10188, 286, 2061]],
)
self.assertEqual(len(response.data), 1)
self.assertTrue(len(response.data[0].embedding) > 0)
client = openai.Client(api_key=self.api_key, base_url=self.base_url)
response = client.embeddings.create(
model=self.model,
input=[15339, 314, 703, 284, 612, 262, 10658, 10188, 286, 2061],
)
self.assertEqual(len(response.data), 1)
self.assertTrue(len(response.data[0].embedding) > 0)
def test_empty_string_embedding(self):
"""Test embedding an empty string."""
client = openai.Client(api_key=self.api_key, base_url=self.base_url)
# Text embedding example with empty string
text = ""
# Expect a BadRequestError for empty input
with self.assertRaises(openai.BadRequestError) as cm:
client.embeddings.create(
model=self.model,
input=text,
)
# check the status code
self.assertEqual(cm.exception.status_code, 400)
if __name__ == "__main__":
unittest.main()
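# Assuming the new file lives under openai_server/basic/ as
# test_openai_embedding.py (its path isn't shown in this diff), the tests
# above can be run individually in the same style as the other suites:
#
#   python3 -m unittest openai_server.basic.test_openai_embedding.TestOpenAIEmbedding.test_embedding_single
#   python3 -m unittest openai_server.basic.test_openai_embedding.TestOpenAIEmbedding.test_embedding_batch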
"""
python3 -m unittest test_openai_server.TestOpenAIServer.test_batch
python3 -m unittest test_openai_server.TestOpenAIServer.test_completion
python3 -m unittest test_openai_server.TestOpenAIServer.test_completion_stream
python3 -m unittest test_openai_server.TestOpenAIServer.test_chat_completion
python3 -m unittest test_openai_server.TestOpenAIServer.test_chat_completion_stream
python3 -m unittest openai_server.basic.test_openai_server.TestOpenAIServer.test_completion
python3 -m unittest openai_server.basic.test_openai_server.TestOpenAIServer.test_completion_stream
python3 -m unittest openai_server.basic.test_openai_server.TestOpenAIServer.test_chat_completion
python3 -m unittest openai_server.basic.test_openai_server.TestOpenAIServer.test_chat_completion_stream
"""
import json
import re
import time
import unittest
import numpy as np
......@@ -20,7 +18,6 @@ from sglang.srt.utils import kill_process_tree
from sglang.test.runners import TEST_RERANK_QUERY_DOCS
from sglang.test.test_utils import (
DEFAULT_SMALL_CROSS_ENCODER_MODEL_NAME_FOR_TEST,
DEFAULT_SMALL_EMBEDDING_MODEL_NAME_FOR_TEST,
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
......@@ -508,87 +505,6 @@ class TestOpenAIServerEBNF(CustomTestCase):
)
class TestOpenAIEmbedding(CustomTestCase):
@classmethod
def setUpClass(cls):
cls.model = DEFAULT_SMALL_EMBEDDING_MODEL_NAME_FOR_TEST
cls.base_url = DEFAULT_URL_FOR_TEST
cls.api_key = "sk-123456"
# Configure embedding-specific args
other_args = ["--is-embedding", "--enable-metrics"]
cls.process = popen_launch_server(
cls.model,
cls.base_url,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
api_key=cls.api_key,
other_args=other_args,
)
cls.base_url += "/v1"
@classmethod
def tearDownClass(cls):
kill_process_tree(cls.process.pid)
def test_embedding_single(self):
"""Test single embedding request"""
client = openai.Client(api_key=self.api_key, base_url=self.base_url)
response = client.embeddings.create(model=self.model, input="Hello world")
self.assertEqual(len(response.data), 1)
self.assertTrue(len(response.data[0].embedding) > 0)
def test_embedding_batch(self):
"""Test batch embedding request"""
client = openai.Client(api_key=self.api_key, base_url=self.base_url)
response = client.embeddings.create(
model=self.model, input=["Hello world", "Test text"]
)
self.assertEqual(len(response.data), 2)
self.assertTrue(len(response.data[0].embedding) > 0)
self.assertTrue(len(response.data[1].embedding) > 0)
def test_embedding_single_batch_str(self):
"""Test embedding with a List[str] and length equals to 1"""
client = openai.Client(api_key=self.api_key, base_url=self.base_url)
response = client.embeddings.create(model=self.model, input=["Hello world"])
self.assertEqual(len(response.data), 1)
self.assertTrue(len(response.data[0].embedding) > 0)
def test_embedding_single_int_list(self):
"""Test embedding with a List[int] or List[List[int]]]"""
client = openai.Client(api_key=self.api_key, base_url=self.base_url)
response = client.embeddings.create(
model=self.model,
input=[[15339, 314, 703, 284, 612, 262, 10658, 10188, 286, 2061]],
)
self.assertEqual(len(response.data), 1)
self.assertTrue(len(response.data[0].embedding) > 0)
client = openai.Client(api_key=self.api_key, base_url=self.base_url)
response = client.embeddings.create(
model=self.model,
input=[15339, 314, 703, 284, 612, 262, 10658, 10188, 286, 2061],
)
self.assertEqual(len(response.data), 1)
self.assertTrue(len(response.data[0].embedding) > 0)
def test_empty_string_embedding(self):
"""Test embedding an empty string."""
client = openai.Client(api_key=self.api_key, base_url=self.base_url)
# Text embedding example with empty string
text = ""
# Expect a BadRequestError for empty input
with self.assertRaises(openai.BadRequestError) as cm:
client.embeddings.create(
model=self.model,
input=text,
)
# check the status code
self.assertEqual(cm.exception.status_code, 400)
class TestOpenAIV1Rerank(CustomTestCase):
@classmethod
def setUpClass(cls):
......@@ -660,79 +576,6 @@ class TestOpenAIV1Rerank(CustomTestCase):
self.assertTrue(isinstance(response[1]["index"], int))
class TestOpenAIServerIgnoreEOS(CustomTestCase):
@classmethod
def setUpClass(cls):
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
cls.base_url = DEFAULT_URL_FOR_TEST
cls.api_key = "sk-123456"
cls.process = popen_launch_server(
cls.model,
cls.base_url,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
api_key=cls.api_key,
)
cls.base_url += "/v1"
cls.tokenizer = get_tokenizer(DEFAULT_SMALL_MODEL_NAME_FOR_TEST)
@classmethod
def tearDownClass(cls):
kill_process_tree(cls.process.pid)
def test_ignore_eos(self):
"""
Test that ignore_eos=True allows generation to continue beyond EOS token
and reach the max_tokens limit.
"""
client = openai.Client(api_key=self.api_key, base_url=self.base_url)
max_tokens = 200
response_default = client.chat.completions.create(
model=self.model,
messages=[
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Count from 1 to 20."},
],
temperature=0,
max_tokens=max_tokens,
extra_body={"ignore_eos": False},
)
response_ignore_eos = client.chat.completions.create(
model=self.model,
messages=[
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Count from 1 to 20."},
],
temperature=0,
max_tokens=max_tokens,
extra_body={"ignore_eos": True},
)
default_tokens = len(
self.tokenizer.encode(response_default.choices[0].message.content)
)
ignore_eos_tokens = len(
self.tokenizer.encode(response_ignore_eos.choices[0].message.content)
)
# Check if ignore_eos resulted in more tokens or exactly max_tokens
# The ignore_eos response should either:
# 1. Have more tokens than the default response (if default stopped at EOS before max_tokens)
# 2. Have exactly max_tokens (if it reached the max_tokens limit)
self.assertTrue(
ignore_eos_tokens > default_tokens or ignore_eos_tokens >= max_tokens,
f"ignore_eos did not generate more tokens: {ignore_eos_tokens} vs {default_tokens}",
)
self.assertEqual(
response_ignore_eos.choices[0].finish_reason,
"length",
f"Expected finish_reason='length' for ignore_eos=True, got {response_ignore_eos.choices[0].finish_reason}",
)
class TestOpenAIV1Score(CustomTestCase):
@classmethod
def setUpClass(cls):
......
"""
Unit tests for the OpenAIServingEmbedding class from serving_embedding.py.
These tests ensure that the embedding serving implementation maintains compatibility
with the original adapter.py functionality and follows OpenAI API specifications.
"""
import unittest
......
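# The real unit tests are elided above. As a rough sketch of the
# response-shape checks this file performs, the generic example below
# validates an OpenAI-style embedding payload. `serve_embedding` is a
# hypothetical stub standing in for the real OpenAIServingEmbedding handler;
# it is not the actual API of serving_embedding.py.
def serve_embedding(inputs):
    # Hypothetical stand-in: return an OpenAI-shaped embedding response.
    return {
        "object": "list",
        "data": [
            {"object": "embedding", "index": i, "embedding": [0.0, 0.1]}
            for i, _ in enumerate(inputs)
        ],
        "model": "stub-model",
        "usage": {"prompt_tokens": 0, "total_tokens": 0},
    }


class TestEmbeddingResponseShape(unittest.TestCase):
    def test_openai_shape(self):
        response = serve_embedding(["Hello world", "Test text"])
        self.assertEqual(response["object"], "list")
        self.assertEqual(len(response["data"]), 2)
        for i, item in enumerate(response["data"]):
            self.assertEqual(item["index"], i)
            self.assertTrue(len(item["embedding"]) > 0)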
......@@ -97,7 +97,7 @@ class TestCacheReport(CustomTestCase):
)
first_cached_tokens = int(response.usage.prompt_tokens_details.cached_tokens)
# assert int(response.usage.cached_tokens) == 0
assert first_cached_tokens < self.min_cached
assert first_cached_tokens <= self.min_cached
response = self.run_openai(message)
cached_tokens = int(response.usage.prompt_tokens_details.cached_tokens)
print(f"openai second request cached_tokens: {cached_tokens}")
......
"""
Usage:
python3 -m unittest test_enable_thinking.TestEnableThinking.test_chat_completion_with_reasoning
python3 -m unittest test_enable_thinking.TestEnableThinking.test_chat_completion_without_reasoning
python3 -m unittest test_enable_thinking.TestEnableThinking.test_stream_chat_completion_with_reasoning
python3 -m unittest test_enable_thinking.TestEnableThinking.test_stream_chat_completion_without_reasoning
python3 -m unittest openai_server.features.test_enable_thinking.TestEnableThinking.test_chat_completion_with_reasoning
python3 -m unittest openai_server.features.test_enable_thinking.TestEnableThinking.test_chat_completion_without_reasoning
python3 -m unittest openai_server.features.test_enable_thinking.TestEnableThinking.test_stream_chat_completion_with_reasoning
python3 -m unittest openai_server.features.test_enable_thinking.TestEnableThinking.test_stream_chat_completion_without_reasoning
"""
import asyncio
......@@ -13,8 +13,10 @@ import sys
import time
import unittest
import openai
import requests
from sglang.srt.hf_transformers_utils import get_tokenizer
from sglang.srt.utils import kill_process_tree
from sglang.test.test_utils import (
DEFAULT_ENABLE_THINKING_MODEL_NAME_FOR_TEST,
......
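# The test bodies are elided. For orientation, the requests these tests make
# follow the pattern sketched below; the exact extra_body keys are an
# assumption inferred from the test names, not confirmed by this diff:
#
#   client = openai.Client(api_key="sk-123456", base_url=base_url)
#   response = client.chat.completions.create(
#       model=model,
#       messages=[{"role": "user", "content": "What is 2 + 2?"}],
#       extra_body={"chat_template_kwargs": {"enable_thinking": True}},
#   )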
"""
python3 -m unittest test_json_constrained.TestJSONConstrainedOutlinesBackend.test_json_generate
python3 -m unittest test_json_constrained.TestJSONConstrainedXGrammarBackend.test_json_generate
python3 -m unittest test_json_constrained.TestJSONConstrainedLLGuidanceBackend.test_json_generate
python3 -m unittest openai_server.features.test_json_constrained.TestJSONConstrainedOutlinesBackend.test_json_generate
python3 -m unittest openai_server.features.test_json_constrained.TestJSONConstrainedXGrammarBackend.test_json_generate
python3 -m unittest openai_server.features.test_json_constrained.TestJSONConstrainedLLGuidanceBackend.test_json_generate
"""
import json
......
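# Elided test body. A schema-constrained request through the OpenAI client is
# sketched below using the standard structured-output response_format; the
# model name and schema are placeholders, not the ones these tests use:
#
#   response = client.chat.completions.create(
#       model="placeholder-model",
#       messages=[{"role": "user", "content": "Describe the capital of France as JSON."}],
#       response_format={
#           "type": "json_schema",
#           "json_schema": {
#               "name": "capital",
#               "schema": {
#                   "type": "object",
#                   "properties": {"name": {"type": "string"}},
#                   "required": ["name"],
#               },
#           },
#       },
#   )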
"""
python3 -m unittest test_json_mode.TestJSONModeOutlines.test_json_mode_response
python3 -m unittest test_json_mode.TestJSONModeOutlines.test_json_mode_with_streaming
python3 -m unittest openai_server.features.test_json_mode.TestJSONModeOutlines.test_json_mode_response
python3 -m unittest openai_server.features.test_json_mode.TestJSONModeOutlines.test_json_mode_with_streaming
python3 -m unittest test_json_mode.TestJSONModeXGrammar.test_json_mode_response
python3 -m unittest test_json_mode.TestJSONModeXGrammar.test_json_mode_with_streaming
python3 -m unittest openai_server.features.test_json_mode.TestJSONModeXGrammar.test_json_mode_response
python3 -m unittest openai_server.features.test_json_mode.TestJSONModeXGrammar.test_json_mode_with_streaming
python3 -m unittest test_json_mode.TestJSONModeLLGuidance.test_json_mode_response
python3 -m unittest test_json_mode.TestJSONModeLLGuidance.test_json_mode_with_streaming
python3 -m unittest openai_server.features.test_json_mode.TestJSONModeLLGuidance.test_json_mode_response
python3 -m unittest openai_server.features.test_json_mode.TestJSONModeLLGuidance.test_json_mode_with_streaming
"""
import json
......
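# Elided test body. JSON mode differs from schema-constrained output in that
# it only guarantees syntactically valid JSON; a sketch of the request shape
# (placeholder model name):
#
#   response = client.chat.completions.create(
#       model="placeholder-model",
#       messages=[{"role": "user", "content": "Return a JSON object with a 'name' key."}],
#       response_format={"type": "json_object"},
#   )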
import re
import openai
from sglang.srt.hf_transformers_utils import get_tokenizer
from sglang.srt.utils import kill_process_tree
from sglang.test.test_utils import (
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server,
)
# -------------------------------------------------------------------------
# EBNF Test Class: TestOpenAIServerEBNF
# Launches the server with the xgrammar backend and contains only the EBNF tests
# -------------------------------------------------------------------------
class TestOpenAIServerEBNF(CustomTestCase):
@classmethod
def setUpClass(cls):
cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
cls.base_url = DEFAULT_URL_FOR_TEST
cls.api_key = "sk-123456"
# passing xgrammar specifically
other_args = ["--grammar-backend", "xgrammar"]
cls.process = popen_launch_server(
cls.model,
cls.base_url,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
api_key=cls.api_key,
other_args=other_args,
)
cls.base_url += "/v1"
cls.tokenizer = get_tokenizer(DEFAULT_SMALL_MODEL_NAME_FOR_TEST)
@classmethod
def tearDownClass(cls):
kill_process_tree(cls.process.pid)
def test_ebnf(self):
"""
Ensure we can pass `ebnf` to the local openai server
and that it enforces the grammar.
"""
client = openai.Client(api_key=self.api_key, base_url=self.base_url)
ebnf_grammar = r"""
root ::= "Hello" | "Hi" | "Hey"
"""
pattern = re.compile(r"^(Hello|Hi|Hey)[.!?]*\s*$")
response = client.chat.completions.create(
model=self.model,
messages=[
{"role": "system", "content": "You are a helpful EBNF test bot."},
{"role": "user", "content": "Say a greeting (Hello, Hi, or Hey)."},
],
temperature=0,
max_tokens=32,
extra_body={"ebnf": ebnf_grammar},
)
text = response.choices[0].message.content.strip()
self.assertTrue(len(text) > 0, "Got empty text from EBNF generation")
self.assertRegex(text, pattern, f"Text '{text}' doesn't match EBNF choices")
def test_ebnf_strict_json(self):
"""
A stricter EBNF grammar that produces output of exactly the {"name":"Alice"} shape,
with no trailing punctuation or extra fields.
"""
client = openai.Client(api_key=self.api_key, base_url=self.base_url)
ebnf_grammar = r"""
root ::= "{" pair "}"
pair ::= "\"name\"" ":" string
string ::= "\"" [A-Za-z]+ "\""
"""
pattern = re.compile(r'^\{"name":"[A-Za-z]+"\}$')
response = client.chat.completions.create(
model=self.model,
messages=[
{"role": "system", "content": "EBNF mini-JSON generator."},
{
"role": "user",
"content": "Generate single key JSON with only letters.",
},
],
temperature=0,
max_tokens=64,
extra_body={"ebnf": ebnf_grammar},
)
text = response.choices[0].message.content.strip()
self.assertTrue(len(text) > 0, "Got empty text from EBNF strict JSON test")
self.assertRegex(
text, pattern, f"Text '{text}' not matching the EBNF strict JSON shape"
)
"""
Usage:
python3 -m unittest test_reasoning_content.TestReasoningContentAPI.test_streaming_separate_reasoning_false
python3 -m unittest test_reasoning_content.TestReasoningContentAPI.test_streaming_separate_reasoning_true
python3 -m unittest test_reasoning_content.TestReasoningContentAPI.test_streaming_separate_reasoning_true_stream_reasoning_false
python3 -m unittest test_reasoning_content.TestReasoningContentAPI.test_nonstreaming_separate_reasoning_false
python3 -m unittest test_reasoning_content.TestReasoningContentAPI.test_nonstreaming_separate_reasoning_true
python3 -m unittest test_reasoning_content.TestReasoningContentStartup.test_nonstreaming
python3 -m unittest test_reasoning_content.TestReasoningContentStartup.test_streaming
python3 -m unittest openai_server.features.test_reasoning_content.TestReasoningContentAPI.test_streaming_separate_reasoning_false
python3 -m unittest openai_server.features.test_reasoning_content.TestReasoningContentAPI.test_streaming_separate_reasoning_true
python3 -m unittest openai_server.features.test_reasoning_content.TestReasoningContentAPI.test_streaming_separate_reasoning_true_stream_reasoning_false
python3 -m unittest openai_server.features.test_reasoning_content.TestReasoningContentAPI.test_nonstreaming_separate_reasoning_false
python3 -m unittest openai_server.features.test_reasoning_content.TestReasoningContentAPI.test_nonstreaming_separate_reasoning_true
python3 -m unittest openai_server.features.test_reasoning_content.TestReasoningContentStartup.test_nonstreaming
python3 -m unittest openai_server.features.test_reasoning_content.TestReasoningContentStartup.test_streaming
"""
import json
......
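# Elided test body. The separate_reasoning flag these tests exercise is an
# sglang extra_body passthrough; a sketch, with the field names assumed from
# the test names rather than confirmed by this diff:
#
#   response = client.chat.completions.create(
#       model=model,
#       messages=[{"role": "user", "content": "Solve 17 * 23."}],
#       extra_body={"separate_reasoning": True},
#   )
#   print(response.choices[0].message.reasoning_content)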
......@@ -2,9 +2,12 @@
Test script for tool_choice functionality in SGLang
Tests: required, auto, and specific function choices in both streaming and non-streaming modes
python3 -m unittest test_tool_choice.TestToolChoice
# To run the tests, use the following command:
#
# python3 -m unittest openai_server.function_call.test_tool_choice
"""
import json
import unittest
import openai
......
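# Elided test body. For orientation, a minimal tool_choice request through
# the OpenAI client looks like the sketch below; the tool definition and
# model name are placeholders, not the ones these tests use:
#
#   tools = [{
#       "type": "function",
#       "function": {
#           "name": "get_weather",
#           "description": "Get the weather for a city.",
#           "parameters": {
#               "type": "object",
#               "properties": {"city": {"type": "string"}},
#               "required": ["city"],
#           },
#       },
#   }]
#   response = client.chat.completions.create(
#       model="placeholder-model",
#       messages=[{"role": "user", "content": "What's the weather in Paris?"}],
#       tools=tools,
#       tool_choice="required",  # or "auto", or {"type": "function", "function": {"name": "get_weather"}}
#   )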