Commit b7a2df0a (unverified), authored Jun 21, 2025 by Chang Su and committed via GitHub on Jun 21, 2025.
refactor(test): reorganize OpenAI test file structure (#7408)
Parent: 1998ce40

Showing 7 changed files with 124 additions and 108 deletions (+124 / -108).
test/srt/openai_server/validation/test_large_max_new_tokens.py      (+1, -1)
test/srt/openai_server/validation/test_matched_stop.py              (+0, -0)
test/srt/openai_server/validation/test_openai_server_ignore_eos.py  (+84, -0)
test/srt/openai_server/validation/test_request_length_validation.py (+0, -0)
test/srt/run_suite.py                                               (+38, -19)
test/srt/test_embedding_openai_server.py                            (+0, -87)
test/srt/test_jinja_template_utils.py                               (+1, -1)
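For orientation, the test layout implied by this commit, reconstructed only from the paths that appear in the diff below (other files in these directories are not shown):

test/srt/
├── run_suite.py
├── test_jinja_template_utils.py
└── openai_server/
    ├── basic/            # test_protocol.py, test_serving_chat.py, test_openai_embedding.py, ...
    ├── features/         # test_cache_report.py, test_json_mode.py, test_reasoning_content.py, ...
    ├── function_call/    # test_openai_function_calling.py, test_tool_choice.py
    └── validation/
        ├── test_large_max_new_tokens.py
        ├── test_matched_stop.py
        ├── test_openai_server_ignore_eos.py
        └── test_request_length_validation.py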
test/srt/test_large_max_new_tokens.py → test/srt/openai_server/validation/test_large_max_new_tokens.py  (view file @ b7a2df0a)

 """
-python3 -m unittest test_large_max_new_tokens.TestLargeMaxNewTokens.test_chat_completion
+python3 -m unittest openai_server.validation.test_large_max_new_tokens.TestLargeMaxNewTokens.test_chat_completion
 """
 import os
 ...
test/srt/test_matched_stop.py → test/srt/openai_server/validation/test_matched_stop.py  (view file @ b7a2df0a)

File moved without changes.
test/srt/openai_server/validation/test_openai_server_ignore_eos.py  (new file, 0 → 100644; view file @ b7a2df0a)

import openai

from sglang.srt.hf_transformers_utils import get_tokenizer
from sglang.srt.utils import kill_process_tree
from sglang.test.test_utils import (
    DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
    CustomTestCase,
    popen_launch_server,
)


class TestOpenAIServerIgnoreEOS(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
        cls.base_url = DEFAULT_URL_FOR_TEST
        cls.api_key = "sk-123456"
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            api_key=cls.api_key,
        )
        cls.base_url += "/v1"
        cls.tokenizer = get_tokenizer(DEFAULT_SMALL_MODEL_NAME_FOR_TEST)

    @classmethod
    def tearDownClass(cls):
        kill_process_tree(cls.process.pid)

    def test_ignore_eos(self):
        """
        Test that ignore_eos=True allows generation to continue beyond the EOS
        token and reach the max_tokens limit.
        """
        client = openai.Client(api_key=self.api_key, base_url=self.base_url)

        max_tokens = 200

        response_default = client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": "Count from 1 to 20."},
            ],
            temperature=0,
            max_tokens=max_tokens,
            extra_body={"ignore_eos": False},
        )

        response_ignore_eos = client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": "Count from 1 to 20."},
            ],
            temperature=0,
            max_tokens=max_tokens,
            extra_body={"ignore_eos": True},
        )

        default_tokens = len(
            self.tokenizer.encode(response_default.choices[0].message.content)
        )
        ignore_eos_tokens = len(
            self.tokenizer.encode(response_ignore_eos.choices[0].message.content)
        )

        # Check if ignore_eos resulted in more tokens or exactly max_tokens.
        # The ignore_eos response should either:
        # 1. Have more tokens than the default response (if the default stopped
        #    at EOS before max_tokens), or
        # 2. Have exactly max_tokens (if it reached the max_tokens limit).
        self.assertTrue(
            ignore_eos_tokens > default_tokens or ignore_eos_tokens >= max_tokens,
            f"ignore_eos did not generate more tokens: {ignore_eos_tokens} vs {default_tokens}",
        )

        self.assertEqual(
            response_ignore_eos.choices[0].finish_reason,
            "length",
            f"Expected finish_reason='length' for ignore_eos=True, got "
            f"{response_ignore_eos.choices[0].finish_reason}",
        )
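For reference outside the test harness, the same sglang-specific ignore_eos field can be sent through any OpenAI-compatible client. A minimal sketch, assuming a hypothetical server already listening at http://127.0.0.1:30000 with the API key used above:

import openai

# Hypothetical endpoint; match whatever host/port/api-key the server was
# launched with (the test above launches one via popen_launch_server).
client = openai.Client(api_key="sk-123456", base_url="http://127.0.0.1:30000/v1")

resp = client.chat.completions.create(
    model="default",  # assumption: the server's alias for the loaded model
    messages=[{"role": "user", "content": "Count from 1 to 20."}],
    temperature=0,
    max_tokens=200,
    # ignore_eos is not part of the OpenAI schema, so it is passed through
    # extra_body; the sglang server reads it as a sampling parameter.
    extra_body={"ignore_eos": True},
)
# With EOS ignored, generation should run to the token budget.
print(resp.choices[0].finish_reason)  # expect "length"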
test/srt/test_request_length_validation.py → test/srt/openai_server/validation/test_request_length_validation.py  (view file @ b7a2df0a)

File moved without changes.
test/srt/run_suite.py  (view file @ b7a2df0a)

@@ -29,10 +29,25 @@ suites = {
         TestFile("models/test_reward_models.py", 132),
         TestFile("models/test_vlm_models.py", 437),
         TestFile("models/test_transformers_models.py", 320),
-        TestFile("openai/test_protocol.py", 10),
-        TestFile("openai/test_serving_chat.py", 10),
-        TestFile("openai/test_serving_completions.py", 10),
-        TestFile("openai/test_serving_embedding.py", 10),
+        TestFile("openai_server/basic/test_protocol.py", 10),
+        TestFile("openai_server/basic/test_serving_chat.py", 10),
+        TestFile("openai_server/basic/test_serving_completions.py", 10),
+        TestFile("openai_server/basic/test_serving_embedding.py", 10),
+        TestFile("openai_server/basic/test_openai_embedding.py", 141),
+        TestFile("openai_server/basic/test_openai_server.py", 149),
+        TestFile("openai_server/features/test_cache_report.py", 100),
+        TestFile("openai_server/features/test_enable_thinking.py", 70),
+        TestFile("openai_server/features/test_json_constrained.py", 98),
+        TestFile("openai_server/features/test_json_mode.py", 90),
+        TestFile("openai_server/features/test_openai_server_ebnf.py", 95),
+        TestFile("openai_server/features/test_openai_server_hidden_states.py", 240),
+        TestFile("openai_server/features/test_reasoning_content.py", 89),
+        TestFile("openai_server/function_call/test_openai_function_calling.py", 60),
+        TestFile("openai_server/function_call/test_tool_choice.py", 226),
+        TestFile("openai_server/validation/test_large_max_new_tokens.py", 41),
+        TestFile("openai_server/validation/test_matched_stop.py", 60),
+        TestFile("openai_server/validation/test_openai_server_ignore_eos.py", 85),
+        TestFile("openai_server/validation/test_request_length_validation.py", 31),
         TestFile("test_abort.py", 51),
         TestFile("test_block_int8.py", 22),
         TestFile("test_create_kvindices.py", 2),
@@ -40,8 +55,6 @@ suites = {
         TestFile("test_eagle_infer_a.py", 370),
         TestFile("test_eagle_infer_b.py", 270),
         TestFile("test_ebnf_constrained.py", 108),
-        TestFile("test_enable_thinking.py", 70),
-        TestFile("test_embedding_openai_server.py", 141),
         TestFile("test_eval_fp8_accuracy.py", 303),
         TestFile("test_fa3.py", 376),
         # TestFile("test_flashmla.py", 352),
@@ -54,8 +67,6 @@ suites = {
         TestFile("test_int8_kernel.py", 8),
         TestFile("test_input_embeddings.py", 38),
         TestFile("test_jinja_template_utils.py", 1),
-        TestFile("test_json_constrained.py", 98),
-        TestFile("test_large_max_new_tokens.py", 41),
         TestFile("test_metrics.py", 32),
         TestFile("test_mla.py", 167),
         TestFile("test_mla_deepseek_v3.py", 342),
@@ -64,22 +75,16 @@ suites = {
         TestFile("test_mla_fp8.py", 93),
         TestFile("test_no_chunked_prefill.py", 108),
         TestFile("test_no_overlap_scheduler.py", 234),
-        TestFile("test_openai_function_calling.py", 60),
-        TestFile("test_openai_server.py", 149),
-        TestFile("test_openai_server_hidden_states.py", 240),
         TestFile("test_penalty.py", 41),
         TestFile("test_page_size.py", 60),
         TestFile("test_pytorch_sampling_backend.py", 66),
         TestFile("test_radix_attention.py", 105),
-        TestFile("test_reasoning_content.py", 89),
         TestFile("test_regex_constrained.py", 64),
-        TestFile("test_request_length_validation.py", 31),
         TestFile("test_retract_decode.py", 54),
         TestFile("test_server_args.py", 1),
         TestFile("test_skip_tokenizer_init.py", 117),
         TestFile("test_srt_engine.py", 261),
         TestFile("test_srt_endpoint.py", 130),
-        TestFile("test_tool_choice.py", 226),
         TestFile("test_torch_compile.py", 76),
         TestFile("test_torch_compile_moe.py", 172),
         TestFile("test_torch_native_attention_backend.py", 123),
@@ -107,15 +112,32 @@ suites = {
         TestFile("test_torch_compile_moe.py", 172),
         TestFile("models/test_qwen_models.py", 82),
         TestFile("models/test_reward_models.py", 132),
+        TestFile("openai_server/basic/test_openai_embedding.py", 141),
+        TestFile("openai_server/basic/test_openai_server.py", 149),
+        TestFile("openai_server/basic/test_protocol.py", 10),
+        TestFile("openai_server/basic/test_serving_chat.py", 10),
+        TestFile("openai_server/basic/test_serving_completions.py", 10),
+        TestFile("openai_server/basic/test_serving_embedding.py", 10),
         TestFile("test_abort.py", 51),
         TestFile("test_block_int8.py", 22),
         TestFile("test_create_kvindices.py", 2),
         TestFile("test_chunked_prefill.py", 313),
-        TestFile("test_embedding_openai_server.py", 141),
         TestFile("test_eval_fp8_accuracy.py", 303),
         TestFile("test_function_call_parser.py", 10),
         TestFile("test_input_embeddings.py", 38),
-        TestFile("test_large_max_new_tokens.py", 41),
+        TestFile("openai_server/features/test_cache_report.py", 100),
+        TestFile("openai_server/features/test_enable_thinking.py", 70),
+        TestFile("openai_server/features/test_json_constrained.py", 98),
+        TestFile("openai_server/features/test_json_mode.py", 90),
+        TestFile("openai_server/features/test_openai_server_ebnf.py", 95),
+        TestFile("openai_server/features/test_openai_server_hidden_states.py", 240),
+        TestFile("openai_server/features/test_reasoning_content.py", 89),
+        TestFile("openai_server/function_call/test_openai_function_calling.py", 60),
+        TestFile("openai_server/function_call/test_tool_choice.py", 226),
+        TestFile("openai_server/validation/test_large_max_new_tokens.py", 41),
+        TestFile("openai_server/validation/test_matched_stop.py", 60),
+        TestFile("openai_server/validation/test_openai_server_ignore_eos.py", 85),
+        TestFile("openai_server/validation/test_request_length_validation.py", 31),
         TestFile("test_metrics.py", 32),
         TestFile("test_no_chunked_prefill.py", 108),
         TestFile("test_no_overlap_scheduler.py", 234),
@@ -123,9 +145,6 @@ suites = {
         TestFile("test_page_size.py", 60),
         TestFile("test_pytorch_sampling_backend.py", 66),
         TestFile("test_radix_attention.py", 105),
-        TestFile("test_reasoning_content.py", 89),
-        TestFile("test_enable_thinking.py", 70),
-        TestFile("test_request_length_validation.py", 31),
         TestFile("test_retract_decode.py", 54),
         TestFile("test_server_args.py", 1),
         TestFile("test_skip_tokenizer_init.py", 117),
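Each TestFile entry pairs a test path with an estimated runtime in seconds; a suite runner can use such estimates to balance files across parallel CI jobs. The following is a minimal sketch of that idea only, an illustration rather than the repository's actual scheduling code:

from dataclasses import dataclass


@dataclass
class TestFile:
    name: str
    estimated_time: float  # seconds; used for scheduling, not enforcement


def partition(tests: list[TestFile], num_shards: int) -> list[list[TestFile]]:
    """Greedy longest-processing-time partition: assign each test, heaviest
    first, to the shard with the smallest accumulated estimated time."""
    shards: list[list[TestFile]] = [[] for _ in range(num_shards)]
    loads = [0.0] * num_shards
    for t in sorted(tests, key=lambda t: t.estimated_time, reverse=True):
        i = loads.index(min(loads))
        shards[i].append(t)
        loads[i] += t.estimated_time
    return shards


# Example using entries taken from the diff above:
suite = [
    TestFile("openai_server/validation/test_matched_stop.py", 60),
    TestFile("openai_server/validation/test_openai_server_ignore_eos.py", 85),
    TestFile("openai_server/validation/test_request_length_validation.py", 31),
]
print([[t.name for t in s] for s in partition(suite, 2)])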
test/srt/test_embedding_openai_server.py  (deleted, 100644 → 0; view file @ 1998ce40)

import unittest

import openai

from sglang.srt.hf_transformers_utils import get_tokenizer
from sglang.srt.utils import kill_process_tree
from sglang.test.test_utils import (
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
    CustomTestCase,
    popen_launch_server,
)


class TestOpenAIServer(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = "intfloat/e5-mistral-7b-instruct"
        cls.base_url = DEFAULT_URL_FOR_TEST
        cls.api_key = "sk-123456"
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            api_key=cls.api_key,
        )
        cls.base_url += "/v1"
        cls.tokenizer = get_tokenizer(cls.model)

    @classmethod
    def tearDownClass(cls):
        kill_process_tree(cls.process.pid)

    def run_embedding(self, use_list_input, token_input):
        client = openai.Client(api_key=self.api_key, base_url=self.base_url)
        prompt = "The capital of France is"
        if token_input:
            prompt_input = self.tokenizer.encode(prompt)
            num_prompt_tokens = len(prompt_input)
        else:
            prompt_input = prompt
            num_prompt_tokens = len(self.tokenizer.encode(prompt))

        if use_list_input:
            prompt_arg = [prompt_input] * 2
            num_prompts = len(prompt_arg)
            num_prompt_tokens *= num_prompts
        else:
            prompt_arg = prompt_input
            num_prompts = 1

        response = client.embeddings.create(
            input=prompt_arg,
            model=self.model,
        )

        assert len(response.data) == num_prompts
        assert isinstance(response.data, list)
        assert response.data[0].embedding
        assert response.data[0].index is not None
        assert response.data[0].object == "embedding"
        assert response.model == self.model
        assert response.object == "list"
        assert (
            response.usage.prompt_tokens == num_prompt_tokens
        ), f"{response.usage.prompt_tokens} vs {num_prompt_tokens}"
        assert (
            response.usage.total_tokens == num_prompt_tokens
        ), f"{response.usage.total_tokens} vs {num_prompt_tokens}"

    def run_batch(self):
        # FIXME: not implemented
        pass

    def test_embedding(self):
        # TODO: the fields of encoding_format, dimensions, user are skipped
        # TODO: support use_list_input
        for use_list_input in [False, True]:
            for token_input in [False, True]:
                self.run_embedding(use_list_input, token_input)

    def test_batch(self):
        self.run_batch()


if __name__ == "__main__":
    unittest.main()
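Per the run_suite.py changes above, this file's coverage moves to openai_server/basic/test_openai_embedding.py. For reference, the deleted test exercised four input shapes of the embeddings endpoint (a plain string, a token-id list, and a batched list of each); a compact standalone sketch of those shapes, against the same hypothetical local endpoint as earlier:

import openai

# Hypothetical local endpoint, matching the test's launch parameters.
client = openai.Client(api_key="sk-123456", base_url="http://127.0.0.1:30000/v1")

prompt = "The capital of France is"
token_ids = [791, 6864, 315, 9822, 374]  # illustrative pre-tokenized input

for inp in (prompt, token_ids, [prompt] * 2, [token_ids] * 2):
    resp = client.embeddings.create(input=inp, model="intfloat/e5-mistral-7b-instruct")
    # One embedding per prompt; usage counts reflect the tokenized length.
    print(len(resp.data), resp.usage.prompt_tokens)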
test/srt/test_jinja_template_utils.py  (view file @ b7a2df0a)

 """
-Unit tests for OpenAI adapter utils.
+Unit tests for Jinja chat template utils.
 """
 import unittest
 ...