Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
72676cd6
Unverified
Commit
72676cd6
authored
Jun 21, 2025
by
Chang Su
Committed by
GitHub
Jun 21, 2025
Browse files
feat(oai refactor): Replace `openai_api` with `entrypoints/openai` (#7351)
Co-authored-by:
Jin Pan
<
jpan236@wisc.edu
>
parent
02bf31ef
Changes
43
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
46 additions
and
158 deletions
+46
-158
test/srt/test_openai_server.py
test/srt/test_openai_server.py
+44
-156
test/srt/test_vlm_accuracy.py
test/srt/test_vlm_accuracy.py
+1
-1
test/srt/test_vlm_input_format.py
test/srt/test_vlm_input_format.py
+1
-1
No files found.
test/srt/test_openai_server.py
View file @
72676cd6
...
...
@@ -235,6 +235,7 @@ class TestOpenAIServer(CustomTestCase):
)
is_firsts
=
{}
is_finished
=
{}
for
response
in
generator
:
usage
=
response
.
usage
if
usage
is
not
None
:
...
...
@@ -244,6 +245,10 @@ class TestOpenAIServer(CustomTestCase):
continue
index
=
response
.
choices
[
0
].
index
finish_reason
=
response
.
choices
[
0
].
finish_reason
if
finish_reason
is
not
None
:
is_finished
[
index
]
=
True
data
=
response
.
choices
[
0
].
delta
if
is_firsts
.
get
(
index
,
True
):
...
...
@@ -253,7 +258,7 @@ class TestOpenAIServer(CustomTestCase):
is_firsts
[
index
]
=
False
continue
if
logprobs
:
if
logprobs
and
not
is_finished
.
get
(
index
,
False
)
:
assert
response
.
choices
[
0
].
logprobs
,
f
"logprobs was not returned"
assert
isinstance
(
response
.
choices
[
0
].
logprobs
.
content
[
0
].
top_logprobs
[
0
].
token
,
str
...
...
@@ -271,7 +276,7 @@ class TestOpenAIServer(CustomTestCase):
assert
(
isinstance
(
data
.
content
,
str
)
or
isinstance
(
data
.
reasoning_content
,
str
)
or
len
(
data
.
tool_calls
)
>
0
or
(
isinstance
(
data
.
tool_calls
,
list
)
and
len
(
data
.
tool_calls
)
>
0
)
or
response
.
choices
[
0
].
finish_reason
)
assert
response
.
id
...
...
@@ -282,152 +287,6 @@ class TestOpenAIServer(CustomTestCase):
index
,
True
),
f
"index
{
index
}
is not found in the response"
def
_create_batch
(
self
,
mode
,
client
):
if
mode
==
"completion"
:
input_file_path
=
"complete_input.jsonl"
# write content to input file
content
=
[
{
"custom_id"
:
"request-1"
,
"method"
:
"POST"
,
"url"
:
"/v1/completions"
,
"body"
:
{
"model"
:
"gpt-3.5-turbo-instruct"
,
"prompt"
:
"List 3 names of famous soccer player: "
,
"max_tokens"
:
20
,
},
},
{
"custom_id"
:
"request-2"
,
"method"
:
"POST"
,
"url"
:
"/v1/completions"
,
"body"
:
{
"model"
:
"gpt-3.5-turbo-instruct"
,
"prompt"
:
"List 6 names of famous basketball player: "
,
"max_tokens"
:
40
,
},
},
{
"custom_id"
:
"request-3"
,
"method"
:
"POST"
,
"url"
:
"/v1/completions"
,
"body"
:
{
"model"
:
"gpt-3.5-turbo-instruct"
,
"prompt"
:
"List 6 names of famous tenniss player: "
,
"max_tokens"
:
40
,
},
},
]
else
:
input_file_path
=
"chat_input.jsonl"
content
=
[
{
"custom_id"
:
"request-1"
,
"method"
:
"POST"
,
"url"
:
"/v1/chat/completions"
,
"body"
:
{
"model"
:
"gpt-3.5-turbo-0125"
,
"messages"
:
[
{
"role"
:
"system"
,
"content"
:
"You are a helpful assistant."
,
},
{
"role"
:
"user"
,
"content"
:
"Hello! List 3 NBA players and tell a story"
,
},
],
"max_tokens"
:
30
,
},
},
{
"custom_id"
:
"request-2"
,
"method"
:
"POST"
,
"url"
:
"/v1/chat/completions"
,
"body"
:
{
"model"
:
"gpt-3.5-turbo-0125"
,
"messages"
:
[
{
"role"
:
"system"
,
"content"
:
"You are an assistant. "
},
{
"role"
:
"user"
,
"content"
:
"Hello! List three capital and tell a story"
,
},
],
"max_tokens"
:
50
,
},
},
]
with
open
(
input_file_path
,
"w"
)
as
file
:
for
line
in
content
:
file
.
write
(
json
.
dumps
(
line
)
+
"
\n
"
)
with
open
(
input_file_path
,
"rb"
)
as
file
:
uploaded_file
=
client
.
files
.
create
(
file
=
file
,
purpose
=
"batch"
)
if
mode
==
"completion"
:
endpoint
=
"/v1/completions"
elif
mode
==
"chat"
:
endpoint
=
"/v1/chat/completions"
completion_window
=
"24h"
batch_job
=
client
.
batches
.
create
(
input_file_id
=
uploaded_file
.
id
,
endpoint
=
endpoint
,
completion_window
=
completion_window
,
)
return
batch_job
,
content
,
uploaded_file
def
run_batch
(
self
,
mode
):
client
=
openai
.
Client
(
api_key
=
self
.
api_key
,
base_url
=
self
.
base_url
)
batch_job
,
content
,
uploaded_file
=
self
.
_create_batch
(
mode
=
mode
,
client
=
client
)
while
batch_job
.
status
not
in
[
"completed"
,
"failed"
,
"cancelled"
]:
time
.
sleep
(
3
)
print
(
f
"Batch job status:
{
batch_job
.
status
}
...trying again in 3 seconds..."
)
batch_job
=
client
.
batches
.
retrieve
(
batch_job
.
id
)
assert
(
batch_job
.
status
==
"completed"
),
f
"Batch job status is not completed:
{
batch_job
.
status
}
"
assert
batch_job
.
request_counts
.
completed
==
len
(
content
)
assert
batch_job
.
request_counts
.
failed
==
0
assert
batch_job
.
request_counts
.
total
==
len
(
content
)
result_file_id
=
batch_job
.
output_file_id
file_response
=
client
.
files
.
content
(
result_file_id
)
result_content
=
file_response
.
read
().
decode
(
"utf-8"
)
# Decode bytes to string
results
=
[
json
.
loads
(
line
)
for
line
in
result_content
.
split
(
"
\n
"
)
if
line
.
strip
()
!=
""
]
assert
len
(
results
)
==
len
(
content
)
for
delete_fid
in
[
uploaded_file
.
id
,
result_file_id
]:
del_pesponse
=
client
.
files
.
delete
(
delete_fid
)
assert
del_pesponse
.
deleted
def
run_cancel_batch
(
self
,
mode
):
client
=
openai
.
Client
(
api_key
=
self
.
api_key
,
base_url
=
self
.
base_url
)
batch_job
,
_
,
uploaded_file
=
self
.
_create_batch
(
mode
=
mode
,
client
=
client
)
assert
batch_job
.
status
not
in
[
"cancelling"
,
"cancelled"
]
batch_job
=
client
.
batches
.
cancel
(
batch_id
=
batch_job
.
id
)
assert
batch_job
.
status
==
"cancelling"
while
batch_job
.
status
not
in
[
"failed"
,
"cancelled"
]:
batch_job
=
client
.
batches
.
retrieve
(
batch_job
.
id
)
print
(
f
"Batch job status:
{
batch_job
.
status
}
...trying again in 3 seconds..."
)
time
.
sleep
(
3
)
assert
batch_job
.
status
==
"cancelled"
del_response
=
client
.
files
.
delete
(
uploaded_file
.
id
)
assert
del_response
.
deleted
def
test_completion
(
self
):
for
echo
in
[
False
,
True
]:
for
logprobs
in
[
None
,
5
]:
...
...
@@ -467,14 +326,6 @@ class TestOpenAIServer(CustomTestCase):
for
parallel_sample_num
in
[
1
,
2
]:
self
.
run_chat_completion_stream
(
logprobs
,
parallel_sample_num
)
def
test_batch
(
self
):
for
mode
in
[
"completion"
,
"chat"
]:
self
.
run_batch
(
mode
)
def
test_cancel_batch
(
self
):
for
mode
in
[
"completion"
,
"chat"
]:
self
.
run_cancel_batch
(
mode
)
def
test_regex
(
self
):
client
=
openai
.
Client
(
api_key
=
self
.
api_key
,
base_url
=
self
.
base_url
)
...
...
@@ -559,6 +410,18 @@ The SmartHome Mini is a compact smart home assistant available in black or white
assert
len
(
models
)
==
1
assert
isinstance
(
getattr
(
models
[
0
],
"max_model_len"
,
None
),
int
)
def
test_retrieve_model
(
self
):
client
=
openai
.
Client
(
api_key
=
self
.
api_key
,
base_url
=
self
.
base_url
)
# Test retrieving an existing model
retrieved_model
=
client
.
models
.
retrieve
(
self
.
model
)
self
.
assertEqual
(
retrieved_model
.
id
,
self
.
model
)
self
.
assertEqual
(
retrieved_model
.
root
,
self
.
model
)
# Test retrieving a non-existent model
with
self
.
assertRaises
(
openai
.
NotFoundError
):
client
.
models
.
retrieve
(
"non-existent-model"
)
# -------------------------------------------------------------------------
# EBNF Test Class: TestOpenAIServerEBNF
...
...
@@ -684,6 +547,31 @@ class TestOpenAIEmbedding(CustomTestCase):
self
.
assertTrue
(
len
(
response
.
data
[
0
].
embedding
)
>
0
)
self
.
assertTrue
(
len
(
response
.
data
[
1
].
embedding
)
>
0
)
def
test_embedding_single_batch_str
(
self
):
"""Test embedding with a List[str] and length equals to 1"""
client
=
openai
.
Client
(
api_key
=
self
.
api_key
,
base_url
=
self
.
base_url
)
response
=
client
.
embeddings
.
create
(
model
=
self
.
model
,
input
=
[
"Hello world"
])
self
.
assertEqual
(
len
(
response
.
data
),
1
)
self
.
assertTrue
(
len
(
response
.
data
[
0
].
embedding
)
>
0
)
def
test_embedding_single_int_list
(
self
):
"""Test embedding with a List[int] or List[List[int]]]"""
client
=
openai
.
Client
(
api_key
=
self
.
api_key
,
base_url
=
self
.
base_url
)
response
=
client
.
embeddings
.
create
(
model
=
self
.
model
,
input
=
[[
15339
,
314
,
703
,
284
,
612
,
262
,
10658
,
10188
,
286
,
2061
]],
)
self
.
assertEqual
(
len
(
response
.
data
),
1
)
self
.
assertTrue
(
len
(
response
.
data
[
0
].
embedding
)
>
0
)
client
=
openai
.
Client
(
api_key
=
self
.
api_key
,
base_url
=
self
.
base_url
)
response
=
client
.
embeddings
.
create
(
model
=
self
.
model
,
input
=
[
15339
,
314
,
703
,
284
,
612
,
262
,
10658
,
10188
,
286
,
2061
],
)
self
.
assertEqual
(
len
(
response
.
data
),
1
)
self
.
assertTrue
(
len
(
response
.
data
[
0
].
embedding
)
>
0
)
def
test_empty_string_embedding
(
self
):
"""Test embedding an empty string."""
...
...
test/srt/test_vlm_accuracy.py
View file @
72676cd6
...
...
@@ -21,6 +21,7 @@ from transformers import (
from
sglang
import
Engine
from
sglang.srt.configs.model_config
import
ModelConfig
from
sglang.srt.conversation
import
generate_chat_conv
from
sglang.srt.entrypoints.openai.protocol
import
ChatCompletionRequest
from
sglang.srt.managers.mm_utils
import
embed_mm_inputs
,
init_embedding_cache
from
sglang.srt.managers.multimodal_processors.base_processor
import
(
BaseMultimodalProcessor
,
...
...
@@ -31,7 +32,6 @@ from sglang.srt.managers.schedule_batch import (
MultimodalInputs
,
)
from
sglang.srt.model_executor.model_runner
import
ModelRunner
from
sglang.srt.openai_api.protocol
import
ChatCompletionRequest
from
sglang.srt.server_args
import
ServerArgs
...
...
test/srt/test_vlm_input_format.py
View file @
72676cd6
...
...
@@ -15,7 +15,7 @@ from transformers import (
from
sglang
import
Engine
from
sglang.srt.conversation
import
generate_chat_conv
from
sglang.srt.openai
_api
.protocol
import
ChatCompletionRequest
from
sglang.srt.
entrypoints.
openai.protocol
import
ChatCompletionRequest
TEST_IMAGE_URL
=
"https://github.com/sgl-project/sglang/blob/main/test/lang/example_image.png?raw=true"
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment