Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
60340a36
Unverified
Commit
60340a36
authored
Aug 01, 2024
by
Ying Sheng
Committed by
GitHub
Aug 01, 2024
Browse files
Improve the coverage of the openai api server test (#878)
parent
70c78cfb
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
151 additions
and
20 deletions
+151
-20
python/sglang/srt/layers/logits_processor.py
python/sglang/srt/layers/logits_processor.py
+1
-1
python/sglang/srt/server.py
python/sglang/srt/server.py
+1
-1
test/srt/test_openai_server.py
test/srt/test_openai_server.py
+149
-18
No files found.
python/sglang/srt/layers/logits_processor.py
View file @
60340a36
...
@@ -209,7 +209,7 @@ class LogitsProcessor(nn.Module):
...
@@ -209,7 +209,7 @@ class LogitsProcessor(nn.Module):
all_logits
=
all_logits
[:,
:
self
.
config
.
vocab_size
].
float
()
all_logits
=
all_logits
[:,
:
self
.
config
.
vocab_size
].
float
()
all_logprobs
=
all_logits
all_logprobs
=
all_logits
del
all_logits
del
all_logits
,
hidden_states
all_logprobs
[:]
=
torch
.
nn
.
functional
.
log_softmax
(
all_logprobs
,
dim
=-
1
)
all_logprobs
[:]
=
torch
.
nn
.
functional
.
log_softmax
(
all_logprobs
,
dim
=-
1
)
# Get the logprob of top-k tokens
# Get the logprob of top-k tokens
...
...
python/sglang/srt/server.py
View file @
60340a36
...
@@ -72,8 +72,8 @@ from sglang.srt.utils import (
...
@@ -72,8 +72,8 @@ from sglang.srt.utils import (
allocate_init_ports
,
allocate_init_ports
,
assert_pkg_version
,
assert_pkg_version
,
enable_show_time_cost
,
enable_show_time_cost
,
maybe_set_triton_cache_manager
,
kill_child_process
,
kill_child_process
,
maybe_set_triton_cache_manager
,
set_ulimit
,
set_ulimit
,
)
)
from
sglang.utils
import
get_exception_traceback
from
sglang.utils
import
get_exception_traceback
...
...
test/srt/test_openai_server.py
View file @
60340a36
import
json
import
subprocess
import
subprocess
import
time
import
time
import
unittest
import
unittest
...
@@ -17,10 +18,15 @@ class TestOpenAIServer(unittest.TestCase):
...
@@ -17,10 +18,15 @@ class TestOpenAIServer(unittest.TestCase):
timeout
=
300
timeout
=
300
command
=
[
command
=
[
"python3"
,
"-m"
,
"sglang.launch_server"
,
"python3"
,
"--model-path"
,
model
,
"-m"
,
"--host"
,
"localhost"
,
"sglang.launch_server"
,
"--port"
,
str
(
port
),
"--model-path"
,
model
,
"--host"
,
"localhost"
,
"--port"
,
str
(
port
),
]
]
cls
.
process
=
subprocess
.
Popen
(
command
,
stdout
=
None
,
stderr
=
None
)
cls
.
process
=
subprocess
.
Popen
(
command
,
stdout
=
None
,
stderr
=
None
)
cls
.
base_url
=
f
"http://localhost:
{
port
}
/v1"
cls
.
base_url
=
f
"http://localhost:
{
port
}
/v1"
...
@@ -41,25 +47,38 @@ class TestOpenAIServer(unittest.TestCase):
...
@@ -41,25 +47,38 @@ class TestOpenAIServer(unittest.TestCase):
def
tearDownClass
(
cls
):
def
tearDownClass
(
cls
):
kill_child_process
(
cls
.
process
.
pid
)
kill_child_process
(
cls
.
process
.
pid
)
def
run_completion
(
self
,
echo
,
logprobs
):
def
run_completion
(
self
,
echo
,
logprobs
,
use_list_input
):
client
=
openai
.
Client
(
api_key
=
"EMPTY"
,
base_url
=
self
.
base_url
)
client
=
openai
.
Client
(
api_key
=
"EMPTY"
,
base_url
=
self
.
base_url
)
prompt
=
"The capital of France is"
prompt
=
"The capital of France is"
if
use_list_input
:
prompt_arg
=
[
prompt
,
prompt
]
num_choices
=
len
(
prompt_arg
)
else
:
prompt_arg
=
prompt
num_choices
=
1
response
=
client
.
completions
.
create
(
response
=
client
.
completions
.
create
(
model
=
self
.
model
,
model
=
self
.
model
,
prompt
=
prompt
,
prompt
=
prompt
_arg
,
temperature
=
0.1
,
temperature
=
0.1
,
max_tokens
=
32
,
max_tokens
=
32
,
echo
=
echo
,
echo
=
echo
,
logprobs
=
logprobs
,
logprobs
=
logprobs
,
)
)
text
=
response
.
choices
[
0
].
text
assert
len
(
response
.
choices
)
==
num_choices
if
echo
:
if
echo
:
text
=
response
.
choices
[
0
].
text
assert
text
.
startswith
(
prompt
)
assert
text
.
startswith
(
prompt
)
if
logprobs
:
if
logprobs
:
assert
response
.
choices
[
0
].
logprobs
assert
response
.
choices
[
0
].
logprobs
assert
isinstance
(
response
.
choices
[
0
].
logprobs
.
tokens
[
0
],
str
)
assert
isinstance
(
response
.
choices
[
0
].
logprobs
.
tokens
[
0
],
str
)
assert
isinstance
(
response
.
choices
[
0
].
logprobs
.
top_logprobs
[
1
],
dict
)
assert
isinstance
(
response
.
choices
[
0
].
logprobs
.
top_logprobs
[
1
],
dict
)
assert
len
(
response
.
choices
[
0
].
logprobs
.
top_logprobs
[
1
])
==
logprobs
ret_num_top_logprobs
=
len
(
response
.
choices
[
0
].
logprobs
.
top_logprobs
[
1
])
# FIXME: Fix this bug. Sometimes, some top_logprobs are missing in the return value.
# assert ret_num_top_logprobs == logprobs, f"{ret_num_top_logprobs} vs {logprobs}"
if
echo
:
if
echo
:
assert
response
.
choices
[
0
].
logprobs
.
token_logprobs
[
0
]
==
None
assert
response
.
choices
[
0
].
logprobs
.
token_logprobs
[
0
]
==
None
else
:
else
:
...
@@ -89,8 +108,14 @@ class TestOpenAIServer(unittest.TestCase):
...
@@ -89,8 +108,14 @@ class TestOpenAIServer(unittest.TestCase):
assert
response
.
choices
[
0
].
logprobs
assert
response
.
choices
[
0
].
logprobs
assert
isinstance
(
response
.
choices
[
0
].
logprobs
.
tokens
[
0
],
str
)
assert
isinstance
(
response
.
choices
[
0
].
logprobs
.
tokens
[
0
],
str
)
if
not
(
first
and
echo
):
if
not
(
first
and
echo
):
assert
isinstance
(
response
.
choices
[
0
].
logprobs
.
top_logprobs
[
0
],
dict
)
assert
isinstance
(
#assert len(response.choices[0].logprobs.top_logprobs[0]) == logprobs
response
.
choices
[
0
].
logprobs
.
top_logprobs
[
0
],
dict
)
ret_num_top_logprobs
=
len
(
response
.
choices
[
0
].
logprobs
.
top_logprobs
[
0
]
)
# FIXME: Fix this bug. Sometimes, some top_logprobs are missing in the return value.
# assert ret_num_top_logprobs == logprobs, f"{ret_num_top_logprobs} vs {logprobs}"
if
first
:
if
first
:
if
echo
:
if
echo
:
...
@@ -103,21 +128,127 @@ class TestOpenAIServer(unittest.TestCase):
...
@@ -103,21 +128,127 @@ class TestOpenAIServer(unittest.TestCase):
assert
response
.
usage
.
completion_tokens
>
0
assert
response
.
usage
.
completion_tokens
>
0
assert
response
.
usage
.
total_tokens
>
0
assert
response
.
usage
.
total_tokens
>
0
def
run_chat_completion
(
self
,
logprobs
):
client
=
openai
.
Client
(
api_key
=
"EMPTY"
,
base_url
=
self
.
base_url
)
response
=
client
.
chat
.
completions
.
create
(
model
=
self
.
model
,
messages
=
[
{
"role"
:
"system"
,
"content"
:
"You are a helpful AI assistant"
},
{
"role"
:
"user"
,
"content"
:
"What is the capital of France?"
},
],
temperature
=
0
,
max_tokens
=
32
,
logprobs
=
logprobs
is
not
None
and
logprobs
>
0
,
top_logprobs
=
logprobs
,
)
if
logprobs
:
assert
isinstance
(
response
.
choices
[
0
].
logprobs
.
content
[
0
].
top_logprobs
[
0
].
token
,
str
)
ret_num_top_logprobs
=
len
(
response
.
choices
[
0
].
logprobs
.
content
[
0
].
top_logprobs
)
assert
(
ret_num_top_logprobs
==
logprobs
),
f
"
{
ret_num_top_logprobs
}
vs
{
logprobs
}
"
assert
response
.
choices
[
0
].
message
.
role
==
"assistant"
assert
isinstance
(
response
.
choices
[
0
].
message
.
content
,
str
)
assert
response
.
id
assert
response
.
created
assert
response
.
usage
.
prompt_tokens
>
0
assert
response
.
usage
.
completion_tokens
>
0
assert
response
.
usage
.
total_tokens
>
0
def
run_chat_completion_stream
(
self
,
logprobs
):
client
=
openai
.
Client
(
api_key
=
"EMPTY"
,
base_url
=
self
.
base_url
)
generator
=
client
.
chat
.
completions
.
create
(
model
=
self
.
model
,
messages
=
[
{
"role"
:
"system"
,
"content"
:
"You are a helpful AI assistant"
},
{
"role"
:
"user"
,
"content"
:
"What is the capital of France?"
},
],
temperature
=
0
,
max_tokens
=
32
,
logprobs
=
logprobs
is
not
None
and
logprobs
>
0
,
top_logprobs
=
logprobs
,
stream
=
True
,
)
is_first
=
True
for
response
in
generator
:
print
(
response
)
data
=
response
.
choices
[
0
].
delta
if
is_first
:
data
.
role
==
"assistant"
is_first
=
False
continue
if
logprobs
:
# FIXME: Fix this bug. Return top_logprobs in the streaming mode.
pass
assert
isinstance
(
data
.
content
,
str
)
assert
response
.
id
assert
response
.
created
def
test_completion
(
self
):
def
test_completion
(
self
):
for
echo
in
[
False
,
True
]:
for
echo
in
[
False
,
True
]:
for
logprobs
in
[
None
,
5
]:
for
logprobs
in
[
None
,
5
]:
self
.
run_completion
(
echo
,
logprobs
)
for
use_list_input
in
[
True
,
False
]:
self
.
run_completion
(
echo
,
logprobs
,
use_list_input
)
def
test_completion_stream
(
self
):
def
test_completion_stream
(
self
):
for
echo
in
[
True
]:
for
echo
in
[
False
,
True
]:
for
logprobs
in
[
5
]:
for
logprobs
in
[
None
,
5
]:
self
.
run_completion_stream
(
echo
,
logprobs
)
self
.
run_completion_stream
(
echo
,
logprobs
)
def
test_chat_completion
(
self
):
for
logprobs
in
[
None
,
5
]:
self
.
run_chat_completion
(
logprobs
)
def
test_chat_completion_stream
(
self
):
for
logprobs
in
[
None
,
5
]:
self
.
run_chat_completion_stream
(
logprobs
)
def
test_regex
(
self
):
client
=
openai
.
Client
(
api_key
=
"EMPTY"
,
base_url
=
self
.
base_url
)
regex
=
(
r
"""\{\n"""
+
r
""" "name": "[\w]+",\n"""
+
r
""" "population": [\d]+\n"""
+
r
"""\}"""
)
response
=
client
.
chat
.
completions
.
create
(
model
=
self
.
model
,
messages
=
[
{
"role"
:
"system"
,
"content"
:
"You are a helpful AI assistant"
},
{
"role"
:
"user"
,
"content"
:
"Introduce the capital of France."
},
],
temperature
=
0
,
max_tokens
=
128
,
extra_body
=
{
"regex"
:
regex
},
)
text
=
response
.
choices
[
0
].
message
.
content
try
:
js_obj
=
json
.
loads
(
text
)
except
(
TypeError
,
json
.
decoder
.
JSONDecodeError
):
print
(
"JSONDecodeError"
,
text
)
raise
assert
isinstance
(
js_obj
[
"name"
],
str
)
assert
isinstance
(
js_obj
[
"population"
],
int
)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
#
unittest.main(warnings="ignore")
unittest
.
main
(
warnings
=
"ignore"
)
t
=
TestOpenAIServer
()
#
t = TestOpenAIServer()
t
.
setUpClass
()
#
t.setUpClass()
t
.
test_completion_stream
()
#
t.test_
chat_
completion_stream()
t
.
tearDownClass
()
#
t.tearDownClass()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment