change / sglang · Commits · 72b6ea88

Unverified commit 72b6ea88, authored Aug 01, 2024 by Ying Sheng, committed by GitHub Aug 01, 2024.

Make scripts under `/test/srt` as unit tests (#875)

Parent: e4d3333c

Showing 18 changed files with 362 additions (+362) and 221 deletions (-221).
.github/workflows/unit-test.yml                 +10  -3
python/sglang/srt/server.py                      +2  -10
python/sglang/srt/utils.py                      +20  -0
python/sglang/test/test_programs.py              +4  -4
test/lang/test_srt_backend.py                    +1  -1
test/srt/example_image.png                       +0  -1
test/srt/old/test_curl.sh                        +0  -0
test/srt/old/test_flashinfer.py                  +0  -0
test/srt/old/test_httpserver_classify.py         +0  -0
test/srt/old/test_httpserver_concurrent.py       +0  -0
test/srt/old/test_httpserver_decode.py           +0  -0
test/srt/old/test_httpserver_decode_stream.py    +0  -0
test/srt/old/test_httpserver_llava.py            +0  -0
test/srt/old/test_httpserver_reuse.py            +0  -0
test/srt/old/test_jump_forward.py                +0  -0
test/srt/old/test_openai_server.py             +209  -0
test/srt/old/test_robust.py                      +0  -0
test/srt/test_openai_server.py                 +116  -202
.github/workflows/unit-test.yml (view file @ 72b6ea88)

@@ -20,8 +20,6 @@ concurrency:
 jobs:
   unit-test:
     runs-on: self-hosted
-    env:
-      CUDA_VISIBLE_DEVICES: 6
     steps:
     - name: Checkout code
@@ -30,6 +28,7 @@ jobs:
     - name: Install dependencies
       run: |
         cd /data/zhyncs/venv && source ./bin/activate && cd -
+        pip cache purge
         pip install --upgrade pip
         pip install -e "python[all]"
@@ -39,6 +38,14 @@ jobs:
     - name: Test OpenAI Backend
       run: |
         cd /data/zhyncs/venv && source ./bin/activate && cd -
-        cd test/lang
         export OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }}
+        cd test/lang
         python3 test_openai_backend.py
+
+    - name: Test SRT Backend
+      run: |
+        cd /data/zhyncs/venv && source ./bin/activate && cd -
+        cd test/lang
+        python3 test_srt_backend.py
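For orientation, the two CI test steps above simply run the language-level suites in test/lang. A rough local equivalent, sketched in Python rather than the runner's shell (the CI-specific venv activation is omitted, and test_openai_backend.py assumes OPENAI_API_KEY is already set in the environment):

import subprocess

# Mirror the "Test OpenAI Backend" and "Test SRT Backend" steps locally,
# running each suite from the repository root.
subprocess.run(["python3", "test_openai_backend.py"], cwd="test/lang", check=True)
subprocess.run(["python3", "test_srt_backend.py"], cwd="test/lang", check=True)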
python/sglang/srt/server.py (view file @ 72b6ea88)

@@ -73,6 +73,7 @@ from sglang.srt.utils import (
 from sglang.srt.utils import (
     assert_pkg_version,
     enable_show_time_cost,
     maybe_set_triton_cache_manager,
+    kill_child_process,
     set_ulimit,
 )
 from sglang.utils import get_exception_traceback
@@ -467,16 +468,7 @@ class Runtime:
     def shutdown(self):
         if self.pid is not None:
-            try:
-                parent = psutil.Process(self.pid)
-            except psutil.NoSuchProcess:
-                return
-            children = parent.children(recursive=True)
-            for child in children:
-                child.kill()
-            psutil.wait_procs(children, timeout=5)
-            parent.kill()
-            parent.wait(timeout=5)
+            kill_child_process(self.pid)
             self.pid = None

     def cache_prefix(self, prefix: str):
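Runtime.shutdown() now delegates process cleanup to the shared helper instead of inlining the psutil logic. A hedged usage sketch of the affected path (the model path is borrowed from test/lang/test_srt_backend.py below; this snippet is illustrative, not part of the commit):

import sglang as sgl

# Start a runtime, register it as the default backend, then tear it down;
# shutdown() now routes through kill_child_process(self.pid).
runtime = sgl.Runtime(model_path="meta-llama/Meta-Llama-3.1-8B-Instruct")
sgl.set_default_backend(runtime)
runtime.shutdown()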
python/sglang/srt/utils.py (view file @ 72b6ea88)

@@ -366,6 +366,26 @@ def kill_parent_process():
     os.kill(parent_process.pid, 9)


+def kill_child_process(pid, including_parent=True):
+    try:
+        parent = psutil.Process(pid)
+    except psutil.NoSuchProcess:
+        return
+    children = parent.children(recursive=True)
+    for child in children:
+        try:
+            child.kill()
+        except psutil.NoSuchProcess:
+            pass
+    if including_parent:
+        try:
+            parent.kill()
+        except psutil.NoSuchProcess:
+            pass
+
+
 def monkey_patch_vllm_p2p_access_check(gpu_id: int):
     """
     Monkey patch the slow p2p access check in vllm.
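A small demonstration of the new helper's semantics using a throwaway child process (not part of the commit; `sleep` assumes a POSIX system):

import subprocess

from sglang.srt.utils import kill_child_process

# Kill the whole process tree rooted at proc.pid; including_parent=True also
# kills proc itself, so wait() reaps a signaled child (negative returncode).
proc = subprocess.Popen(["sleep", "1000"])
kill_child_process(proc.pid, including_parent=True)
proc.wait()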
python/sglang/test/test_programs.py (view file @ 72b6ea88)

@@ -105,15 +105,14 @@ def test_decode_json_regex():
     def decode_json(s):
         from sglang.lang.ir import REGEX_FLOAT, REGEX_INT, REGEX_STRING

-        s += "Generate a JSON object to describe the basic information of a city.\n"
+        s += "Generate a JSON object to describe the basic city information of Paris.\n"

         with s.var_scope("json_output"):
             s += "{\n"
             s += '  "name": ' + sgl.gen(regex=REGEX_STRING + ",") + "\n"
             s += '  "population": ' + sgl.gen(regex=REGEX_INT + ",") + "\n"
             s += '  "area": ' + sgl.gen(regex=REGEX_INT + ",") + "\n"
-            s += '  "latitude": ' + sgl.gen(regex=REGEX_FLOAT + ",") + "\n"
-            s += '  "country": ' + sgl.gen(regex=REGEX_STRING) + "\n"
+            s += '  "latitude": ' + sgl.gen(regex=REGEX_FLOAT) + "\n"
             s += "}"

     ret = decode_json.run(temperature=0.0)
@@ -129,7 +128,7 @@ def test_decode_json_regex():
 def test_decode_json():
     @sgl.function
     def decode_json(s):
-        s += "Generate a JSON object to describe the basic information of a city.\n"
+        s += "Generate a JSON object to describe the basic city information of Paris.\n"

         with s.var_scope("json_output"):
             s += "{\n"
@@ -264,6 +263,7 @@ def test_parallel_decoding():
     s += "\nIn summary," + sgl.gen("summary", max_tokens=512)

     ret = parallel_decoding.run(topic="writing a good blog post", temperature=0.3)
+    assert isinstance(ret["summary"], str)


 def test_parallel_encoding(check_answer=True):
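Because every field in decode_json is generated under a regex constraint inside var_scope("json_output"), the captured text should parse as JSON. A hedged sketch of how that capture might be consumed (names illustrative; the actual test's assertions may differ):

import json

ret = decode_json.run(temperature=0.0)
js_obj = json.loads(ret["json_output"])  # text captured by the var_scope above
assert isinstance(js_obj["name"], str)
assert isinstance(js_obj["population"], int)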
test/lang/test_srt_backend.py (view file @ 72b6ea88)

@@ -21,7 +21,7 @@ class TestSRTBackend(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
-        cls.backend = sgl.Runtime(model_path="meta-llama/Meta-Llama-3-8B-Instruct")
+        cls.backend = sgl.Runtime(model_path="meta-llama/Meta-Llama-3.1-8B-Instruct")
         sgl.set_default_backend(cls.backend)

     @classmethod
test/srt/example_image.png (deleted, mode 120000 → 0, view file @ e4d3333c)

Removed symlink whose target was ../lang/example_image.png (no newline at end of file).
File moves (view file @ 72b6ea88):

test/srt/test_curl.sh                     → test/srt/old/test_curl.sh
test/srt/test_flashinfer.py               → test/srt/old/test_flashinfer.py
test/srt/test_httpserver_classify.py      → test/srt/old/test_httpserver_classify.py
test/srt/test_httpserver_concurrent.py    → test/srt/old/test_httpserver_concurrent.py
test/srt/test_httpserver_decode.py        → test/srt/old/test_httpserver_decode.py
test/srt/test_httpserver_decode_stream.py → test/srt/old/test_httpserver_decode_stream.py
test/srt/test_httpserver_llava.py         → test/srt/old/test_httpserver_llava.py
test/srt/test_httpserver_reuse.py         → test/srt/old/test_httpserver_reuse.py
test/srt/test_jump_forward.py             → test/srt/old/test_jump_forward.py
test/srt/old/test_openai_server.py (new file, mode 0 → 100644, view file @ 72b6ea88)

"""
First run the following command to launch the server.
Note that TinyLlama adopts different chat templates in different versions.
For v0.4, the chat template is chatml.

python3 -m sglang.launch_server --model-path TinyLlama/TinyLlama-1.1B-Chat-v0.4 \
--port 30000 --chat-template chatml

Output example:
The capital of France is Paris.
The capital of the United States is Washington, D.C.
The capital of Canada is Ottawa.
The capital of Japan is Tokyo
"""

import argparse
import json

import openai


def test_completion(args, echo, logprobs):
    client = openai.Client(api_key="EMPTY", base_url=args.base_url)
    response = client.completions.create(
        model="default",
        prompt="The capital of France is",
        temperature=0,
        max_tokens=32,
        echo=echo,
        logprobs=logprobs,
    )
    text = response.choices[0].text
    print(response.choices[0].text)
    if echo:
        assert text.startswith("The capital of France is")
    if logprobs:
        print(response.choices[0].logprobs.top_logprobs)
        assert response.choices[0].logprobs
        if echo:
            assert response.choices[0].logprobs.token_logprobs[0] == None
        else:
            assert response.choices[0].logprobs.token_logprobs[0] != None
    assert response.id
    assert response.created
    assert response.usage.prompt_tokens > 0
    assert response.usage.completion_tokens > 0
    assert response.usage.total_tokens > 0
    print("=" * 100)


def test_completion_stream(args, echo, logprobs):
    client = openai.Client(api_key="EMPTY", base_url=args.base_url)
    response = client.completions.create(
        model="default",
        prompt="The capital of France is",
        temperature=0,
        max_tokens=32,
        stream=True,
        echo=echo,
        logprobs=logprobs,
    )
    first = True
    for r in response:
        if first:
            if echo:
                assert r.choices[0].text.startswith("The capital of France is")
            first = False
        if logprobs:
            print(
                f"{r.choices[0].text:12s}\t"
                f"{r.choices[0].logprobs.token_logprobs}",
                flush=True,
            )
            print(r.choices[0].logprobs.top_logprobs)
        else:
            print(r.choices[0].text, end="", flush=True)
        assert r.id
        assert r.usage.prompt_tokens > 0
        assert r.usage.completion_tokens > 0
        assert r.usage.total_tokens > 0
    print("=" * 100)


def test_chat_completion(args):
    client = openai.Client(api_key="EMPTY", base_url=args.base_url)
    response = client.chat.completions.create(
        model="default",
        messages=[
            {"role": "system", "content": "You are a helpful AI assistant"},
            {"role": "user", "content": "What is the capital of France?"},
        ],
        temperature=0,
        max_tokens=32,
    )
    print(response.choices[0].message.content)
    assert response.id
    assert response.created
    assert response.usage.prompt_tokens > 0
    assert response.usage.completion_tokens > 0
    assert response.usage.total_tokens > 0
    print("=" * 100)


def test_chat_completion_image(args):
    client = openai.Client(api_key="EMPTY", base_url=args.base_url)
    response = client.chat.completions.create(
        model="default",
        messages=[
            {"role": "system", "content": "You are a helpful AI assistant"},
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Describe this image"},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://raw.githubusercontent.com/sgl-project/sglang/main/assets/mixtral_8x7b.jpg"
                        },
                    },
                ],
            },
        ],
        temperature=0,
        max_tokens=32,
    )
    print(response.choices[0].message.content)
    assert response.id
    assert response.created
    assert response.usage.prompt_tokens > 0
    assert response.usage.completion_tokens > 0
    assert response.usage.total_tokens > 0
    print("=" * 100)


def test_chat_completion_stream(args):
    client = openai.Client(api_key="EMPTY", base_url=args.base_url)
    response = client.chat.completions.create(
        model="default",
        messages=[
            {"role": "system", "content": "You are a helpful AI assistant"},
            {"role": "user", "content": "List 3 countries and their capitals."},
        ],
        temperature=0,
        max_tokens=64,
        stream=True,
    )
    is_first = True
    for chunk in response:
        if is_first:
            is_first = False
            assert chunk.choices[0].delta.role == "assistant"
            continue

        data = chunk.choices[0].delta
        if not data.content:
            continue
        print(data.content, end="", flush=True)
    print("=" * 100)


def test_regex(args):
    client = openai.Client(api_key="EMPTY", base_url=args.base_url)

    regex = (
        r"""\{\n"""
        + r"""    "name": "[\w]+",\n"""
        + r"""    "population": [\d]+\n"""
        + r"""\}"""
    )

    response = client.chat.completions.create(
        model="default",
        messages=[
            {"role": "system", "content": "You are a helpful AI assistant"},
            {"role": "user", "content": "Introduce the capital of France."},
        ],
        temperature=0,
        max_tokens=128,
        extra_body={"regex": regex},
    )
    text = response.choices[0].message.content
    print(json.loads(text))
    print("=" * 100)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--base-url", type=str, default="http://127.0.0.1:30000/v1")
    parser.add_argument(
        "--test-image", action="store_true", help="Enables testing image inputs"
    )
    args = parser.parse_args()

    test_completion(args, echo=False, logprobs=False)
    test_completion(args, echo=True, logprobs=False)
    test_completion(args, echo=False, logprobs=True)
    test_completion(args, echo=True, logprobs=True)
    test_completion(args, echo=False, logprobs=3)
    test_completion(args, echo=True, logprobs=3)
    test_completion_stream(args, echo=False, logprobs=False)
    test_completion_stream(args, echo=True, logprobs=False)
    test_completion_stream(args, echo=False, logprobs=True)
    test_completion_stream(args, echo=True, logprobs=True)
    test_completion_stream(args, echo=False, logprobs=3)
    test_completion_stream(args, echo=True, logprobs=3)
    test_chat_completion(args)
    test_chat_completion_stream(args)
    test_regex(args)
    if args.test_image:
        test_chat_completion_image(args)
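To exercise the script above end to end, one would launch the TinyLlama server from the module docstring and then run the script against it. A rough wrapper, with a fixed sleep standing in for real readiness polling (the unittest rewrite below polls /v1/models instead):

import subprocess
import time

# Launch the server exactly as the module docstring instructs.
server = subprocess.Popen([
    "python3", "-m", "sglang.launch_server",
    "--model-path", "TinyLlama/TinyLlama-1.1B-Chat-v0.4",
    "--port", "30000", "--chat-template", "chatml",
])
time.sleep(120)  # crude wait for model load; adjust for your hardware
subprocess.run(["python3", "test_openai_server.py"], cwd="test/srt/old", check=True)
server.terminate()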
test/srt/test_robust.py → test/srt/old/test_robust.py (view file @ 72b6ea88, file moved)
test/srt/test_openai_server.py (view file @ 72b6ea88, +116 -202)

The old argparse-driven docstring, standalone test_* functions, and sequential __main__ block are deleted here (they survive verbatim in test/srt/old/test_openai_server.py above). The file is rewritten as a self-contained unittest suite that launches and tears down the server itself:

import subprocess
import time
import unittest

import openai
import requests

from sglang.srt.utils import kill_child_process


class TestOpenAIServer(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        model = "meta-llama/Meta-Llama-3.1-8B-Instruct"
        port = 30000
        timeout = 300

        command = [
            "python3",
            "-m",
            "sglang.launch_server",
            "--model-path",
            model,
            "--host",
            "localhost",
            "--port",
            str(port),
        ]
        cls.process = subprocess.Popen(command, stdout=None, stderr=None)
        cls.base_url = f"http://localhost:{port}/v1"
        cls.model = model

        start_time = time.time()
        while time.time() - start_time < timeout:
            try:
                response = requests.get(f"{cls.base_url}/models")
                if response.status_code == 200:
                    return
            except requests.RequestException:
                pass
            time.sleep(10)
        raise TimeoutError("Server failed to start within the timeout period.")

    @classmethod
    def tearDownClass(cls):
        kill_child_process(cls.process.pid)

    def run_completion(self, echo, logprobs):
        client = openai.Client(api_key="EMPTY", base_url=self.base_url)
        prompt = "The capital of France is"
        response = client.completions.create(
            model=self.model,
            prompt=prompt,
            temperature=0.1,
            max_tokens=32,
            echo=echo,
            logprobs=logprobs,
        )
        text = response.choices[0].text

        if echo:
            assert text.startswith(prompt)

        if logprobs:
            assert response.choices[0].logprobs
            assert isinstance(response.choices[0].logprobs.tokens[0], str)
            assert isinstance(response.choices[0].logprobs.top_logprobs[1], dict)
            assert len(response.choices[0].logprobs.top_logprobs[1]) == logprobs

            if echo:
                assert response.choices[0].logprobs.token_logprobs[0] == None
            else:
                assert response.choices[0].logprobs.token_logprobs[0] != None

        assert response.id
        assert response.created
        assert response.usage.prompt_tokens > 0
        assert response.usage.completion_tokens > 0
        assert response.usage.total_tokens > 0

    def run_completion_stream(self, echo, logprobs):
        client = openai.Client(api_key="EMPTY", base_url=self.base_url)
        prompt = "The capital of France is"
        generator = client.completions.create(
            model=self.model,
            prompt=prompt,
            temperature=0.1,
            max_tokens=32,
            echo=echo,
            logprobs=logprobs,
            stream=True,
        )

        first = True
        for response in generator:
            if logprobs:
                assert response.choices[0].logprobs
                assert isinstance(response.choices[0].logprobs.tokens[0], str)
                if not (first and echo):
                    assert isinstance(
                        response.choices[0].logprobs.top_logprobs[0], dict
                    )
                    # assert len(response.choices[0].logprobs.top_logprobs[0]) == logprobs

            if first:
                if echo:
                    assert response.choices[0].text.startswith(prompt)
                first = False

            assert response.id
            assert response.created
            assert response.usage.prompt_tokens > 0
            assert response.usage.completion_tokens > 0
            assert response.usage.total_tokens > 0

    def test_completion(self):
        for echo in [False, True]:
            for logprobs in [None, 5]:
                self.run_completion(echo, logprobs)

    def test_completion_stream(self):
        for echo in [True]:
            for logprobs in [5]:
                self.run_completion_stream(echo, logprobs)


if __name__ == "__main__":
    # unittest.main(warnings="ignore")

    t = TestOpenAIServer()
    t.setUpClass()
    t.test_completion_stream()
    t.tearDownClass()
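Since the tests are now ordinary unittest cases, a single case can also be selected through the standard loader instead of the ad-hoc driver at the bottom of the file; a minimal sketch, run from test/srt:

import unittest

# Load and run one method; setUpClass/tearDownClass still launch and
# kill the server process around it.
suite = unittest.TestLoader().loadTestsFromName(
    "test_openai_server.TestOpenAIServer.test_completion"
)
unittest.TextTestRunner(verbosity=2).run(suite)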