Commit 995af5a5 (unverified)
Authored Aug 03, 2024 by Ying Sheng; committed via GitHub on Aug 03, 2024

Improve the structure of CI (#911)

Parent commit: 53985645
Showing 9 changed files with 164 additions and 60 deletions (+164 −60)
- scripts/launch_tgi.sh (+0 −6)
- test/README.md (+26 −0)
- test/lang/run_suite.py (+5 −45)
- test/srt/models/test_causal_models.py (+1 −0)
- test/srt/run_suite.py (+40 −0)
- test/srt/test_chunked_prefill.py (+45 −0)
- test/srt/test_eval_accuracy.py (+2 −5)
- test/srt/test_openai_server.py (+3 −4)
- test/srt/test_torch_compile.py (+42 −0)
scripts/launch_tgi.sh (deleted, file mode 100644 → 0)

```
docker run --name tgi --rm -ti --gpus all --network host \
    -v /home/ubuntu/model_weights/Llama-2-7b-chat-hf:/Llama-2-7b-chat-hf \
    ghcr.io/huggingface/text-generation-inference:1.3.0 \
    --model-id /Llama-2-7b-chat-hf --num-shard 1 --trust-remote-code \
    --max-input-length 2048 --max-total-tokens 4096 \
    --port 24000
```
test/README.md (new file, file mode 0 → 100644)
# Run Unit Tests
## Test Frontend Language
```
cd sglang/test/lang
export OPENAI_API_KEY=sk-*****
# Run a single file
python3 test_openai_backend.py
# Run a suite
python3 run_suite.py --suite minimal
```
## Test Backend Runtime
```
cd sglang/test/srt
# Run a single file
python3 test_eval_accuracy.py
# Run a suite
python3 run_suite.py --suite minimal
```
test/lang/run_suite.py

```
 import argparse
 import glob
-import multiprocessing
-import os
-import time
-import unittest
-
-from sglang.utils import run_with_timeout
+
+from sglang.test.test_utils import run_unittest_files

 suites = {
-    "minimal": ["test_openai_backend.py", "test_srt_backend.py"],
+    "minimal": ["test_srt_backend.py", "test_openai_backend.py"],
 }

-
-def run_unittest_files(files, args):
-    for filename in files:
-
-        def func():
-            print(filename)
-            ret = unittest.main(module=None, argv=["", "-vb"] + [filename])
-
-        p = multiprocessing.Process(target=func)
-
-        def run_one_file():
-            p.start()
-            p.join()
-
-        try:
-            run_with_timeout(run_one_file, timeout=args.time_limit_per_file)
-            if p.exitcode != 0:
-                return False
-        except TimeoutError:
-            p.terminate()
-            time.sleep(5)
-            print(
-                f"\nTimeout after {args.time_limit_per_file} seconds "
-                f"when running {filename}"
-            )
-            return False
-
-    return True

 if __name__ == "__main__":
     arg_parser = argparse.ArgumentParser()
     arg_parser.add_argument(
-        "--time-limit-per-file",
+        "--timeout-per-file",
         type=int,
         default=1000,
         help="The time limit for running one file in seconds.",
...
@@ -63,12 +30,5 @@ if __name__ == "__main__":
     else:
         files = suites[args.suite]

-    tic = time.time()
-    success = run_unittest_files(files, args)
-
-    if success:
-        print(f"Success. Time elapsed: {time.time() - tic:.2f}s")
-    else:
-        print(f"Fail. Time elapsed: {time.time() - tic:.2f}s")
-
-    exit(0 if success else -1)
+    exit_code = run_unittest_files(files, args.timeout_per_file)
+    exit(exit_code)
```
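The local `run_unittest_files` helper deleted above now comes from `sglang.test.test_utils`. Below is a minimal sketch of what that shared helper might look like, inferred from the removed implementation and the new call site `run_unittest_files(files, args.timeout_per_file)`; the actual code in `sglang.test.test_utils` may differ in details:

```
# Hypothetical sketch of sglang.test.test_utils.run_unittest_files,
# assuming it keeps the removed version's process-per-file isolation
# and returns an exit code for the caller to pass to exit().
import multiprocessing
import time
import unittest


def run_unittest_files(files, timeout_per_file):
    for filename in files:

        def run_one_file():
            print(filename)
            # Run the file's tests via unittest in verbose, buffered mode.
            unittest.main(module=None, argv=["", "-vb"] + [filename])

        # One child process per file so a hang or crash in a test cannot
        # take down the runner itself (relies on fork-style process start,
        # as the removed implementation did).
        p = multiprocessing.Process(target=run_one_file)
        p.start()
        p.join(timeout=timeout_per_file)

        if p.is_alive():
            # The file exceeded its time budget: kill it and fail the run.
            p.terminate()
            time.sleep(5)
            print(
                f"\nTimeout after {timeout_per_file} seconds "
                f"when running {filename}"
            )
            return -1
        if p.exitcode != 0:
            return -1

    return 0
```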
test/srt/models/test_causal_models.py

```
...
@@ -18,6 +18,7 @@ import torch

 from sglang.test.runners import DEFAULT_PROMPTS, HFRunner, SRTRunner

 MODELS = [
     # (model_name, tp_size)
     ("meta-llama/Meta-Llama-3.1-8B-Instruct", 1),
+    # ("meta-llama/Meta-Llama-3.1-8B-Instruct", 2),
 ]
...
```
test/srt/run_suite.py (new file, file mode 0 → 100644)

```
import argparse
import glob

from sglang.test.test_utils import run_unittest_files

suites = {
    "minimal": [
        "test_openai_server.py",
        "test_eval_accuracy.py",
        "test_chunked_prefill.py",
        "test_torch_compile.py",
        "models/test_causal_models.py",
    ],
}

if __name__ == "__main__":
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--timeout-per-file",
        type=int,
        default=1000,
        help="The time limit for running one file in seconds.",
    )
    arg_parser.add_argument(
        "--suite",
        type=str,
        default=list(suites.keys())[0],
        choices=list(suites.keys()) + ["all"],
        help="The suite to run",
    )
    args = arg_parser.parse_args()

    if args.suite == "all":
        files = glob.glob("**/test_*.py", recursive=True)
    else:
        files = suites[args.suite]

    exit_code = run_unittest_files(files, args.timeout_per_file)
    exit(exit_code)
```
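With this runner in place, the README's `python3 run_suite.py --suite minimal` invocation from `sglang/test/srt` runs the five files listed above; passing `--suite all` instead discovers every `test_*.py` under the directory via `glob.glob`, and `--timeout-per-file` (default 1000 seconds) bounds how long each file may run.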
test/srt/test_chunked_prefill.py (new file, file mode 0 → 100644)

```
import unittest
from types import SimpleNamespace

from sglang.srt.utils import kill_child_process
from sglang.test.run_eval import run_eval
from sglang.test.test_utils import MODEL_NAME_FOR_TEST, popen_launch_server


class TestAccuracy(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = MODEL_NAME_FOR_TEST
        cls.base_url = f"http://localhost:30000"
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=300,
            other_args=["--chunked-prefill-size", "32"],
        )

    @classmethod
    def tearDownClass(cls):
        kill_child_process(cls.process.pid)

    def test_mmlu(self):
        args = SimpleNamespace(
            base_url=self.base_url,
            model=self.model,
            eval_name="mmlu",
            num_examples=20,
            num_threads=20,
        )

        metrics = run_eval(args)
        assert metrics["score"] >= 0.5


if __name__ == "__main__":
    unittest.main(warnings="ignore")

    # t = TestAccuracy()
    # t.setUpClass()
    # t.test_mmlu()
    # t.tearDownClass()
```
test/srt/test_eval_accuracy.py

```
-import json
 import unittest
 from types import SimpleNamespace
...
@@ -11,11 +10,9 @@ class TestAccuracy(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
-        port = 30000
         cls.model = MODEL_NAME_FOR_TEST
-        cls.base_url = f"http://localhost:{port}"
-        cls.process = popen_launch_server(cls.model, port, timeout=300)
+        cls.base_url = f"http://localhost:30000"
+        cls.process = popen_launch_server(cls.model, cls.base_url, timeout=300)

     @classmethod
     def tearDownClass(cls):
...
```
test/srt/test_openai_server.py

```
...
@@ -11,11 +11,10 @@ class TestOpenAIServer(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
-        port = 30000
         cls.model = MODEL_NAME_FOR_TEST
-        cls.base_url = f"http://localhost:{port}/v1"
-        cls.process = popen_launch_server(cls.model, port, timeout=300)
+        cls.base_url = f"http://localhost:30000"
+        cls.process = popen_launch_server(cls.model, cls.base_url, timeout=300)
+        cls.base_url += "/v1"

     @classmethod
     def tearDownClass(cls):
...
```
test/srt/test_torch_compile.py (new file, file mode 0 → 100644)

```
import unittest
from types import SimpleNamespace

from sglang.srt.utils import kill_child_process
from sglang.test.run_eval import run_eval
from sglang.test.test_utils import MODEL_NAME_FOR_TEST, popen_launch_server


class TestAccuracy(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = MODEL_NAME_FOR_TEST
        cls.base_url = f"http://localhost:30000"
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=300,
            other_args=["--enable-torch-compile"],
        )

    @classmethod
    def tearDownClass(cls):
        kill_child_process(cls.process.pid)

    def test_mmlu(self):
        args = SimpleNamespace(
            base_url=self.base_url,
            model=self.model,
            eval_name="mmlu",
            num_examples=20,
            num_threads=20,
        )

        metrics = run_eval(args)
        assert metrics["score"] >= 0.5


if __name__ == "__main__":
    unittest.main(warnings="ignore")

    # t = TestAccuracy()
    # t.setUpClass()
    # t.test_mmlu()
    # t.tearDownClass()
```