Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
c61a1b6f
Unverified
Commit
c61a1b6f
authored
Aug 26, 2024
by
Liangsheng Yin
Committed by
GitHub
Aug 26, 2024
Browse files
Torch compile CI throughput test (#1223)
parent
9935f97b
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
30 additions
and
1 deletion
+30
-1
test/srt/test_torch_compile.py
test/srt/test_torch_compile.py
+30
-1
No files found.
test/srt/test_torch_compile.py
View file @
c61a1b6f
import
unittest
import
unittest
from
types
import
SimpleNamespace
from
types
import
SimpleNamespace
import
requests
from
sglang.srt.utils
import
kill_child_process
from
sglang.srt.utils
import
kill_child_process
from
sglang.test.run_eval
import
run_eval
from
sglang.test.run_eval
import
run_eval
from
sglang.test.test_utils
import
(
from
sglang.test.test_utils
import
(
...
@@ -20,7 +22,7 @@ class TestTorchCompile(unittest.TestCase):
...
@@ -20,7 +22,7 @@ class TestTorchCompile(unittest.TestCase):
cls
.
model
,
cls
.
model
,
cls
.
base_url
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--enable-torch-compile"
],
other_args
=
[
"--enable-torch-compile"
,
"--disable-radix-cache"
],
)
)
@
classmethod
@
classmethod
...
@@ -39,6 +41,33 @@ class TestTorchCompile(unittest.TestCase):
...
@@ -39,6 +41,33 @@ class TestTorchCompile(unittest.TestCase):
metrics
=
run_eval
(
args
)
metrics
=
run_eval
(
args
)
assert
metrics
[
"score"
]
>=
0.6
assert
metrics
[
"score"
]
>=
0.6
def run_decode(self, max_new_tokens, timeout=120):
    """Send one ``/generate`` request to the test server and return its JSON body.

    The request uses a fixed prompt and ``temperature`` 0.

    Args:
        max_new_tokens: Value forwarded as ``sampling_params["max_new_tokens"]``.
        timeout: Seconds to wait for the HTTP response. Without a bound, a
            hung server would stall the whole test run indefinitely.

    Returns:
        The response body decoded from JSON.

    Raises:
        requests.exceptions.Timeout: No response arrived within ``timeout``.
        requests.exceptions.HTTPError: The server replied with a 4xx/5xx status.
    """
    response = requests.post(
        self.base_url + "/generate",
        json={
            "text": "The capital of France is",
            "sampling_params": {
                "temperature": 0,
                "max_new_tokens": max_new_tokens,
            },
            # NOTE(review): sent at the top level of the payload rather than
            # inside "sampling_params" -- confirm the server reads it here.
            "ignore_eos": True,
        },
        timeout=timeout,
    )
    # Surface an error status here instead of failing later on a missing key.
    response.raise_for_status()
    return response.json()
def test_throughput(self):
    """Check that a single decode request reaches at least 152 tokens/s.

    Times one ``run_decode`` call that asks for 256 new tokens, prints the
    generated text and the measured rate, and fails when the rate is below
    the threshold.
    """
    import time

    max_tokens = 256

    # perf_counter() is monotonic and high-resolution; time.time() follows the
    # wall clock, which can be adjusted mid-measurement and skew the result.
    tic = time.perf_counter()
    res = self.run_decode(max_tokens)
    tok = time.perf_counter()
    print(res["text"])

    # Assumes the server produced exactly max_tokens tokens -- TODO confirm
    # against the token count reported in the response, if one is available.
    throughput = max_tokens / (tok - tic)
    print(f"Throughput: {throughput} tokens/s")
    # NOTE(review): 152 is presumably calibrated for the CI hardware; verify
    # before running this test on a different machine.
    assert throughput >= 152, (
        f"throughput {throughput:.2f} tokens/s is below the required 152 tokens/s"
    )
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
unittest
.
main
()
unittest
.
main
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment