Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
d9a69029
"vscode:/vscode.git/clone" did not exist on "42180bd929639c137706be99c93626da93a36f2e"
Unverified
Commit
d9a69029
authored
Jul 11, 2024
by
Lianmin Zheng
Committed by
GitHub
Jul 11, 2024
Browse files
Fix bench latency (#607)
parent
ad872feb
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
8 additions
and
3 deletions
+8
-3
python/sglang/bench_latency.py
python/sglang/bench_latency.py
+8
-3
No files found.
python/sglang/bench_latency.py
View file @
d9a69029
...
...
@@ -30,8 +30,10 @@ import argparse
import
dataclasses
import
logging
import
multiprocessing
import
os
import
time
import
numpy
as
np
import
torch
import
torch.distributed
as
dist
...
...
@@ -70,6 +72,7 @@ class BenchArgs:
def
load_model
(
server_args
,
tp_rank
):
suppress_other_loggers
()
rank_print
=
print
if
tp_rank
==
0
else
lambda
*
args
,
**
kwargs
:
None
model_config
=
ModelConfig
(
path
=
server_args
.
model_path
)
model_runner
=
ModelRunner
(
...
...
@@ -81,7 +84,7 @@ def load_model(server_args, tp_rank):
nccl_port
=
28888
,
server_args
=
server_args
,
)
print
(
f
"max_total_num_tokens=
{
model_runner
.
max_total_num_tokens
}
"
)
rank_
print
(
f
"max_total_num_tokens=
{
model_runner
.
max_total_num_tokens
}
"
)
tokenizer
=
get_tokenizer
(
server_args
.
tokenizer_path
,
tokenizer_mode
=
server_args
.
tokenizer_mode
,
...
...
@@ -201,7 +204,7 @@ def correctness_test(
# Print
for
i
in
range
(
len
(
reqs
)):
print
(
tokenizer
.
decode
(
output_ids
[
i
]))
rank_
print
(
tokenizer
.
decode
(
output_ids
[
i
]))
def
latency_test
(
...
...
@@ -213,7 +216,7 @@ def latency_test(
# Load the model
model_runner
,
tokenizer
=
load_model
(
server_args
,
tp_rank
)
print
(
rank_
print
(
f
"max_batch_size=
{
model_runner
.
max_total_num_tokens
//
(
bench_args
.
input_len
+
bench_args
.
output_len
)
}
"
)
...
...
@@ -299,6 +302,8 @@ def main(server_args, bench_args):
for
proc
in
workers
:
proc
.
join
()
proc
.
terminate
()
if
__name__
==
"__main__"
:
parser
=
argparse
.
ArgumentParser
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment