Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
65bd1338
Unverified
Commit
65bd1338
authored
Jul 22, 2024
by
zhyncs
Committed by
GitHub
Jul 22, 2024
Browse files
misc: recommend to use chat model for benchmark (#690)
parent
eedc12e1
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
16 additions
and
0 deletions
+16
-0
python/sglang/bench_serving.py
python/sglang/bench_serving.py
+16
-0
No files found.
python/sglang/bench_serving.py
View file @
65bd1338
...
@@ -630,6 +630,7 @@ async def benchmark(
...
@@ -630,6 +630,7 @@ async def benchmark(
"random_input_len"
:
args
.
random_input_len
,
"random_input_len"
:
args
.
random_input_len
,
"random_output_len"
:
args
.
random_output_len
,
"random_output_len"
:
args
.
random_output_len
,
"random_range_ratio"
:
args
.
random_range_ratio
,
"random_range_ratio"
:
args
.
random_range_ratio
,
"benchmark_duration"
:
benchmark_duration
,
}
}
else
:
else
:
print
(
f
"Error running benchmark for request rate:
{
request_rate
}
"
)
print
(
f
"Error running benchmark for request rate:
{
request_rate
}
"
)
...
@@ -687,6 +688,15 @@ def parse_request_rate_range(request_rate_range):
...
@@ -687,6 +688,15 @@ def parse_request_rate_range(request_rate_range):
return
list
(
map
(
int
,
request_rate_range
.
split
(
","
)))
return
list
(
map
(
int
,
request_rate_range
.
split
(
","
)))
def
check_chat_template
(
model_path
):
try
:
tokenizer
=
AutoTokenizer
.
from_pretrained
(
model_path
,
trust_remote_code
=
True
)
return
"chat_template"
in
tokenizer
.
init_kwargs
except
Exception
as
e
:
print
(
f
"Fail to load tokenizer config with error=
{
e
}
"
)
return
False
def
fire
(
args
:
argparse
.
Namespace
):
def
fire
(
args
:
argparse
.
Namespace
):
random
.
seed
(
args
.
seed
)
random
.
seed
(
args
.
seed
)
np
.
random
.
seed
(
args
.
seed
)
np
.
random
.
seed
(
args
.
seed
)
...
@@ -736,6 +746,12 @@ def fire(args: argparse.Namespace):
...
@@ -736,6 +746,12 @@ def fire(args: argparse.Namespace):
print
(
"No model specified or found. Please provide a model using `--model`."
)
print
(
"No model specified or found. Please provide a model using `--model`."
)
sys
.
exit
(
1
)
sys
.
exit
(
1
)
if
not
check_chat_template
(
args
.
model
):
print
(
"
\n
WARNING It is recommended to use the `Chat` or `Instruct` model for benchmarking.
\n
"
"Because when the tokenizer counts the output tokens, if there is gibberish, it might count incorrectly.
\n
"
)
print
(
f
"
{
args
}
\n
"
)
print
(
f
"
{
args
}
\n
"
)
backend
=
args
.
backend
backend
=
args
.
backend
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment