sglang / Commits / 35759efa

Commit 35759efa (unverified), authored Jul 20, 2024 by Lianmin Zheng, committed by GitHub on Jul 20, 2024.

Support random dataset in bench_serving.py (#669)

parent 8f4b1559

Showing 4 changed files with 82 additions and 15 deletions (+82 -15):
- python/sglang/bench_serving.py (+76 -9)
- python/sglang/srt/managers/controller/model_runner.py (+1 -1)
- python/sglang/srt/managers/io_struct.py (+1 -1)
- python/sglang/srt/managers/tokenizer_manager.py (+4 -4)
python/sglang/bench_serving.py (view file @ 35759efa)

@@ -273,6 +273,37 @@ def sample_sharegpt_requests(
     return filtered_dataset
 
 
+def sample_random_requests(
+    input_len: int,
+    output_len: int,
+    num_prompts: int,
+    range_ratio: float,
+    tokenizer: PreTrainedTokenizerBase,
+) -> List[Tuple[str, int, int]]:
+    input_lens = np.random.randint(
+        int(input_len * range_ratio),
+        input_len + 1,
+        size=num_prompts,
+    )
+    output_lens = np.random.randint(
+        int(output_len * range_ratio),
+        output_len + 1,
+        size=num_prompts,
+    )
+    offsets = np.random.randint(0, tokenizer.vocab_size, size=num_prompts)
+    input_requests = []
+    for i in range(num_prompts):
+        prompt = tokenizer.decode(
+            [(offsets[i] + i + j) % tokenizer.vocab_size for j in range(input_lens[i])]
+        )
+        input_requests.append((prompt, int(input_lens[i]), int(output_lens[i])))
+
+    print(f"#Input tokens: {np.sum(input_lens)}")
+    print(f"#Output tokens: {np.sum(output_lens)}")
+    return input_requests
+
+
 async def get_request(
     input_requests: List[Tuple[str, int, int]],
     request_rate: float,
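The new function draws per-request input and output lengths uniformly from [len * range_ratio, len], then synthesizes each prompt by decoding a run of token ids starting at a random vocabulary offset. A minimal stand-alone sketch of that scheme (the "gpt2" tokenizer is just an example, and this re-implements the logic rather than calling the committed function):

```python
import numpy as np
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # example tokenizer

input_len, range_ratio, num_prompts = 32, 0.5, 4
input_lens = np.random.randint(
    int(input_len * range_ratio), input_len + 1, size=num_prompts
)
offsets = np.random.randint(0, tokenizer.vocab_size, size=num_prompts)

for i in range(num_prompts):
    # Walk the vocabulary from a random offset, wrapping modulo vocab_size.
    ids = [(offsets[i] + i + j) % tokenizer.vocab_size for j in range(input_lens[i])]
    # Note: decoding arbitrary ids and re-encoding the resulting text may
    # not round-trip to exactly input_lens[i] tokens.
    print(len(ids), repr(tokenizer.decode(ids))[:60])
```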
@@ -530,13 +561,23 @@ def fire(args: argparse.Namespace):
 
     tokenizer = get_tokenizer(tokenizer_id)
 
-    assert args.dataset is not None
-    input_requests = sample_sharegpt_requests(
-        dataset_path=args.dataset,
-        num_requests=args.num_prompts,
-        tokenizer=tokenizer,
-        fixed_output_len=args.sharegpt_output_len,
-    )
+    if args.dataset_name == "sharegpt":
+        input_requests = sample_sharegpt_requests(
+            dataset_path=args.dataset_path,
+            num_requests=args.num_prompts,
+            tokenizer=tokenizer,
+            fixed_output_len=args.sharegpt_output_len,
+        )
+    elif args.dataset_name == "random":
+        input_requests = sample_random_requests(
+            input_len=args.random_input_len,
+            output_len=args.random_output_len,
+            num_prompts=args.num_prompts,
+            range_ratio=args.random_range_ratio,
+            tokenizer=tokenizer,
+        )
+    else:
+        raise ValueError(f"Unknown dataset: {args.dataset_name}")
 
     asyncio.run(
         benchmark(
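With this dispatch in place, a random-dataset run could be launched roughly as follows. This is a hypothetical invocation: --backend and --num-prompts are pre-existing flags of the script, and the values shown are arbitrary examples, not defaults from the commit.

```
python3 -m sglang.bench_serving --backend sglang \
    --dataset-name random \
    --random-input-len 512 --random-output-len 128 \
    --random-range-ratio 0.5 --num-prompts 100
```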
@@ -589,7 +630,14 @@ if __name__ == "__main__":
         help="If not set, the default port is configured according to its default value for different LLM Inference Engines.",
     )
     parser.add_argument(
-        "--dataset", type=str, default="sharegpt", help="Path to the ShareGPT dataset"
+        "--dataset-name",
+        type=str,
+        default="sharegpt",
+        choices=["sharegpt", "random"],
+        help="Name of the dataset to benchmark on.",
+    )
+    parser.add_argument(
+        "--dataset-path", type=str, default="", help="Path to the dataset."
     )
     parser.add_argument(
         "--model",
@@ -613,10 +661,29 @@ if __name__ == "__main__":
         default=None,
         help="Output length for each request. Overrides the output length from the ShareGPT dataset.",
     )
+    parser.add_argument(
+        "--random-input-len",
+        type=int,
+        default=1024,
+        help="Number of input tokens per request, used only for random dataset.",
+    )
+    parser.add_argument(
+        "--random-output-len",
+        type=int,
+        default=128,
+        help="Number of output tokens per request, used only for random dataset.",
+    )
+    parser.add_argument(
+        "--random-range-ratio",
+        type=float,
+        default=1.0,
+        help="Range of sampled ratio of input/output length, "
+        "used only for random dataset.",
+    )
     parser.add_argument(
         "--request-rate",
         type=float,
-        default=128.0,
+        default=float("inf"),
         help="Number of requests per second. If this is inf, then all the requests are sent at time 0. "
         "Otherwise, we use Poisson process to synthesize the request arrival times. Default is 128.0.",
     )
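The --request-rate help refers to a Poisson arrival process: with rate lambda requests per second, inter-arrival gaps are exponentially distributed with mean 1/lambda. A minimal sketch of how such arrivals are typically synthesized (an illustration under that assumption, not the committed get_request body):

```python
import asyncio
import numpy as np

async def poisson_arrivals(requests, request_rate: float):
    """Yield requests with Poisson-process arrival times (sketch)."""
    for req in requests:
        yield req
        if request_rate == float("inf"):
            continue  # inf: release all requests at time 0
        # Exponential inter-arrival gap with mean 1 / request_rate seconds.
        await asyncio.sleep(np.random.exponential(1.0 / request_rate))
```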
python/sglang/srt/managers/controller/model_runner.py (view file @ 35759efa)

@@ -233,7 +233,7 @@ class ModelRunner:
             return
 
         logger.info(f"[gpu_id={self.gpu_id}] Capture cuda graph begin.")
-        batch_size_list = [1, 2, 4] + [i * 8 for i in range(1, 16)]
+        batch_size_list = [1, 2, 4] + [i * 8 for i in range(1, 17)]
         self.cuda_graph_runner = CudaGraphRunner(
             self, max_batch_size_to_capture=max(batch_size_list)
         )
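Since Python's range excludes its upper bound, range(1, 16) stops at i = 15 and the largest captured CUDA graph batch size was 120; raising the bound to 17 extends capture to batch size 128. A quick check:

```python
old = [1, 2, 4] + [i * 8 for i in range(1, 16)]
new = [1, 2, 4] + [i * 8 for i in range(1, 17)]
print(max(old), max(new))  # 120 128
```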
python/sglang/srt/managers/io_struct.py (view file @ 35759efa)

@@ -40,7 +40,7 @@ class GenerateReqInput:
             self.text is not None and self.input_ids is not None
         ):
             raise ValueError("Either text or input_ids should be provided.")
-        if "n" in self.sampling_params and self.sampling_params["n"] != 1:
+        if self.sampling_params.get("n", 1) != 1:
             is_single = False
         else:
             if self.text is not None:
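dict.get folds the membership test and the lookup into a single call; the two conditions agree for any sampling_params dict, since a missing "n" defaults to 1:

```python
for sampling_params in ({}, {"n": 1}, {"n": 4}):
    old = "n" in sampling_params and sampling_params["n"] != 1
    new = sampling_params.get("n", 1) != 1
    assert old == new  # missing "n" or n == 1 -> False; n == 4 -> True
```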
python/sglang/srt/managers/tokenizer_manager.py (view file @ 35759efa)

@@ -196,14 +196,14 @@ class TokenizerManager:
             event = asyncio.Event()
             state = ReqState([], False, event)
             self.rid_to_state[rid] = state
 
-            if is_prefill == False:
+            if is_prefill:
+                await self._wait_for_prefill_response(event, state, obj, request, rid)
+                yield input_ids
+            else:
                 async for response in self._wait_for_response(
                     event, state, obj, rid, request
                 ):
                     yield response
-            else:
-                await self._wait_for_prefill_response(event, state, obj, request, rid)
-                yield input_ids
 
     async def _handle_batch_request(self, obj, request):
         batch_size = obj.batch_size
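This hunk only flips the polarity of the unidiomatic is_prefill == False test: the prefill branch (_wait_for_prefill_response followed by yielding input_ids) now sits directly under if is_prefill:, and the streaming _wait_for_response loop moves into the else: branch. Behavior is unchanged; only the branch order and the condition differ.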