sglang / Commits / 2558d6a6

Commit 2558d6a6, authored Nov 15, 2024 by Lianmin Zheng; committed via GitHub on Nov 15, 2024.

Fix the default arguments of bench_offline_throughput.py & simplify detokenizer manager (#2042)

Parent: 29ebe3df
Showing 5 changed files with 42 additions and 38 deletions:

    python/sglang/bench_offline_throughput.py            +22  -18
    python/sglang/bench_serving.py                        +2   -0
    python/sglang/srt/managers/detokenizer_manager.py     +0  -14
    python/sglang/srt/managers/scheduler.py               +6   -2
    test/srt/test_overlap_schedule.py                    +12   -4
python/sglang/bench_offline_throughput.py
"""
Benchmark the throughput of using the offline LLM engine.
This script does not launch a server.
It accepts
the same
arguments as launch_server.py and
additional
benchmark arguments
It accepts
server
arguments
(the same
as launch_server.py
)
and benchmark arguments
(the same as bench_serving.py).
# Usage
## Sharegpt dataset with default args
python -m sglang.bench_offline_throughput --model-path meta-llama/Meta-Llama-3-8B-Instruct
python -m sglang.bench_offline_throughput --model-path meta-llama/Meta-Llama-3
.1
-8B-Instruct
## Random dataset with default args
python -m sglang.bench_offline_throughput --model-path meta-llama/Meta-Llama-3-8B-Instruct --dataset-name random
python -m sglang.bench_offline_throughput --model-path meta-llama/Meta-Llama-3
.1
-8B-Instruct --dataset-name random
## Shared prefix dataset with default args
python -m sglang.bench_offline_throughput --model-path meta-llama/Meta-Llama-3-8B-Instruct --dataset-name generated-shared-prefix
python -m sglang.bench_offline_throughput --model-path meta-llama/Meta-Llama-3
.1
-8B-Instruct --dataset-name generated-shared-prefix
## Sharegpt dataset on runtime backend
python -m sglang.bench_offline_throughput --model-path meta-llama/Meta-Llama-3-8B-Instruct --backend runtime
python -m sglang.bench_offline_throughput --model-path meta-llama/Meta-Llama-3
.1
-8B-Instruct --backend runtime
"""
import
argparse
...
...
@@ -23,7 +23,7 @@ import json
 import logging
 import random
 import time
-from typing import List, Tuple
+from typing import List, Optional, Tuple

 import numpy as np
@@ -45,14 +45,15 @@ class BenchArgs:
     dataset_name: str = "sharegpt"
     dataset_path: str = ""
     num_prompts: int = 1000
-    sharegpt_output_len: int = 256
-    random_input_len: int = 256
-    random_output_len: int = 256
+    sharegpt_output_len: Optional[int] = None
+    random_input_len: int = 1024
+    random_output_len: int = 1024
     random_range_ratio: float = 0.0
-    gen_num_groups: int = 8
+    gen_num_groups: int = 64
     gen_prompts_per_group: int = 16
-    gen_system_prompt_len: int = 128
-    gen_question_len: int = 256
+    gen_system_prompt_len: int = 2048
+    gen_question_len: int = 128
     gen_output_len: int = 256
     disable_ignore_eos: bool = False
     seed: int = 1
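These defaults bring bench_offline_throughput in line with bench_serving: the random dataset now defaults to 1024 input/output tokens, and the generated-shared-prefix dataset grows to 64 groups of 16 prompts with 2048-token system prompts. Making sharegpt_output_len an Optional[int] defaulting to None lets the benchmark keep each ShareGPT conversation's own completion length unless the user forces one. A hedged sketch of that fallback (the function and argument names here are illustrative, mirroring how bench_serving treats a None fixed output length):

    def pick_output_len(completion_token_ids, sharegpt_output_len=None):
        # None -> keep the dataset's own completion length; an int overrides it.
        if sharegpt_output_len is None:
            return len(completion_token_ids)
        return sharegpt_output_len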
@@ -129,6 +130,12 @@ class BenchArgs:
             default=BenchArgs.gen_question_len,
             help="Question length, used only for generate-shared-prefix",
         )
+        parser.add_argument(
+            "--gen-output-len",
+            type=int,
+            default=BenchArgs.gen_output_len,
+            help="Target length in tokens for outputs in generated-shared-prefix dataset",
+        )
         parser.add_argument(
             "--disable-ignore-eos",
             type=bool,
@@ -139,12 +146,8 @@ class BenchArgs:
     @classmethod
     def from_cli_args(cls, args: argparse.Namespace):
-        # use the default value's type to case the args into correct types.
-        attrs = [(attr.name, type(attr.default)) for attr in dataclasses.fields(cls)]
-        print(attrs)
-        return cls(
-            **{attr: attr_type(getattr(args, attr)) for attr, attr_type in attrs}
-        )
+        attrs = [attr.name for attr in dataclasses.fields(cls)]
+        return cls(**{attr: getattr(args, attr) for attr in attrs})


 def throughput_test_once(
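This rewrite is what makes the Optional default above workable. The old from_cli_args derived a cast for every field from type(attr.default); for sharegpt_output_len the default is now None, so the derived "type" is NoneType, and calling NoneType on a real CLI value raises TypeError. Since argparse already applies each option's type= converter, forwarding getattr(args, attr) unchanged is both simpler and correct (and the stray debug print(attrs) goes away). A minimal reproduction of the failure mode, using an illustrative stand-in dataclass:

    import dataclasses
    from typing import Optional

    @dataclasses.dataclass
    class Args:  # illustrative stand-in for BenchArgs
        sharegpt_output_len: Optional[int] = None

    field = dataclasses.fields(Args)[0]
    attr_type = type(field.default)  # <class 'NoneType'>
    attr_type(256)                   # raises TypeError: NoneType takes no arguments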
@@ -224,6 +227,7 @@ def throughput_test(
     random.seed(bench_args.seed)
     np.random.seed(bench_args.seed)

+    # Read dataset
     input_requests = get_dataset(bench_args, tokenizer)
     warmup_requests = sample_random_requests(
python/sglang/bench_serving.py
@@ -1241,10 +1241,12 @@ if __name__ == "__main__":
     parser.add_argument(
         "--random-input-len",
         type=int,
+        default=1024,
         help="Number of input tokens per request, used only for random dataset.",
     )
     parser.add_argument(
         "--random-output-len",
+        default=1024,
         type=int,
         help="Number of output tokens per request, used only for random dataset.",
     )
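Without an explicit default=, argparse fills an omitted option with None, so any downstream arithmetic on the request lengths would fail. A self-contained illustration of the before/after behavior (the parser here is a toy, but the option names match the diff):

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--random-input-len", type=int)                 # old style: omitted -> None
    parser.add_argument("--random-output-len", type=int, default=1024)  # new style: omitted -> 1024

    args = parser.parse_args([])   # simulate running with no flags
    print(args.random_input_len)   # None
    print(args.random_output_len)  # 1024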
python/sglang/srt/managers/detokenizer_manager.py
@@ -100,20 +100,6 @@ class DetokenizerManager:
             if isinstance(recv_obj, BatchEmbeddingOut):
                 # If it is embedding model, no detokenization is needed.
-                self.send_to_tokenizer.send_pyobj(
-                    BatchEmbeddingOut(
-                        rids=recv_obj.rids,
-                        embeddings=recv_obj.embeddings,
-                        meta_info=recv_obj.meta_info,
-                        finished_reason=recv_obj.finished_reason,
-                    )
-                )
-                continue
-            elif isinstance(recv_obj, UpdateWeightReqOutput):
-                # If it is a weight update request, no detokenization is needed.
                 self.send_to_tokenizer.send_pyobj(recv_obj)
                 continue
-            elif isinstance(recv_obj, GetMemPoolSizeReqOutput):
-                self.send_to_tokenizer.send_pyobj(recv_obj)
-                continue
             else:
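Two things happen here. First, the UpdateWeightReqOutput and GetMemPoolSizeReqOutput branches disappear because the scheduler now replies to the tokenizer manager directly (see scheduler.py below), so the detokenizer no longer relays control-plane messages. Second, the embedding branch shrinks: a received dataclass can be forwarded as-is, and rebuilding it field by field was an exact no-op, as this small sketch (with a simplified stand-in message type) shows:

    import dataclasses

    @dataclasses.dataclass
    class BatchEmbeddingOut:  # simplified stand-in for sglang's message class
        rids: list
        embeddings: list
        meta_info: dict
        finished_reason: list

    recv_obj = BatchEmbeddingOut(["r0"], [[0.1, 0.2]], {}, [None])
    rebuilt = BatchEmbeddingOut(
        rids=recv_obj.rids,
        embeddings=recv_obj.embeddings,
        meta_info=recv_obj.meta_info,
        finished_reason=recv_obj.finished_reason,
    )
    assert rebuilt == recv_obj  # dataclass equality: the copy added nothing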
python/sglang/srt/managers/scheduler.py
@@ -114,6 +114,9 @@ class Scheduler:
             self.recv_from_tokenizer = get_zmq_socket(
                 context, zmq.PULL, port_args.scheduler_input_ipc_name
             )
+            self.send_to_tokenizer = get_zmq_socket(
+                context, zmq.PUSH, port_args.tokenizer_ipc_name
+            )

             if server_args.skip_tokenizer_init:
                 # Directly send to the tokenizer/api
@@ -127,6 +130,7 @@ class Scheduler:
             )
         else:
             self.recv_from_tokenizer = None
+            self.send_to_tokenizer = SimpleNamespace(send_pyobj=lambda x: None)
             self.send_to_detokenizer = SimpleNamespace(send_pyobj=lambda x: None)

         # Init tokenizer
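The new send_to_tokenizer socket gives the scheduler its own PUSH channel back to the tokenizer manager, while the SimpleNamespace assignment is a null-object stand-in so ranks that own no real socket can still call send_pyobj unconditionally. A minimal, self-contained sketch of both patterns with plain pyzmq (the IPC endpoint name is made up, and sglang's get_zmq_socket helper is replaced by direct socket calls):

    from types import SimpleNamespace
    import zmq

    context = zmq.Context()

    # PUSH/PULL pair standing in for the scheduler -> tokenizer-manager channel.
    pull = context.socket(zmq.PULL)
    pull.bind("ipc:///tmp/example_tokenizer_ipc")  # hypothetical endpoint
    push = context.socket(zmq.PUSH)
    push.connect("ipc:///tmp/example_tokenizer_ipc")

    push.send_pyobj(("weights_updated", True))  # pickles any Python object
    print(pull.recv_pyobj())                    # -> ('weights_updated', True)

    # Null-object stand-in: callers need no "is the socket real?" checks.
    sink = SimpleNamespace(send_pyobj=lambda x: None)
    sink.send_pyobj("silently dropped")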
@@ -421,7 +425,7 @@
             self.abort_request(recv_req)
         elif isinstance(recv_req, UpdateWeightReqInput):
             success, message = self.update_weights(recv_req)
-            self.send_to_detokenizer.send_pyobj(UpdateWeightReqOutput(success, message))
+            self.send_to_tokenizer.send_pyobj(UpdateWeightReqOutput(success, message))
         elif isinstance(recv_req, ProfileReq):
@@ -430,7 +434,7 @@
             else:
                 self.stop_profile()
         elif isinstance(recv_req, GetMemPoolSizeReq):
-            self.send_to_detokenizer.send_pyobj(
+            self.send_to_tokenizer.send_pyobj(
                 GetMemPoolSizeReqOutput(self.max_total_num_tokens)
             )
         else:
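Taken together, these two edits reroute the control-plane replies: UpdateWeightReqOutput and GetMemPoolSizeReqOutput now travel scheduler -> tokenizer manager over the socket added above, instead of detouring through the detokenizer. That direct path is what allowed the corresponding detokenizer branches to be deleted.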
test/srt/test_overlap_schedule.py
@@ -11,16 +11,24 @@ from sglang.test.test_utils import run_mmlu_test
 class TestOverlapSchedule(unittest.TestCase):
     def test_no_radix_attention_chunked_prefill(self):
-        run_mmlu_test(disable_radix_cache=True, chunked_prefill_size=32)
+        run_mmlu_test(
+            disable_radix_cache=True, chunked_prefill_size=32, enable_overlap=True
+        )

     def test_no_radix_attention_no_chunked_prefill(self):
-        run_mmlu_test(disable_radix_cache=True, chunked_prefill_size=-1)
+        run_mmlu_test(
+            disable_radix_cache=True, chunked_prefill_size=-1, enable_overlap=True
+        )

     def test_radix_attention_chunked_prefill(self):
-        run_mmlu_test(disable_radix_cache=False, chunked_prefill_size=32)
+        run_mmlu_test(
+            disable_radix_cache=False, chunked_prefill_size=32, enable_overlap=True
+        )

     def test_radix_attention_no_chunked_prefill(self):
-        run_mmlu_test(disable_radix_cache=False, chunked_prefill_size=-1)
+        run_mmlu_test(
+            disable_radix_cache=False, chunked_prefill_size=-1, enable_overlap=True
+        )


 if __name__ == "__main__":
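Every cache/prefill combination is now exercised with enable_overlap=True in addition to the default path. To run the suite directly (assuming the usual unittest.main() entry point and an environment where run_mmlu_test can launch the model):

    cd test/srt
    python3 test_overlap_schedule.py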