Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
795b662c
Unverified
Commit
795b662c
authored
Sep 06, 2024
by
Wei-Sheng Chin
Committed by
GitHub
Sep 06, 2024
Browse files
Enable Random Prefix Caching in Serving Profiling Tool (benchmark_serving.py) (#8241)
parent
2f707fcb
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
23 additions
and
4 deletions
+23
-4
benchmarks/benchmark_serving.py
benchmarks/benchmark_serving.py
+23
-4
No files found.
benchmarks/benchmark_serving.py
View file @
795b662c
...
...
@@ -195,8 +195,16 @@ def sample_sonnet_requests(
def
sample_random_requests
(
input_len
:
int
,
output_len
:
int
,
num_prompts
:
int
,
range_ratio
:
float
,
tokenizer
:
PreTrainedTokenizerBase
)
->
List
[
Tuple
[
str
,
int
,
int
]]:
prefix_len
:
int
,
input_len
:
int
,
output_len
:
int
,
num_prompts
:
int
,
range_ratio
:
float
,
tokenizer
:
PreTrainedTokenizerBase
,
)
->
List
[
Tuple
[
str
,
int
,
int
]]:
prefix_token_ids
=
np
.
random
.
randint
(
0
,
tokenizer
.
vocab_size
,
size
=
prefix_len
).
tolist
()
input_lens
=
np
.
random
.
randint
(
int
(
input_len
*
range_ratio
),
...
...
@@ -211,10 +219,12 @@ def sample_random_requests(
offsets
=
np
.
random
.
randint
(
0
,
tokenizer
.
vocab_size
,
size
=
num_prompts
)
input_requests
=
[]
for
i
in
range
(
num_prompts
):
prompt
=
tokenizer
.
decode
([(
offsets
[
i
]
+
i
+
j
)
%
tokenizer
.
vocab_size
prompt
=
tokenizer
.
decode
(
prefix_token_ids
+
[(
offsets
[
i
]
+
i
+
j
)
%
tokenizer
.
vocab_size
for
j
in
range
(
input_lens
[
i
])])
input_requests
.
append
(
(
prompt
,
int
(
input_lens
[
i
]),
int
(
output_lens
[
i
])))
(
prompt
,
int
(
prefix_len
+
input_lens
[
i
]),
int
(
output_lens
[
i
])))
return
input_requests
...
...
@@ -567,6 +577,7 @@ def main(args: argparse.Namespace):
elif
args
.
dataset_name
==
"random"
:
input_requests
=
sample_random_requests
(
prefix_len
=
args
.
random_prefix_len
,
input_len
=
args
.
random_input_len
,
output_len
=
args
.
random_output_len
,
num_prompts
=
args
.
num_prompts
,
...
...
@@ -765,6 +776,14 @@ if __name__ == "__main__":
help
=
"Range of sampled ratio of input/output length, "
"used only for random sampling."
,
)
parser
.
add_argument
(
"--random-prefix-len"
,
type
=
int
,
default
=
0
,
help
=
"Number of fixed prefix tokens before random "
" context. The length range of context in a random "
" request is [random-prefix-len, "
" random-prefix-len + random-prefix-len * random-range-ratio)."
)
parser
.
add_argument
(
"--request-rate"
,
type
=
float
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment