Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
268c3250
Unverified
Commit
268c3250
authored
Apr 11, 2025
by
WWW
Committed by
GitHub
Apr 10, 2025
Browse files
Fix range_ratio Bug in RandomDataset (#16126)
Signed-off-by:
jadewang21
<
jadewangcn@outlook.com
>
parent
3cc9af88
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
42 additions
and
20 deletions
+42
-20
benchmarks/benchmark_dataset.py
benchmarks/benchmark_dataset.py
+18
-5
benchmarks/benchmark_serving.py
benchmarks/benchmark_serving.py
+12
-7
benchmarks/benchmark_throughput.py
benchmarks/benchmark_throughput.py
+12
-8
No files found.
benchmarks/benchmark_dataset.py
View file @
268c3250
...
@@ -288,7 +288,7 @@ def process_image(image: Any) -> Mapping[str, Any]:
...
@@ -288,7 +288,7 @@ def process_image(image: Any) -> Mapping[str, Any]:
class
RandomDataset
(
BenchmarkDataset
):
class
RandomDataset
(
BenchmarkDataset
):
# Default values copied from benchmark_serving.py for the random dataset.
# Default values copied from benchmark_serving.py for the random dataset.
DEFAULT_PREFIX_LEN
=
0
DEFAULT_PREFIX_LEN
=
0
DEFAULT_RANGE_RATIO
=
1
.0
DEFAULT_RANGE_RATIO
=
0
.0
DEFAULT_INPUT_LEN
=
1024
DEFAULT_INPUT_LEN
=
1024
DEFAULT_OUTPUT_LEN
=
128
DEFAULT_OUTPUT_LEN
=
128
...
@@ -308,19 +308,32 @@ class RandomDataset(BenchmarkDataset):
...
@@ -308,19 +308,32 @@ class RandomDataset(BenchmarkDataset):
output_len
:
int
=
DEFAULT_OUTPUT_LEN
,
output_len
:
int
=
DEFAULT_OUTPUT_LEN
,
**
kwargs
,
**
kwargs
,
)
->
list
[
SampleRequest
]:
)
->
list
[
SampleRequest
]:
# Enforce range_ratio < 1
assert
range_ratio
<
1.0
,
(
"random_range_ratio must be < 1.0 to ensure a valid sampling range"
)
vocab_size
=
tokenizer
.
vocab_size
vocab_size
=
tokenizer
.
vocab_size
prefix_token_ids
=
(
np
.
random
.
randint
(
prefix_token_ids
=
(
np
.
random
.
randint
(
0
,
vocab_size
,
size
=
prefix_len
).
tolist
()
if
prefix_len
>
0
else
[])
0
,
vocab_size
,
size
=
prefix_len
).
tolist
()
if
prefix_len
>
0
else
[])
input_low
=
int
(
input_len
*
range_ratio
)
# New sampling logic: [X * (1 - b), X * (1 + b)]
output_low
=
int
(
output_len
*
range_ratio
)
input_low
=
int
(
input_len
*
(
1
-
range_ratio
))
input_high
=
int
(
input_len
*
(
1
+
range_ratio
))
output_low
=
int
(
output_len
*
(
1
-
range_ratio
))
output_high
=
int
(
output_len
*
(
1
+
range_ratio
))
# Add logging for debugging
logger
.
info
(
"Sampling input_len from [%s, %s]"
,
input_low
,
input_high
)
logger
.
info
(
"Sampling output_len from [%s, %s]"
,
output_low
,
output_high
)
input_lens
=
np
.
random
.
randint
(
input_low
,
input_lens
=
np
.
random
.
randint
(
input_low
,
input_
len
+
1
,
input_
high
+
1
,
size
=
num_requests
)
size
=
num_requests
)
output_lens
=
np
.
random
.
randint
(
output_low
,
output_lens
=
np
.
random
.
randint
(
output_low
,
output_
len
+
1
,
output_
high
+
1
,
size
=
num_requests
)
size
=
num_requests
)
offsets
=
np
.
random
.
randint
(
0
,
vocab_size
,
size
=
num_requests
)
offsets
=
np
.
random
.
randint
(
0
,
vocab_size
,
size
=
num_requests
)
...
...
benchmarks/benchmark_serving.py
View file @
268c3250
...
@@ -996,18 +996,23 @@ if __name__ == "__main__":
...
@@ -996,18 +996,23 @@ if __name__ == "__main__":
random_group
.
add_argument
(
random_group
.
add_argument
(
"--random-range-ratio"
,
"--random-range-ratio"
,
type
=
float
,
type
=
float
,
default
=
1.0
,
default
=
0.0
,
help
=
"Range of sampled ratio of input/output length, "
help
=
"Range ratio for sampling input/output length, "
"used only for random sampling."
,
"used only for random sampling. Must be in the range [0, 1) to define "
"a symmetric sampling range"
"[length * (1 - range_ratio), length * (1 + range_ratio)]."
,
)
)
random_group
.
add_argument
(
random_group
.
add_argument
(
"--random-prefix-len"
,
"--random-prefix-len"
,
type
=
int
,
type
=
int
,
default
=
0
,
default
=
0
,
help
=
"Number of fixed prefix tokens before random "
help
=
(
"Number of fixed prefix tokens before the random context "
" context. The length range of context in a random "
"in a request. "
" request is [random-prefix-len, "
"The total input length is the sum of `random-prefix-len` and "
" random-prefix-len + random-prefix-len * random-range-ratio)."
)
"a random "
"context length sampled from [input_len * (1 - range_ratio), "
"input_len * (1 + range_ratio)]."
),
)
hf_group
=
parser
.
add_argument_group
(
"hf dataset options"
)
hf_group
=
parser
.
add_argument_group
(
"hf dataset options"
)
hf_group
.
add_argument
(
"--hf-subset"
,
hf_group
.
add_argument
(
"--hf-subset"
,
...
...
benchmarks/benchmark_throughput.py
View file @
268c3250
...
@@ -594,18 +594,22 @@ if __name__ == "__main__":
...
@@ -594,18 +594,22 @@ if __name__ == "__main__":
default
=
None
,
default
=
None
,
help
=
"Path to the lora adapters to use. This can be an absolute path, "
help
=
"Path to the lora adapters to use. This can be an absolute path, "
"a relative path, or a Hugging Face model identifier."
)
"a relative path, or a Hugging Face model identifier."
)
parser
.
add_argument
(
"--prefix-len"
,
parser
.
add_argument
(
type
=
int
,
"--prefix-len"
,
default
=
None
,
type
=
int
,
help
=
"Number of prefix tokens per request."
default
=
0
,
"This is for the RandomDataset and SonnetDataset"
)
help
=
"Number of fixed prefix tokens before the random "
"context in a request (default: 0)."
,
)
# random dataset
# random dataset
parser
.
add_argument
(
parser
.
add_argument
(
"--random-range-ratio"
,
"--random-range-ratio"
,
type
=
float
,
type
=
float
,
default
=
None
,
default
=
0.0
,
help
=
"Range of sampled ratio of input/output length, "
help
=
"Range ratio for sampling input/output length, "
"used only for RandomDataSet."
,
"used only for RandomDataset. Must be in the range [0, 1) to define "
"a symmetric sampling range "
"[length * (1 - range_ratio), length * (1 + range_ratio)]."
,
)
)
# hf dtaset
# hf dtaset
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment