Unverified commit 23583ee2, authored by Wentao Ye, committed by GitHub
Browse files

[Bug] Add Assertion for `random-input-len` / `random-output-len` (#26834)


Signed-off-by: yewentao256 <zhyanwentao@126.com>
parent 01c977e9
......@@ -478,6 +478,22 @@ class RandomDataset(BenchmarkDataset):
batchsize: int = 1,
**kwargs,
) -> list[SampleRequest]:
# validate total input tokens (prefix + sampled) is at least 1.
num_special = int(tokenizer.num_special_tokens_to_add())
real_input_len = max(0, int(input_len) - num_special)
min_sampled_input = math.floor(real_input_len * (1.0 - float(range_ratio)))
min_total_input = int(prefix_len) + min_sampled_input
if min_total_input < 1:
raise ValueError(
"--random-input-len is too small: with tokenizer special "
f"tokens {num_special} and --random-range-ratio {range_ratio}, "
"the minimum possible total input tokens (prefix + sampled) is "
f"{min_total_input}. Increase --random-input-len and/or "
"--random-prefix-len, or decrease --random-range-ratio so that "
"prefix_len + floor(max(0, random_input_len - num_special)) "
"* (1 - range_ratio) >= 1."
)
input_lens, output_lens, offsets = self.get_sampling_params(
num_requests, range_ratio, input_len, output_len, tokenizer
)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment