[bench] Support common prefix len config (for decode-only bench) (#29934)

Signed-off-by: Ming Yang <minos.future@gmail.com>

[bench] Support common prefix len config (for decode-only bench) (#29934)
Signed-off-by: Ming Yang <minos.future@gmail.com>
f16356fe · Ming Yang · GitHub · 65ee9728 · f16356fe · f16356fe
Unverified commit f16356fe, authored Dec 05, 2025 by Ming Yang; committed by GitHub on Dec 05, 2025.
Show whitespace changes
Inline Side-by-side

Showing with 7 additions and 0 deletions

vllm/benchmarks/datasets.py vllm/benchmarks/datasets.py +1 -0

vllm/benchmarks/serve.py vllm/benchmarks/serve.py +6 -0

No files found.
--- a/vllm/benchmarks/datasets.py
+++ b/vllm/benchmarks/datasets.py
@@ -1842,6 +1842,7 @@ def get_samples(args, tokenizer) -> list[SampleRequest]:
                random_seed=args.seed,
                dataset_path=args.dataset_path,
                disable_shuffle=args.disable_shuffle,
+                prefix_len=args.common_prefix_len,
            ).sample(
                tokenizer=tokenizer,
                num_requests=args.num_prompts,

--- a/vllm/benchmarks/serve.py
+++ b/vllm/benchmarks/serve.py
@@ -1221,6 +1221,12 @@ def add_cli_args(parser: argparse.ArgumentParser):
        help="Repetition penalty sampling parameter. Only has effect on "
        "openai-compatible backends.",
    )
+    sampling_group.add_argument(
+        "--common-prefix-len",
+        type=int,
+        default=None,
+        help="Common prefix length shared by all prompts (used by random dataset)",
+    )

    parser.add_argument(
        "--tokenizer-mode",