Unverified commit de9cb617, authored by Seiji Eicher and committed by GitHub
Browse files

Add docs for PrefixRepetitionDataset + enable usage with `vllm bench throughput` (#23012)


Signed-off-by: Seiji Eicher <seiji@anyscale.com>
Co-authored-by: Roger Wang <hey@rogerw.me>
parent 2dbccce8
...@@ -40,7 +40,7 @@ become available. ...@@ -40,7 +40,7 @@ become available.
<td><code>wget https://github.com/HPMLL/BurstGPT/releases/download/v1.1/BurstGPT_without_fails_2.csv</code></td> <td><code>wget https://github.com/HPMLL/BurstGPT/releases/download/v1.1/BurstGPT_without_fails_2.csv</code></td>
</tr> </tr>
<tr> <tr>
<td><strong>Sonnet</strong></td> <td><strong>Sonnet (deprecated)</strong></td>
<td style="text-align: center;"></td> <td style="text-align: center;"></td>
<td style="text-align: center;"></td> <td style="text-align: center;"></td>
<td>Local file: <code>benchmarks/sonnet.txt</code></td> <td>Local file: <code>benchmarks/sonnet.txt</code></td>
...@@ -51,6 +51,12 @@ become available. ...@@ -51,6 +51,12 @@ become available.
<td style="text-align: center;"></td> <td style="text-align: center;"></td>
<td><code>synthetic</code></td> <td><code>synthetic</code></td>
</tr> </tr>
<tr>
<td><strong>Prefix Repetition</strong></td>
<td style="text-align: center;"></td>
<td style="text-align: center;"></td>
<td><code>synthetic</code></td>
</tr>
<tr> <tr>
<td><strong>HuggingFace-VisionArena</strong></td> <td><strong>HuggingFace-VisionArena</strong></td>
<td style="text-align: center;"></td> <td style="text-align: center;"></td>
...@@ -592,6 +598,20 @@ python3 benchmarks/benchmark_prefix_caching.py \ ...@@ -592,6 +598,20 @@ python3 benchmarks/benchmark_prefix_caching.py \
--input-length-range 128:256 --input-length-range 128:256
``` ```
### Prefix Repetition Dataset
```bash
vllm bench serve \
--backend openai \
--model meta-llama/Llama-2-7b-chat-hf \
--dataset-name prefix_repetition \
--num-prompts 100 \
--prefix-repetition-prefix-len 512 \
--prefix-repetition-suffix-len 128 \
--prefix-repetition-num-prefixes 5 \
--prefix-repetition-output-len 128
```
</details> </details>
## ⚡ Example - Request Prioritization Benchmark ## ⚡ Example - Request Prioritization Benchmark
......
...@@ -18,9 +18,11 @@ from transformers import (AutoModelForCausalLM, AutoTokenizer, ...@@ -18,9 +18,11 @@ from transformers import (AutoModelForCausalLM, AutoTokenizer,
from vllm.benchmarks.datasets import (AIMODataset, BurstGPTDataset, from vllm.benchmarks.datasets import (AIMODataset, BurstGPTDataset,
ConversationDataset, ConversationDataset,
InstructCoderDataset, RandomDataset, InstructCoderDataset,
SampleRequest, ShareGPTDataset, PrefixRepetitionRandomDataset,
SonnetDataset, VisionArenaDataset) RandomDataset, SampleRequest,
ShareGPTDataset, SonnetDataset,
VisionArenaDataset)
from vllm.benchmarks.lib.utils import (convert_to_pytorch_benchmark_format, from vllm.benchmarks.lib.utils import (convert_to_pytorch_benchmark_format,
write_to_json) write_to_json)
from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
...@@ -327,6 +329,12 @@ def get_requests(args, tokenizer): ...@@ -327,6 +329,12 @@ def get_requests(args, tokenizer):
dataset_cls = AIMODataset dataset_cls = AIMODataset
common_kwargs['dataset_subset'] = None common_kwargs['dataset_subset'] = None
common_kwargs['dataset_split'] = "train" common_kwargs['dataset_split'] = "train"
elif args.dataset_name == "prefix_repetition":
dataset_cls = PrefixRepetitionRandomDataset
sample_kwargs["prefix_len"] = args.prefix_repetition_prefix_len
sample_kwargs["suffix_len"] = args.prefix_repetition_suffix_len
sample_kwargs["num_prefixes"] = args.prefix_repetition_num_prefixes
sample_kwargs["output_len"] = args.prefix_repetition_output_len
else: else:
raise ValueError(f"Unknown dataset name: {args.dataset_name}") raise ValueError(f"Unknown dataset name: {args.dataset_name}")
# Remove None values # Remove None values
...@@ -356,7 +364,11 @@ def validate_args(args): ...@@ -356,7 +364,11 @@ def validate_args(args):
raise ValueError(f"Unsupported backend: {args.backend}") raise ValueError(f"Unsupported backend: {args.backend}")
# === Dataset Configuration === # === Dataset Configuration ===
if not args.dataset and not args.dataset_path: if (
not args.dataset
and not args.dataset_path
and args.dataset_name not in {"prefix_repetition"}
):
print( print(
"When dataset path is not set, it will default to random dataset") "When dataset path is not set, it will default to random dataset")
args.dataset_name = 'random' args.dataset_name = 'random'
...@@ -432,7 +444,10 @@ def add_cli_args(parser: argparse.ArgumentParser): ...@@ -432,7 +444,10 @@ def add_cli_args(parser: argparse.ArgumentParser):
parser.add_argument( parser.add_argument(
"--dataset-name", "--dataset-name",
type=str, type=str,
choices=["sharegpt", "random", "sonnet", "burstgpt", "hf"], choices=[
"sharegpt", "random", "sonnet", "burstgpt", "hf",
"prefix_repetition"
],
help="Name of the dataset to benchmark on.", help="Name of the dataset to benchmark on.",
default="sharegpt") default="sharegpt")
parser.add_argument( parser.add_argument(
...@@ -521,6 +536,38 @@ def add_cli_args(parser: argparse.ArgumentParser): ...@@ -521,6 +536,38 @@ def add_cli_args(parser: argparse.ArgumentParser):
default=None, default=None,
help="Split of the HF dataset.") help="Split of the HF dataset.")
# prefix repetition dataset
prefix_repetition_group = parser.add_argument_group(
"prefix repetition dataset options")
prefix_repetition_group.add_argument(
"--prefix-repetition-prefix-len",
type=int,
default=None,
help="Number of prefix tokens per request, used only for prefix "
"repetition dataset.",
)
prefix_repetition_group.add_argument(
"--prefix-repetition-suffix-len",
type=int,
default=None,
help="Number of suffix tokens per request, used only for prefix "
"repetition dataset. Total input length is prefix_len + suffix_len.",
)
prefix_repetition_group.add_argument(
"--prefix-repetition-num-prefixes",
type=int,
default=None,
help="Number of prefixes to generate, used only for prefix repetition "
"dataset. Prompts per prefix is num_requests // num_prefixes.",
)
prefix_repetition_group.add_argument(
"--prefix-repetition-output-len",
type=int,
default=None,
help="Number of output tokens per request, used only for prefix "
"repetition dataset.",
)
parser = AsyncEngineArgs.add_cli_args(parser) parser = AsyncEngineArgs.add_cli_args(parser)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment