[Benchmark] Refactor sample_requests in benchmark_throughput (#3613)

Co-authored-by: Roger Wang <ywang@roblox.com>

[Benchmark] Refactor sample_requests in benchmark_throughput (#3613)
Co-authored-by: Roger Wang <ywang@roblox.com>
b7782002 · TianYu GUO · GitHub · 819a309c · b7782002
Unverified Commit b7782002 authored Apr 04, 2024 by TianYu GUO Committed by GitHub Apr 04, 2024
Hide whitespace changes
Inline Side-by-side

Showing with 15 additions and 16 deletions

benchmarks/benchmark_throughput.py benchmarks/benchmark_throughput.py +15 -16

No files found.
--- a/benchmarks/benchmark_throughput.py
+++ b/benchmarks/benchmark_throughput.py
@@ -29,22 +29,23 @@ def sample_requests(
    dataset = [(data["conversations"][0]["value"],
                data["conversations"][1]["value"]) for data in dataset]
-    # Tokenize the prompts and completions.
+    # Shuffle the dataset.
-    prompts = [prompt for prompt, _ in dataset]
+    random.shuffle(dataset)
-    prompt_token_ids = tokenizer(prompts).input_ids
-    completions = [completion for _, completion in dataset]
-    completion_token_ids = tokenizer(completions).input_ids
-    tokenized_dataset = []
-    for i in range(len(dataset)):
-        output_len = len(completion_token_ids[i])
-        if fixed_output_len is not None:
-            output_len = fixed_output_len
-        tokenized_dataset.append((prompts[i], prompt_token_ids[i], output_len))
-    # Filter out too long sequences.
+    # Filter out sequences that are too long or too short
    filtered_dataset: List[Tuple[str, int, int]] = []
-    for prompt, prompt_token_ids, output_len in tokenized_dataset:
+    for i in range(len(dataset)):
+        if len(filtered_dataset) == num_requests:
+            break
+        # Tokenize the prompts and completions.
+        prompt = dataset[i][0]
+        prompt_token_ids = tokenizer(prompt).input_ids
+        completion = dataset[i][1]
+        completion_token_ids = tokenizer(completion).input_ids
        prompt_len = len(prompt_token_ids)
+        output_len = len(completion_token_ids
+                         ) if fixed_output_len is None else fixed_output_len
        if prompt_len < 4 or output_len < 4:
            # Prune too short sequences.
            continue
@@ -53,9 +54,7 @@ def sample_requests(
            continue
        filtered_dataset.append((prompt, prompt_len, output_len))
-    # Sample the requests.
+    return filtered_dataset
-    sampled_requests = random.sample(filtered_dataset, num_requests)
-    return sampled_requests
 def run_vllm(