[Bugfix] Prevent benchmark_throughput.py from using duplicated random prompts (#10753)

4433195a · Michael Goin · GitHub · 4c05edb3 · 4433195a
Unverified Commit 4433195a authored Dec 02, 2024 by Michael Goin Committed by GitHub Dec 03, 2024
Hide whitespace changes
Inline Side-by-side

Showing with 30 additions and 17 deletions

benchmarks/benchmark_throughput.py benchmarks/benchmark_throughput.py +30 -17

No files found.
--- a/benchmarks/benchmark_throughput.py
+++ b/benchmarks/benchmark_throughput.py
@@ -294,23 +294,36 @@ def main(args: argparse.Namespace):
    tokenizer = AutoTokenizer.from_pretrained(
        args.tokenizer, trust_remote_code=args.trust_remote_code)
    if args.dataset is None:
-        # Synthesize a prompt with the given input length.
-        # As tokenizer may add additional tokens like BOS, we need to try
-        # different lengths to get the desired input length.
-        for i in range(-10, 10):
-            prompt = "hi " * (args.input_len + i)
-            tokenized_prompt = tokenizer(prompt).input_ids
-            if len(tokenized_prompt) == args.input_len:
-                break
-        else:
-            raise ValueError(
-                f"Failed to synthesize a prompt with {args.input_len} tokens.")
-        requests = [
-            SampleRequest(prompt=prompt,
-                          prompt_len=args.input_len,
-                          expected_output_len=args.output_len)
-            for _ in range(args.num_prompts)
-        ]
+        vocab_size = tokenizer.vocab_size
+        requests = []
+        for _ in range(args.num_prompts):
+            # Synthesize a prompt with the given input length.
+            candidate_ids = [
+                random.randint(0, vocab_size - 1)
+                for _ in range(args.input_len)
+            ]
+            # As tokenizer may add additional tokens like BOS, we need to try
+            # different lengths to get the desired input length.
+            for _ in range(5):  # Max attempts to correct
+                candidate_prompt = tokenizer.decode(candidate_ids)
+                tokenized_len = len(tokenizer.encode(candidate_prompt))
+                if tokenized_len == args.input_len:
+                    break
+                # Adjust length based on difference
+                diff = args.input_len - tokenized_len
+                if diff > 0:
+                    candidate_ids.extend([
+                        random.randint(100, vocab_size - 100)
+                        for _ in range(diff)
+                    ])
+                else:
+                    candidate_ids = candidate_ids[:diff]
+            requests.append(
+                SampleRequest(prompt=candidate_prompt,
+                              prompt_len=args.input_len,
+                              expected_output_len=args.output_len))
    else:
        requests = sample_requests(tokenizer, args)