[V1][Spec Decode][Ngram] 1.35x gain -> 1.95x gain on InstructCoder with prompt fix (#18971)

135cf55c · Ekagra Ranjan · GitHub · 6cac54f4 · 135cf55c · 135cf55c
Unverified Commit 135cf55c authored Jun 03, 2025 by Ekagra Ranjan Committed by GitHub Jun 03, 2025
Show whitespace changes
Inline Side-by-side

Showing with 22 additions and 2 deletions

benchmarks/benchmark_dataset.py benchmarks/benchmark_dataset.py +9 -1

vllm/benchmarks/datasets.py vllm/benchmarks/datasets.py +13 -1

No files found.
--- a/benchmarks/benchmark_dataset.py
+++ b/benchmarks/benchmark_dataset.py
@@ -865,7 +865,15 @@ class InstructCoderDataset(HuggingFaceDataset):
        for item in self.data:
            if len(sampled_requests) >= num_requests:
                break
-            prompt = f"{item['instruction']}:\n{item['input']}"
+            prompt = f"{item['input']}\n\n{item['instruction']} Just output \
+            the code, do not include any explanation."
+            # apply template
+            prompt = tokenizer.apply_chat_template(
+                [{"role": "user", "content": prompt}],
+                add_generation_prompt=True,
+                tokenize=False,
+            )
            prompt_len = len(tokenizer(prompt).input_ids)
            sampled_requests.append(
                SampleRequest(

--- a/vllm/benchmarks/datasets.py
+++ b/vllm/benchmarks/datasets.py
@@ -880,7 +880,19 @@ class InstructCoderDataset(HuggingFaceDataset):
        for item in self.data:
            if len(sampled_requests) >= num_requests:
                break
-            prompt = f"{item['instruction']}:\n{item['input']}"
+            prompt = f"{item['input']}\n\n{item['instruction']} Just output \
+            the code, do not include any explanation."
+            # apply template
+            prompt = tokenizer.apply_chat_template(
+                [{
+                    "role": "user",
+                    "content": prompt
+                }],
+                add_generation_prompt=True,
+                tokenize=False,
+            )
            prompt_len = len(tokenizer(prompt).input_ids)
            sampled_requests.append(
                SampleRequest(