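解决大EP mtp报错问题 (Fix the MTP error seen with large EP: apply the speculative-decoding token layout only when num_tokens >= 1 + num_lookahead_slots, so that num_reqs = num_tokens // min_tokens_per_req cannot become 0)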

121db653 · 王敏 · 8f66f64b · 121db653
Commit 121db653 authored Sep 06, 2025 by 王敏
Show whitespace changes
Inline Side-by-side

Showing with 3 additions and 1 deletion

vllm/v1/worker/gpu_model_runner.py vllm/v1/worker/gpu_model_runner.py +3 -1

No files found.
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -2005,7 +2005,9 @@ class GPUModelRunner(LoRAModelRunnerMixin):
        num_reqs = min(num_tokens, max_num_reqs)
        min_tokens_per_req = num_tokens // num_reqs

-        if not is_profile and self.speculative_config is not None and self.speculative_config.num_lookahead_slots > 0:
+        if not is_profile and self.speculative_config is not None \
+            and self.speculative_config.num_lookahead_slots > 0 \
+            and num_tokens >= (1 + self.speculative_config.num_lookahead_slots):
            min_tokens_per_req = (1 + self.speculative_config.num_lookahead_slots)
            num_reqs = num_tokens // min_tokens_per_req
        num_scheduled_tokens_list = [min_tokens_per_req] * num_reqs