Commit 121db653 authored by 王敏
Browse files

解决大EP mtp报错问题

parent 8f66f64b
......@@ -2005,7 +2005,9 @@ class GPUModelRunner(LoRAModelRunnerMixin):
num_reqs = min(num_tokens, max_num_reqs)
min_tokens_per_req = num_tokens // num_reqs
if not is_profile and self.speculative_config is not None and self.speculative_config.num_lookahead_slots > 0:
if not is_profile and self.speculative_config is not None \
and self.speculative_config.num_lookahead_slots > 0 \
and num_tokens >= (1 + self.speculative_config.num_lookahead_slots):
min_tokens_per_req = (1 + self.speculative_config.num_lookahead_slots)
num_reqs = num_tokens // min_tokens_per_req
num_scheduled_tokens_list = [min_tokens_per_req] * num_reqs
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment