Commit a9b4bc74 authored by 王敏's avatar 王敏
Browse files

[fix]解决EP不启用mtp时某些size报错

parent 7439b205
......@@ -2076,10 +2076,14 @@ class GPUModelRunnerBase(LoRAModelRunnerMixin):
num_actual_tokens = round_down(num_tokens, 1 + self.speculative_config.num_lookahead_slots)
num_reqs = num_actual_tokens // min_tokens_per_req
num_scheduled_tokens_list = [min_tokens_per_req] * num_reqs
if not self.ep_sp:
num_scheduled_tokens_list[-1] += num_tokens % num_reqs
else:
num_scheduled_tokens_list[-1] += num_tokens % min_tokens_per_req
if self.speculative_config is not None:
num_scheduled_tokens_list[-1] += num_tokens % min_tokens_per_req
else:
num_scheduled_tokens_list[-1] += num_tokens % num_reqs
assert sum(num_scheduled_tokens_list) == num_tokens
assert len(num_scheduled_tokens_list) == num_reqs
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment