Commit 9925dd0e authored by 王敏
Browse files

[fix]解决EP不开启mtp时某些size报错

parent 639a11d2
......@@ -2151,7 +2151,10 @@ class GPUModelRunnerBase(LoRAModelRunnerMixin):
if not self.ep_sp:
num_scheduled_tokens_list[-1] += num_tokens % num_reqs
else:
num_scheduled_tokens_list[-1] += num_tokens % min_tokens_per_req
if self.speculative_config is not None:
num_scheduled_tokens_list[-1] += num_tokens % min_tokens_per_req
else:
num_scheduled_tokens_list[-1] += num_tokens % num_reqs
assert sum(num_scheduled_tokens_list) == num_tokens
assert len(num_scheduled_tokens_list) == num_reqs
num_scheduled_tokens = np.array(num_scheduled_tokens_list,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment