Commit 9925dd0e authored by 王敏
Browse files

[fix]解决EP不开启mtp时某些size报错

parent 639a11d2
...@@ -2151,7 +2151,10 @@ class GPUModelRunnerBase(LoRAModelRunnerMixin): ...@@ -2151,7 +2151,10 @@ class GPUModelRunnerBase(LoRAModelRunnerMixin):
if not self.ep_sp: if not self.ep_sp:
num_scheduled_tokens_list[-1] += num_tokens % num_reqs num_scheduled_tokens_list[-1] += num_tokens % num_reqs
else: else:
if self.speculative_config is not None:
num_scheduled_tokens_list[-1] += num_tokens % min_tokens_per_req num_scheduled_tokens_list[-1] += num_tokens % min_tokens_per_req
else:
num_scheduled_tokens_list[-1] += num_tokens % num_reqs
assert sum(num_scheduled_tokens_list) == num_tokens assert sum(num_scheduled_tokens_list) == num_tokens
assert len(num_scheduled_tokens_list) == num_reqs assert len(num_scheduled_tokens_list) == num_reqs
num_scheduled_tokens = np.array(num_scheduled_tokens_list, num_scheduled_tokens = np.array(num_scheduled_tokens_list,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment