Commit a9b4bc74 authored by 王敏
Browse files

[fix]解决EP不启用mtp时某些size报错

parent 7439b205
...@@ -2076,10 +2076,14 @@ class GPUModelRunnerBase(LoRAModelRunnerMixin): ...@@ -2076,10 +2076,14 @@ class GPUModelRunnerBase(LoRAModelRunnerMixin):
num_actual_tokens = round_down(num_tokens, 1 + self.speculative_config.num_lookahead_slots) num_actual_tokens = round_down(num_tokens, 1 + self.speculative_config.num_lookahead_slots)
num_reqs = num_actual_tokens // min_tokens_per_req num_reqs = num_actual_tokens // min_tokens_per_req
num_scheduled_tokens_list = [min_tokens_per_req] * num_reqs num_scheduled_tokens_list = [min_tokens_per_req] * num_reqs
if not self.ep_sp: if not self.ep_sp:
num_scheduled_tokens_list[-1] += num_tokens % num_reqs num_scheduled_tokens_list[-1] += num_tokens % num_reqs
else: else:
num_scheduled_tokens_list[-1] += num_tokens % min_tokens_per_req if self.speculative_config is not None:
num_scheduled_tokens_list[-1] += num_tokens % min_tokens_per_req
else:
num_scheduled_tokens_list[-1] += num_tokens % num_reqs
assert sum(num_scheduled_tokens_list) == num_tokens assert sum(num_scheduled_tokens_list) == num_tokens
assert len(num_scheduled_tokens_list) == num_reqs assert len(num_scheduled_tokens_list) == num_reqs
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment