Commit ace32edb authored by zhuwenwen's avatar zhuwenwen
Browse files

Merge branch 'v0.9.2-dev-ds-wm-1225' into 'v0.9.2-dev-ds'

[fix]解决EP不启用mtp时某些size报错

See merge request dcutoolkit/deeplearing/vllm!322
parents 066cef58 a9b4bc74
......@@ -2076,10 +2076,14 @@ class GPUModelRunnerBase(LoRAModelRunnerMixin):
num_actual_tokens = round_down(num_tokens, 1 + self.speculative_config.num_lookahead_slots)
num_reqs = num_actual_tokens // min_tokens_per_req
num_scheduled_tokens_list = [min_tokens_per_req] * num_reqs
if not self.ep_sp:
num_scheduled_tokens_list[-1] += num_tokens % num_reqs
else:
if self.speculative_config is not None:
num_scheduled_tokens_list[-1] += num_tokens % min_tokens_per_req
else:
num_scheduled_tokens_list[-1] += num_tokens % num_reqs
assert sum(num_scheduled_tokens_list) == num_tokens
assert len(num_scheduled_tokens_list) == num_reqs
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment