Commit 76e22965 authored by jujl1
Browse files

feat: 兼容MTP零消耗和主模型+MTP零消耗(VLLM_ZERO_OVERHEAD_ENHANCE=1)开启

parent 8a413453
......@@ -703,7 +703,8 @@ class V1ZeroModelRunner(GPUModelRunner):
is_output_valid = False
# Get the valid generated tokens.
sampled_token_ids = sampler_output.sampled_token_ids
over_head_enhance = envs.VLLM_ZERO_OVERHEAD_ENHANCE and self.speculative_config
over_head_enhance = (envs.VLLM_ZERO_OVERHEAD_ENHANCE and
self.speculative_config is not None)
if over_head_enhance:
# if not self.speculative_config:
# self.fix_req_ids = self.last_sampled_req_ids
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment