Commit 244b534f authored by zhuwenwen
Browse files

根据不同场景,更新默认调度和分离调度的选择

parent dbbc0b2e
......@@ -937,6 +937,7 @@ class DeepseekScalingRotaryEmbedding(RotaryEmbedding):
if self.cos_sin_cache.device != positions.device:
self.cos_sin_cache: torch.Tensor = self.cos_sin_cache.to(
positions.device)
if not envs.VLLM_USE_LIGHTOP:
cos_sin = self.cos_sin_cache[torch.add(positions, offsets)
if offsets is not None else positions]
if query.device.type == 'cuda' and not self.is_neox_style \
......
......@@ -1028,7 +1028,12 @@ class Scheduler(SchedulerInterface):
return scheduler_output
def schedule(self) -> SchedulerOutput:
    """Select the scheduling routine for this step and return its output.

    Decision order (first match wins):

    1. ``envs.VLLM_USE_PD_SPLIT`` is truthy -> ``schedule_split_pd()``.
    2. ``self.connector`` is not ``None`` -> ``schedule_default()``, even
       when the condition in step 3 would hold.
    3. ``self.full_cuda_graph``, ``self.use_mla`` and
       ``self.num_spec_tokens > 0`` all hold -> ``schedule_split_pd()``.
    4. Otherwise -> ``schedule_default()``.

    Returns:
        The ``SchedulerOutput`` produced by the selected routine.
    """
    # An explicit opt-in through the environment always takes the split path.
    if envs.VLLM_USE_PD_SPLIT:
        return self.schedule_split_pd()

    # When a connector is present the default path is used; this check is
    # deliberately placed before the automatic split selection below.
    if self.connector is not None:
        return self.schedule_default()

    # Automatic split selection: only when all three conditions hold.
    if self.full_cuda_graph and self.use_mla and self.num_spec_tokens > 0:
        return self.schedule_split_pd()

    return self.schedule_default()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment