Commit 21d22cbd, authored by zhuwenwen, committed by jujl1
Browse files

根据不同场景,更新默认调度和分离调度的选择

parent 155fe0d2
......@@ -937,8 +937,9 @@ class DeepseekScalingRotaryEmbedding(RotaryEmbedding):
if self.cos_sin_cache.device != positions.device:
self.cos_sin_cache: torch.Tensor = self.cos_sin_cache.to(
positions.device)
cos_sin = self.cos_sin_cache[torch.add(positions, offsets)
if offsets is not None else positions]
if not envs.VLLM_USE_LIGHTOP:
cos_sin = self.cos_sin_cache[torch.add(positions, offsets)
if offsets is not None else positions]
if query.device.type == 'cuda' and not self.is_neox_style \
and not self.reference:
assert len(query.shape) == 3
......
......@@ -1028,10 +1028,15 @@ class Scheduler(SchedulerInterface):
return scheduler_output
def schedule(self) -> SchedulerOutput:
    """Pick the scheduling strategy for this step and run it.

    Selection order (first match wins):

    1. ``envs.VLLM_USE_PD_SPLIT`` is set: always use the split scheduler.
    2. ``self.connector`` is not ``None``: use the default scheduler.
    3. ``self.full_cuda_graph``, ``self.use_mla`` and
       ``self.num_spec_tokens > 0`` all hold: use the split scheduler.
    4. Otherwise: use the default scheduler.

    Returns:
        The ``SchedulerOutput`` produced by ``schedule_split_pd()`` or
        ``schedule_default()``.
    """
    # NOTE(review): "PD" presumably stands for prefill/decode -- confirm.
    # An explicit opt-in through the environment takes precedence over
    # every other consideration.
    if envs.VLLM_USE_PD_SPLIT:
        return self.schedule_split_pd()

    # With a connector configured, the automatic switch to the split
    # scheduler below is skipped and the default scheduler is used.
    if self.connector is not None:
        return self.schedule_default()

    # Automatic selection: full CUDA graph + MLA + speculative tokens.
    if self.full_cuda_graph and self.use_mla and self.num_spec_tokens > 0:
        return self.schedule_split_pd()

    return self.schedule_default()
def _update_after_schedule(
self,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment