Commit 69cfaa53 authored by zhuwenwen
Browse files

set VLLM_USE_PD_SPLIT=1

parent e5572b2a
@@ -1222,7 +1222,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
     # vLLM will split prefill and decode, not mix up
     "VLLM_USE_PD_SPLIT":
-    lambda: (os.environ.get("VLLM_USE_PD_SPLIT", "False").lower() in
+    lambda: (os.environ.get("VLLM_USE_PD_SPLIT", "True").lower() in
              ("true", "1")),
     # vLLM will sync to avoid pp vmfault
......
@@ -1051,14 +1051,15 @@ class Scheduler(SchedulerInterface):
     def schedule(self) -> SchedulerOutput:
         if envs.VLLM_USE_PD_SPLIT:
-            return self.schedule_split_pd()
-        else:
-            if self.connector is not None:
-                return self.schedule_default()
-            if self.full_cuda_graph and self.use_mla and self.num_spec_tokens > 0 :
-                return self.schedule_split_pd()
-            else:
-                return self.schedule_default()
+            if self.use_mla:
+                if self.full_cuda_graph and self.num_spec_tokens > 0:
+                    return self.schedule_split_pd()
+                else:
+                    self.schedule_default()
+            else:
+                return self.schedule_split_pd()
+        else:
+            return self.schedule_default()
     def _update_after_schedule(
         self,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment