Commit a4bcf959 authored by zhuwenwen
Browse files

Enable prefill/decode (PD) split by default (VLLM_USE_PD_SPLIT=1); fall back to default scheduling when a connector is configured

parent ff8b5e11
......@@ -1795,7 +1795,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
lambda: bool(int(os.getenv("USE_FUSED_RMS_QUANT", "0"))),
# vLLM will split prefill and decode, not mix up
"VLLM_USE_PD_SPLIT":
lambda: (os.environ.get("VLLM_USE_PD_SPLIT", "False").lower() in
lambda: (os.environ.get("VLLM_USE_PD_SPLIT", "True").lower() in
("true", "1")),
# vLLM will sync to avoid pp vmfault
"VLLM_USE_PP_SYNC":
......
......@@ -200,8 +200,8 @@ def _get_model_architecture(model_config: ModelConfig) -> tuple[type[nn.Module],
# if not envs.is_set("USE_FUSED_SILU_MUL_QUANT"):
# os.environ['USE_FUSED_SILU_MUL_QUANT'] = '1'
else:
# if not envs.is_set("VLLM_USE_PD_SPLIT"):
# os.environ['VLLM_USE_PD_SPLIT'] = '1'
if not envs.is_set("VLLM_USE_PD_SPLIT"):
os.environ['VLLM_USE_PD_SPLIT'] = '1'
if architectures in [['Qwen3MoeForCausalLM']]:
if not envs.is_set("VLLM_USE_LIGHTOP_MOE_ALIGN"):
os.environ['VLLM_USE_LIGHTOP_MOE_ALIGN'] = '1'
......@@ -237,8 +237,8 @@ def _get_model_architecture(model_config: ModelConfig) -> tuple[type[nn.Module],
# if not envs.is_set("USE_FUSED_SILU_MUL_QUANT"):
# os.environ['USE_FUSED_SILU_MUL_QUANT'] = '1'
else:
# if not envs.is_set("VLLM_USE_PD_SPLIT"):
# os.environ['VLLM_USE_PD_SPLIT'] = '1'
if not envs.is_set("VLLM_USE_PD_SPLIT"):
os.environ['VLLM_USE_PD_SPLIT'] = '1'
if architectures in [['Qwen3MoeForCausalLM']]:
if not envs.is_set("VLLM_USE_LIGHTOP_MOE_ALIGN"):
os.environ['VLLM_USE_LIGHTOP_MOE_ALIGN'] = '1'
......
......@@ -1474,6 +1474,8 @@ class Scheduler(SchedulerInterface):
def schedule(self) -> SchedulerOutput:
if envs.VLLM_USE_PD_SPLIT:
if self.connector is not None:
return self.schedule_default()
if self.use_mla:
if self.full_cuda_graph and self.num_spec_tokens > 0:
return self.schedule_split_pd()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment