PD separation uses default scheduling; set VLLM_USE_PD_SPLIT=1 by default

a4bcf959 · zhuwenwen · ff8b5e11 · a4bcf959 · a4bcf959 · a4bcf959
Commit a4bcf959 authored Feb 09, 2026 by zhuwenwen
Showing 3 changed files with 7 additions and 5 deletions

vllm/envs.py +1 -1

vllm/model_executor/model_loader/utils.py +4 -4

vllm/v1/core/sched/scheduler.py +2 -0

No files found.
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -1795,7 +1795,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
        lambda: bool(int(os.getenv("USE_FUSED_RMS_QUANT", "0"))),
    # vLLM will split prefill and decode, not mix up
    "VLLM_USE_PD_SPLIT":
-        lambda: (os.environ.get("VLLM_USE_PD_SPLIT", "False").lower() in
+        lambda: (os.environ.get("VLLM_USE_PD_SPLIT", "True").lower() in
                 ("true", "1")), 
    # vLLM will sync to avoid pp vmfault
    "VLLM_USE_PP_SYNC":

--- a/vllm/model_executor/model_loader/utils.py
+++ b/vllm/model_executor/model_loader/utils.py
@@ -200,8 +200,8 @@ def _get_model_architecture(model_config: ModelConfig) -> tuple[type[nn.Module],
                #     if not envs.is_set("USE_FUSED_SILU_MUL_QUANT"):
                #         os.environ['USE_FUSED_SILU_MUL_QUANT'] = '1'
            else:
-                # if not envs.is_set("VLLM_USE_PD_SPLIT"):
+                if not envs.is_set("VLLM_USE_PD_SPLIT"):
-                #     os.environ['VLLM_USE_PD_SPLIT'] = '1'
+                    os.environ['VLLM_USE_PD_SPLIT'] = '1'
                if architectures in [['Qwen3MoeForCausalLM']]:
                    if not envs.is_set("VLLM_USE_LIGHTOP_MOE_ALIGN"):
                        os.environ['VLLM_USE_LIGHTOP_MOE_ALIGN'] = '1'
@@ -237,8 +237,8 @@ def _get_model_architecture(model_config: ModelConfig) -> tuple[type[nn.Module],
                #     if not envs.is_set("USE_FUSED_SILU_MUL_QUANT"):
                #         os.environ['USE_FUSED_SILU_MUL_QUANT'] = '1'
            else:
-                # if not envs.is_set("VLLM_USE_PD_SPLIT"):
+                if not envs.is_set("VLLM_USE_PD_SPLIT"):
-                #     os.environ['VLLM_USE_PD_SPLIT'] = '1'
+                    os.environ['VLLM_USE_PD_SPLIT'] = '1'
                if architectures in [['Qwen3MoeForCausalLM']]:
                    if not envs.is_set("VLLM_USE_LIGHTOP_MOE_ALIGN"):
                        os.environ['VLLM_USE_LIGHTOP_MOE_ALIGN'] = '1'

--- a/vllm/v1/core/sched/scheduler.py
+++ b/vllm/v1/core/sched/scheduler.py
@@ -1474,6 +1474,8 @@ class Scheduler(SchedulerInterface):
    def schedule(self) -> SchedulerOutput:
        if envs.VLLM_USE_PD_SPLIT:
+            if self.connector is not None:
+                    return self.schedule_default()
            if self.use_mla:
                if self.full_cuda_graph and self.num_spec_tokens > 0:
                    return self.schedule_split_pd()