Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
a4bcf959
Commit
a4bcf959
authored
Feb 09, 2026
by
zhuwenwen
Browse files
PD separation uses default scheduling; set VLLM_USE_PD_SPLIT=1 by default
parent
ff8b5e11
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
7 additions
and
5 deletions
+7
-5
vllm/envs.py
vllm/envs.py
+1
-1
vllm/model_executor/model_loader/utils.py
vllm/model_executor/model_loader/utils.py
+4
-4
vllm/v1/core/sched/scheduler.py
vllm/v1/core/sched/scheduler.py
+2
-0
No files found.
vllm/envs.py
View file @
a4bcf959
...
@@ -1795,7 +1795,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
...
@@ -1795,7 +1795,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
lambda
:
bool
(
int
(
os
.
getenv
(
"USE_FUSED_RMS_QUANT"
,
"0"
))),
lambda
:
bool
(
int
(
os
.
getenv
(
"USE_FUSED_RMS_QUANT"
,
"0"
))),
# vLLM will schedule prefill and decode separately instead of mixing them
# vLLM will schedule prefill and decode separately instead of mixing them
"VLLM_USE_PD_SPLIT"
:
"VLLM_USE_PD_SPLIT"
:
lambda
:
(
os
.
environ
.
get
(
"VLLM_USE_PD_SPLIT"
,
"
Fals
e"
).
lower
()
in
lambda
:
(
os
.
environ
.
get
(
"VLLM_USE_PD_SPLIT"
,
"
Tru
e"
).
lower
()
in
(
"true"
,
"1"
)),
(
"true"
,
"1"
)),
# vLLM will sync to avoid pp vmfault
# vLLM will sync to avoid pp vmfault
"VLLM_USE_PP_SYNC"
:
"VLLM_USE_PP_SYNC"
:
...
...
vllm/model_executor/model_loader/utils.py
View file @
a4bcf959
...
@@ -200,8 +200,8 @@ def _get_model_architecture(model_config: ModelConfig) -> tuple[type[nn.Module],
...
@@ -200,8 +200,8 @@ def _get_model_architecture(model_config: ModelConfig) -> tuple[type[nn.Module],
# if not envs.is_set("USE_FUSED_SILU_MUL_QUANT"):
# if not envs.is_set("USE_FUSED_SILU_MUL_QUANT"):
# os.environ['USE_FUSED_SILU_MUL_QUANT'] = '1'
# os.environ['USE_FUSED_SILU_MUL_QUANT'] = '1'
else
:
else
:
#
if not envs.is_set("VLLM_USE_PD_SPLIT"):
if
not
envs
.
is_set
(
"VLLM_USE_PD_SPLIT"
):
#
os.environ['VLLM_USE_PD_SPLIT'] = '1'
os
.
environ
[
'VLLM_USE_PD_SPLIT'
]
=
'1'
if
architectures
in
[[
'Qwen3MoeForCausalLM'
]]:
if
architectures
in
[[
'Qwen3MoeForCausalLM'
]]:
if
not
envs
.
is_set
(
"VLLM_USE_LIGHTOP_MOE_ALIGN"
):
if
not
envs
.
is_set
(
"VLLM_USE_LIGHTOP_MOE_ALIGN"
):
os
.
environ
[
'VLLM_USE_LIGHTOP_MOE_ALIGN'
]
=
'1'
os
.
environ
[
'VLLM_USE_LIGHTOP_MOE_ALIGN'
]
=
'1'
...
@@ -237,8 +237,8 @@ def _get_model_architecture(model_config: ModelConfig) -> tuple[type[nn.Module],
...
@@ -237,8 +237,8 @@ def _get_model_architecture(model_config: ModelConfig) -> tuple[type[nn.Module],
# if not envs.is_set("USE_FUSED_SILU_MUL_QUANT"):
# if not envs.is_set("USE_FUSED_SILU_MUL_QUANT"):
# os.environ['USE_FUSED_SILU_MUL_QUANT'] = '1'
# os.environ['USE_FUSED_SILU_MUL_QUANT'] = '1'
else
:
else
:
#
if not envs.is_set("VLLM_USE_PD_SPLIT"):
if
not
envs
.
is_set
(
"VLLM_USE_PD_SPLIT"
):
#
os.environ['VLLM_USE_PD_SPLIT'] = '1'
os
.
environ
[
'VLLM_USE_PD_SPLIT'
]
=
'1'
if
architectures
in
[[
'Qwen3MoeForCausalLM'
]]:
if
architectures
in
[[
'Qwen3MoeForCausalLM'
]]:
if
not
envs
.
is_set
(
"VLLM_USE_LIGHTOP_MOE_ALIGN"
):
if
not
envs
.
is_set
(
"VLLM_USE_LIGHTOP_MOE_ALIGN"
):
os
.
environ
[
'VLLM_USE_LIGHTOP_MOE_ALIGN'
]
=
'1'
os
.
environ
[
'VLLM_USE_LIGHTOP_MOE_ALIGN'
]
=
'1'
...
...
vllm/v1/core/sched/scheduler.py
View file @
a4bcf959
...
@@ -1474,6 +1474,8 @@ class Scheduler(SchedulerInterface):
...
@@ -1474,6 +1474,8 @@ class Scheduler(SchedulerInterface):
def
schedule
(
self
)
->
SchedulerOutput
:
def
schedule
(
self
)
->
SchedulerOutput
:
if
envs
.
VLLM_USE_PD_SPLIT
:
if
envs
.
VLLM_USE_PD_SPLIT
:
if
self
.
connector
is
not
None
:
return
self
.
schedule_default
()
if
self
.
use_mla
:
if
self
.
use_mla
:
if
self
.
full_cuda_graph
and
self
.
num_spec_tokens
>
0
:
if
self
.
full_cuda_graph
and
self
.
num_spec_tokens
>
0
:
return
self
.
schedule_split_pd
()
return
self
.
schedule_split_pd
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment