Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
acf9f945
Commit
acf9f945
authored
Nov 20, 2025
by
zhuwenwen
Browse files
修复pd分离开cp引起的校验问题
update VLLM_USE_PD_SPLIT=0 (for dspk) and VLLM_USE_PD_SPLIT=1 (for others)
parent
cf975626
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
10 additions
and
2 deletions
+10
-2
vllm/distributed/kv_transfer/kv_connector/v1/p2p/p2p_nccl_connector.py
...ted/kv_transfer/kv_connector/v1/p2p/p2p_nccl_connector.py
+3
-1
vllm/envs.py
vllm/envs.py
+1
-1
vllm/model_executor/model_loader/utils.py
vllm/model_executor/model_loader/utils.py
+6
-0
No files found.
vllm/distributed/kv_transfer/kv_connector/v1/p2p/p2p_nccl_connector.py
View file @
acf9f945
...
@@ -481,7 +481,9 @@ class P2pNcclConnector(KVConnectorBase_V1):
...
@@ -481,7 +481,9 @@ class P2pNcclConnector(KVConnectorBase_V1):
num_scheduled_tokens
=
(
num_scheduled_tokens
=
(
scheduler_output
.
num_scheduled_tokens
)[
req_id
]
scheduler_output
.
num_scheduled_tokens
)[
req_id
]
num_tokens
=
(
num_scheduled_tokens
+
num_computed_tokens
)
num_tokens
=
(
num_scheduled_tokens
+
num_computed_tokens
)
assert
req_id
in
self
.
chunked_prefill
# assert req_id in self.chunked_prefill
if
req_id
not
in
self
.
chunked_prefill
:
continue
block_ids
=
new_block_ids
[
0
]
block_ids
=
new_block_ids
[
0
]
if
not
resumed_from_preemption
:
if
not
resumed_from_preemption
:
block_ids
=
(
self
.
chunked_prefill
[
req_id
][
0
]
+
block_ids
)
block_ids
=
(
self
.
chunked_prefill
[
req_id
][
0
]
+
block_ids
)
...
...
vllm/envs.py
View file @
acf9f945
...
@@ -1642,7 +1642,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
...
@@ -1642,7 +1642,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
(
"true"
,
"1"
)),
(
"true"
,
"1"
)),
# vLLM will split prefill and decode, not mix up
# vLLM will split prefill and decode, not mix up
"VLLM_USE_PD_SPLIT"
:
"VLLM_USE_PD_SPLIT"
:
lambda
:
(
os
.
environ
.
get
(
"VLLM_USE_PD_SPLIT"
,
"True"
).
lower
()
in
lambda
:
(
os
.
environ
.
get
(
"VLLM_USE_PD_SPLIT"
,
"False"
).
lower
()
in
(
"true"
,
"1"
)),
(
"true"
,
"1"
)),
# vLLM will sync to avoid pp vmfault
# vLLM will sync to avoid pp vmfault
"VLLM_USE_PP_SYNC"
:
"VLLM_USE_PP_SYNC"
:
...
...
vllm/model_executor/model_loader/utils.py
View file @
acf9f945
...
@@ -198,6 +198,9 @@ def _get_model_architecture(
...
@@ -198,6 +198,9 @@ def _get_model_architecture(
os
.
environ
[
'VLLM_USE_LIGHTOP'
]
=
'1'
os
.
environ
[
'VLLM_USE_LIGHTOP'
]
=
'1'
if
not
envs
.
is_set
(
"VLLM_USE_OPT_CAT"
):
if
not
envs
.
is_set
(
"VLLM_USE_OPT_CAT"
):
os
.
environ
[
'VLLM_USE_OPT_CAT'
]
=
'1'
os
.
environ
[
'VLLM_USE_OPT_CAT'
]
=
'1'
else
:
if
not
envs
.
is_set
(
"VLLM_USE_PD_SPLIT"
):
os
.
environ
[
'VLLM_USE_PD_SPLIT'
]
=
'1'
if
os
.
getenv
(
'GEMM_PAD'
)
!=
'1'
:
if
os
.
getenv
(
'GEMM_PAD'
)
!=
'1'
:
os
.
environ
[
'GEMM_PAD'
]
=
'0'
os
.
environ
[
'GEMM_PAD'
]
=
'0'
...
@@ -209,6 +212,9 @@ def _get_model_architecture(
...
@@ -209,6 +212,9 @@ def _get_model_architecture(
os
.
environ
[
'VLLM_USE_LIGHTOP'
]
=
'1'
os
.
environ
[
'VLLM_USE_LIGHTOP'
]
=
'1'
if
not
envs
.
is_set
(
"VLLM_USE_OPT_CAT"
):
if
not
envs
.
is_set
(
"VLLM_USE_OPT_CAT"
):
os
.
environ
[
'VLLM_USE_OPT_CAT'
]
=
'1'
os
.
environ
[
'VLLM_USE_OPT_CAT'
]
=
'1'
else
:
if
not
envs
.
is_set
(
"VLLM_USE_PD_SPLIT"
):
os
.
environ
[
'VLLM_USE_PD_SPLIT'
]
=
'1'
# awq相关配置
# awq相关配置
try
:
try
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment