Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
61ba33d5
Commit
61ba33d5
authored
Apr 10, 2026
by
xuxz
Committed by
xuxz
Apr 10, 2026
Browse files
[PD][Feat]支持pd分离dp并行
parent
ce47a56e
Changes
7
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
2385 additions
and
1 deletion
+2385
-1
examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_proxy_p2p_nccl_xpyd_dp.py
...ed_serving_p2p_nccl_xpyd/disagg_proxy_p2p_nccl_xpyd_dp.py
+499
-0
vllm/distributed/kv_transfer/kv_connector/factory.py
vllm/distributed/kv_transfer/kv_connector/factory.py
+9
-0
vllm/distributed/kv_transfer/kv_connector/v1/du/du_swift_connector_dp.py
...d/kv_transfer/kv_connector/v1/du/du_swift_connector_dp.py
+772
-0
vllm/distributed/kv_transfer/kv_connector/v1/du/du_swift_engine_dp.py
...uted/kv_transfer/kv_connector/v1/du/du_swift_engine_dp.py
+1089
-0
vllm/envs.py
vllm/envs.py
+3
-0
vllm/v1/core/sched/scheduler.py
vllm/v1/core/sched/scheduler.py
+7
-1
vllm/v1/engine/core.py
vllm/v1/engine/core.py
+6
-0
No files found.
examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_proxy_p2p_nccl_xpyd_dp.py
0 → 100644
View file @
61ba33d5
This diff is collapsed.
Click to expand it.
vllm/distributed/kv_transfer/kv_connector/factory.py
View file @
61ba33d5
...
...
@@ -2,6 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
importlib
from
vllm
import
envs
from
collections.abc
import
Callable
from
typing
import
TYPE_CHECKING
,
Optional
,
cast
...
...
@@ -45,6 +46,7 @@ class KVConnectorFactory:
config
:
"VllmConfig"
,
role
:
KVConnectorRole
,
kv_cache_config
:
Optional
[
"KVCacheConfig"
]
=
None
,
dp_rank
:
int
=
-
1
,
)
->
KVConnectorBase
:
kv_transfer_config
=
config
.
kv_transfer_config
if
kv_transfer_config
is
None
:
...
...
@@ -77,6 +79,8 @@ class KVConnectorFactory:
if
compat_sig
:
# Old signature: __init__(self, vllm_config, role)
return
connector_cls
(
config
,
role
)
elif
envs
.
VLLM_USE_DP_CONNECTOR
:
return
connector_cls
(
config
,
role
,
kv_cache_config
,
dp_rank
)
else
:
# New signature: __init__(self, vllm_config, role, kv_cache_config)
return
connector_cls
(
config
,
role
,
kv_cache_config
)
...
...
@@ -160,6 +164,11 @@ KVConnectorFactory.register_connector(
"vllm.distributed.kv_transfer.kv_connector.v1.du.du_swift_connector"
,
"DuSwiftConnector"
)
KVConnectorFactory
.
register_connector
(
"DuSwiftConnectorDp"
,
"vllm.distributed.kv_transfer.kv_connector.v1.du.du_swift_connector_dp"
,
"DuSwiftConnectorDp"
)
KVConnectorFactory
.
register_connector
(
"LMCacheConnectorV1"
,
"vllm.distributed.kv_transfer.kv_connector.v1.lmcache_connector"
,
...
...
vllm/distributed/kv_transfer/kv_connector/v1/du/du_swift_connector_dp.py
0 → 100644
View file @
61ba33d5
This diff is collapsed.
Click to expand it.
vllm/distributed/kv_transfer/kv_connector/v1/du/du_swift_engine_dp.py
0 → 100644
View file @
61ba33d5
This diff is collapsed.
Click to expand it.
vllm/envs.py
View file @
61ba33d5
...
...
@@ -1841,6 +1841,9 @@ environment_variables: dict[str, Callable[[], Any]] = {
# vllm will use rmsquant fused op
"USE_FUSED_RMS_QUANT"
:
lambda
:
bool
(
int
(
os
.
getenv
(
"USE_FUSED_RMS_QUANT"
,
"0"
))),
# vllm will use the DP connector
"VLLM_USE_DP_CONNECTOR"
:
lambda
:
bool
(
int
(
os
.
getenv
(
"VLLM_USE_DP_CONNECTOR"
,
"0"
))),
# vllm PD separation will run asynchronously
"VLLM_P2P_ASYNC"
:
lambda
:
bool
(
int
(
os
.
getenv
(
"VLLM_P2P_ASYNC"
,
"0"
))),
...
...
vllm/v1/core/sched/scheduler.py
View file @
61ba33d5
...
...
@@ -121,7 +121,7 @@ class Scheduler(SchedulerInterface):
config
=
self
.
vllm_config
,
role
=
KVConnectorRole
.
SCHEDULER
,
kv_cache_config
=
self
.
kv_cache_config
,
)
dp_rank
=
self
.
parallel_config
.
data_parallel_rank
)
if
self
.
log_stats
:
self
.
connector_prefix_cache_stats
=
PrefixCacheStats
()
kv_load_failure_policy
=
(
...
...
@@ -556,6 +556,12 @@ class Scheduler(SchedulerInterface):
+
len
(
scheduled_running_reqs
)
>=
max_batch_running
):
break
request
=
self
.
waiting
.
peek_request
()
if
self
.
connector
and
not
self
.
connector
.
is_producer
and
\
request
.
request_id
not
in
self
.
finished_recving_kv_req_ids
and
\
envs
.
VLLM_USE_DP_CONNECTOR
:
self
.
waiting
.
pop_request
()
skipped_waiting_requests
.
prepend_request
(
request
)
continue
# KVTransfer: skip request if still waiting for remote kvs.
if
request
.
status
==
RequestStatus
.
WAITING_FOR_REMOTE_KVS
:
is_ready
=
self
.
_update_waiting_for_remote_kv
(
request
)
...
...
vllm/v1/engine/core.py
View file @
61ba33d5
...
...
@@ -66,6 +66,7 @@ from vllm.v1.serial_utils import MsgpackDecoder, MsgpackEncoder
from
vllm.v1.structured_output
import
StructuredOutputManager
from
vllm.v1.utils
import
compute_iteration_details
from
vllm.version
import
__version__
as
VLLM_VERSION
from
vllm
import
envs
logger
=
init_logger
(
__name__
)
...
...
@@ -1155,6 +1156,11 @@ class EngineCoreProc(EngineCore):
# Push to input queue for core busy loop.
self
.
input_queue
.
put_nowait
((
request_type
,
request
))
if
isinstance
(
request
,
tuple
)
and
self
.
scheduler
.
connector
is
not
None
\
and
envs
.
VLLM_USE_DP_CONNECTOR
:
req
,
_
=
request
if
request_type
==
EngineCoreRequestType
.
ADD
:
self
.
scheduler
.
connector
.
register_req
(
req
.
request_id
)
def
process_output_sockets
(
self
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment