Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
fb3c32c6
Commit
fb3c32c6
authored
Sep 14, 2025
by
zhuwenwen
Browse files
Merge branch 'v0.9.2-dev-pd_tbo' into 'v0.9.2-dev'
V0.9.2 dev pd tbo See merge request dcutoolkit/deeplearing/vllm!204
parents
0627b53a
fc0e53fe
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
35 additions
and
15 deletions
+35
-15
vllm/distributed/kv_transfer/kv_connector/v1/p2p/p2p_nccl_connector.py
...ted/kv_transfer/kv_connector/v1/p2p/p2p_nccl_connector.py
+12
-3
vllm/two_batch_overlap/v1/model_input_split_v1.py
vllm/two_batch_overlap/v1/model_input_split_v1.py
+23
-12
No files found.
vllm/distributed/kv_transfer/kv_connector/v1/p2p/p2p_nccl_connector.py
View file @
fb3c32c6
...
@@ -215,9 +215,18 @@ class P2pNcclConnector(KVConnectorBase_V1):
...
@@ -215,9 +215,18 @@ class P2pNcclConnector(KVConnectorBase_V1):
inject_kv_into_layer
(
kv_cache_layer
,
kv_cache
,
inject_kv_into_layer
(
kv_cache_layer
,
kv_cache
,
request
.
slot_mapping
,
request
.
request_id
)
request
.
slot_mapping
,
request
.
request_id
)
tensor
=
self
.
p2p_nccl_engine
.
recv_store
.
pop
(
request
.
request_id
+
"#"
+
layer_name
,
None
)
tensor_id
=
request
.
request_id
+
"#"
+
layer_name
if
tensor
is
not
None
:
if
tensor_id
in
self
.
p2p_nccl_engine
.
recv_store
:
del
tensor
tensor
=
self
.
p2p_nccl_engine
.
recv_store
.
pop
(
tensor_id
,
None
)
self
.
p2p_nccl_engine
.
send_request_id_to_tensor_ids
.
pop
(
request
.
request_id
,
None
)
self
.
p2p_nccl_engine
.
recv_request_id_to_tensor_ids
.
pop
(
request
.
request_id
,
None
)
addr
=
0
if
isinstance
(
tensor
,
tuple
):
addr
,
_
,
_
=
tensor
self
.
p2p_nccl_engine
.
pool
.
free
(
addr
)
def
wait_for_layer_load
(
self
,
layer_name
:
str
)
->
None
:
def
wait_for_layer_load
(
self
,
layer_name
:
str
)
->
None
:
"""Blocking until the KV for a specific layer is loaded into vLLM's
"""Blocking until the KV for a specific layer is loaded into vLLM's
...
...
vllm/two_batch_overlap/v1/model_input_split_v1.py
View file @
fb3c32c6
...
@@ -287,18 +287,29 @@ def tbo_split_and_execute_model(
...
@@ -287,18 +287,29 @@ def tbo_split_and_execute_model(
attn_metadata_left
=
prepare_tbo_atten_metadata
(
runner
,
input_split
.
scheduler_output_left
,
input_split
.
req_ids_left
,
0
)
attn_metadata_left
=
prepare_tbo_atten_metadata
(
runner
,
input_split
.
scheduler_output_left
,
input_split
.
req_ids_left
,
0
)
attn_metadata_right
=
prepare_tbo_atten_metadata
(
runner
,
input_split
.
scheduler_output_right
,
input_split
.
req_ids_right
,
input_split
.
req_num_left
)
attn_metadata_right
=
prepare_tbo_atten_metadata
(
runner
,
input_split
.
scheduler_output_right
,
input_split
.
req_ids_right
,
input_split
.
req_num_left
)
model_output
=
tbo_model_executable_v1
(
with
set_forward_context
(
attn_metadata
,
runner
,
runner
.
vllm_config
,
attn_metadata_left
,
num_tokens
=
num_input_tokens
,
attn_metadata_right
,
num_tokens_across_dp
=
num_tokens_across_dp
,
num_input_tokens_left
,
skip_cuda_graphs
=
True
):
num_input_tokens_right
,
runner
.
maybe_setup_kv_connector
(
scheduler_output
)
num_tokens_across_dp
,
input_ids
,
model_output
=
tbo_model_executable_v1
(
positions
,
runner
,
intermediate_tensors
,
attn_metadata_left
,
inputs_embeds
)
attn_metadata_right
,
finished_sending
,
finished_recving
=
None
,
None
num_input_tokens_left
,
num_input_tokens_right
,
num_tokens_across_dp
,
input_ids
,
positions
,
intermediate_tensors
,
inputs_embeds
)
runner
.
maybe_wait_for_kv_save
()
finished_sending
,
finished_recving
=
(
runner
.
get_finished_kv_transfers
(
scheduler_output
))
#finished_sending, finished_recving = None, None
else
:
else
:
# Run the decoder.
# Run the decoder.
# Use persistent buffers for CUDA graphs.
# Use persistent buffers for CUDA graphs.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment