Commit 243b2f0c authored by zhuwenwen
Browse files

[P/D][Feature]添加pd分离p实例对tbo的支持

parent 5865aaba
...@@ -287,6 +287,13 @@ def tbo_split_and_execute_model( ...@@ -287,6 +287,13 @@ def tbo_split_and_execute_model(
attn_metadata_left = prepare_tbo_atten_metadata(runner, input_split.scheduler_output_left, input_split.req_ids_left, 0) attn_metadata_left = prepare_tbo_atten_metadata(runner, input_split.scheduler_output_left, input_split.req_ids_left, 0)
attn_metadata_right = prepare_tbo_atten_metadata(runner, input_split.scheduler_output_right, input_split.req_ids_right, input_split.req_num_left) attn_metadata_right = prepare_tbo_atten_metadata(runner, input_split.scheduler_output_right, input_split.req_ids_right, input_split.req_num_left)
with set_forward_context(attn_metadata,
runner.vllm_config,
num_tokens=num_input_tokens,
num_tokens_across_dp=num_tokens_across_dp,
skip_cuda_graphs=True):
runner.maybe_setup_kv_connector(scheduler_output)
model_output = tbo_model_executable_v1( model_output = tbo_model_executable_v1(
runner, runner,
attn_metadata_left, attn_metadata_left,
...@@ -298,7 +305,11 @@ def tbo_split_and_execute_model( ...@@ -298,7 +305,11 @@ def tbo_split_and_execute_model(
positions, positions,
intermediate_tensors, intermediate_tensors,
inputs_embeds) inputs_embeds)
finished_sending, finished_recving = None, None
runner.maybe_wait_for_kv_save()
finished_sending, finished_recving = (
runner.get_finished_kv_transfers(scheduler_output))
#finished_sending, finished_recving = None, None
else: else:
# Run the decoder. # Run the decoder.
# Use persistent buffers for CUDA graphs. # Use persistent buffers for CUDA graphs.
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment