Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
eec69420
Unverified
Commit
eec69420
authored
Jul 24, 2025
by
Nick Hill
Committed by
GitHub
Jul 23, 2025
Browse files
[BugFix] Fix KVConnector TP worker aggregation (#21473)
Signed-off-by:
Nick Hill
<
nhill@redhat.com
>
parent
fd48d99f
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
10 additions
and
8 deletions
+10
-8
vllm/v1/worker/gpu_worker.py
vllm/v1/worker/gpu_worker.py
+10
-8
No files found.
vllm/v1/worker/gpu_worker.py
View file @
eec69420
...
@@ -16,7 +16,8 @@ from vllm.config import VllmConfig
...
@@ -16,7 +16,8 @@ from vllm.config import VllmConfig
from
vllm.distributed
import
(
ensure_model_parallel_initialized
,
from
vllm.distributed
import
(
ensure_model_parallel_initialized
,
init_distributed_environment
,
init_distributed_environment
,
set_custom_all_reduce
)
set_custom_all_reduce
)
from
vllm.distributed.kv_transfer
import
ensure_kv_transfer_initialized
from
vllm.distributed.kv_transfer
import
(
ensure_kv_transfer_initialized
,
has_kv_transfer_group
)
from
vllm.distributed.parallel_state
import
get_pp_group
,
get_tp_group
from
vllm.distributed.parallel_state
import
get_pp_group
,
get_tp_group
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.lora.request
import
LoRARequest
from
vllm.lora.request
import
LoRARequest
...
@@ -342,19 +343,20 @@ class Worker(WorkerBase):
...
@@ -342,19 +343,20 @@ class Worker(WorkerBase):
assert
isinstance
(
output
,
IntermediateTensors
)
assert
isinstance
(
output
,
IntermediateTensors
)
get_pp_group
().
send_tensor_dict
(
output
.
tensors
,
get_pp_group
().
send_tensor_dict
(
output
.
tensors
,
all_gather_group
=
get_tp_group
())
all_gather_group
=
get_tp_group
())
if
not
has_kv_transfer_group
():
return
None
# In case of PP with kv transfer, we need to pass through the
# In case of PP with kv transfer, we need to pass through the
# finished_sending and finished_recving buffers.
# finished_sending and finished_recving buffers.
empty
_output
=
EMPTY_MODEL_RUNNER_OUTPUT
new
_output
=
EMPTY_MODEL_RUNNER_OUTPUT
if
output
.
finished_sending
or
output
.
finished_recving
:
if
output
.
finished_sending
or
output
.
finished_recving
:
empty
_output
=
copy
.
copy
(
empty
_output
)
new
_output
=
copy
.
copy
(
new
_output
)
empty
_output
.
finished_sending
=
output
.
finished_sending
new
_output
.
finished_sending
=
output
.
finished_sending
empty
_output
.
finished_recving
=
output
.
finished_recving
new
_output
.
finished_recving
=
output
.
finished_recving
output
=
empty
_output
output
=
new
_output
assert
isinstance
(
output
,
ModelRunnerOutput
)
assert
isinstance
(
output
,
ModelRunnerOutput
)
# return output only from the driver worker
return
output
return
output
if
self
.
is_driver_worker
else
None
def
profile
(
self
,
is_start
:
bool
=
True
):
def
profile
(
self
,
is_start
:
bool
=
True
):
if
self
.
profiler
is
None
:
if
self
.
profiler
is
None
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment