Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
5898b135
Unverified
Commit
5898b135
authored
Aug 10, 2025
by
Nick Hill
Committed by
GitHub
Aug 10, 2025
Browse files
[BugFix] Fix KVConnectorOutput TPU breakage (#22598)
Signed-off-by:
Nick Hill
<
nhill@redhat.com
>
parent
b799f4b9
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
19 additions
and
10 deletions
+19
-10
tests/v1/kv_connector/unit/utils.py
tests/v1/kv_connector/unit/utils.py
+8
-4
vllm/v1/core/sched/scheduler.py
vllm/v1/core/sched/scheduler.py
+2
-2
vllm/v1/worker/tpu_model_runner.py
vllm/v1/worker/tpu_model_runner.py
+9
-4
No files found.
tests/v1/kv_connector/unit/utils.py
View file @
5898b135
...
...
@@ -179,6 +179,13 @@ def create_model_runner_output(
sampled_token
=
EOS_TOKEN_ID
if
use_eos
else
0
sampled_token_ids
=
[[
sampled_token
]
for
_
in
req_ids
]
kv_connector_output
=
None
if
(
finished_sending
is
None
and
finished_recving
is
None
)
else
KVConnectorOutput
(
finished_sending
=
finished_sending
,
finished_recving
=
finished_recving
,
)
# Make output data structure.
return
ModelRunnerOutput
(
req_ids
=
req_ids
,
...
...
@@ -188,10 +195,7 @@ def create_model_runner_output(
logprobs
=
None
,
prompt_logprobs_dict
=
{},
pooler_output
=
None
,
kv_connector_output
=
KVConnectorOutput
(
finished_sending
=
finished_sending
,
finished_recving
=
finished_recving
,
),
kv_connector_output
=
kv_connector_output
,
)
...
...
vllm/v1/core/sched/scheduler.py
View file @
5898b135
...
...
@@ -1151,8 +1151,8 @@ class Scheduler(SchedulerInterface):
schedule the request during the next step.
"""
assert
self
.
connector
is
not
None
self
.
connector
.
update_connector_output
(
kv_connector_output
)
if
self
.
connector
is
not
None
:
self
.
connector
.
update_connector_output
(
kv_connector_output
)
# KV Connector: update recv and send status from last step.
for
req_id
in
(
kv_connector_output
.
finished_recving
or
()):
...
...
vllm/v1/worker/tpu_model_runner.py
View file @
5898b135
...
...
@@ -1138,6 +1138,13 @@ class TPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
i
,
target_slice
]
=
valid_sampled_token_ids
[
i
]
req_state
.
output_token_ids
.
extend
(
valid_sampled_token_ids
[
i
])
kv_connector_output
=
None
if
(
finished_sending
is
None
and
finished_recving
is
None
)
else
KVConnectorOutput
(
finished_sending
=
finished_sending
,
finished_recving
=
finished_recving
,
)
model_runner_output
=
ModelRunnerOutput
(
req_ids
=
req_ids
,
req_id_to_index
=
self
.
input_batch
.
req_id_to_index
,
...
...
@@ -1146,10 +1153,8 @@ class TPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
logprobs
=
logprobs_lists
,
prompt_logprobs_dict
=
prompt_logprobs_dict
,
pooler_output
=
[],
kv_connector_output
=
KVConnectorOutput
(
finished_sending
=
finished_sending
,
finished_recving
=
finished_recving
,
))
kv_connector_output
=
kv_connector_output
,
)
# Check there are no new graphs compiled - all the graphs should be
# captured and compiled during warm up.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment