Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
720394de
Unverified
Commit
720394de
authored
Oct 14, 2025
by
Qier Li
Committed by
GitHub
Oct 14, 2025
Browse files
[KVConnector][Metrics] Aggregate scheduler-side KVConnectorStats (#26046)
Signed-off-by:
Qier Li
<
kevin44036@gmail.com
>
parent
88a49745
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
73 additions
and
0 deletions
+73
-0
tests/v1/kv_connector/unit/test_nixl_connector.py
tests/v1/kv_connector/unit/test_nixl_connector.py
+69
-0
vllm/v1/core/sched/scheduler.py
vllm/v1/core/sched/scheduler.py
+4
-0
No files found.
tests/v1/kv_connector/unit/test_nixl_connector.py
View file @
720394de
...
...
@@ -839,6 +839,75 @@ def test_multi_kv_connector_stats_aggregation():
assert
kv_connector_stats
[
"FooConnector"
].
data
[
"num_foo_transfers"
]
==
6
@patch(
    "vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlWrapper",
    FakeNixlWrapper,
)
def test_scheduler_kv_connector_stats_aggregation():
    """Verify scheduler-side KV connector stats are merged with worker stats.

    A worker-reported stats object (attached to the model runner output) and
    a scheduler-side stats object (returned by the stubbed connector) are
    both fed through ``Scheduler.update_from_output``. The resulting
    scheduler stats must reflect the contents of both.
    """
    from vllm.v1.core.sched.output import SchedulerOutput

    connector_name = "NixlConnector"
    request_id = "req_0"

    scheduler = create_scheduler(create_vllm_config())

    # Worker side: one recorded transfer and no remote-token samples.
    stats_from_worker = NixlKVConnectorStats()
    stats_from_worker.record_transfer(get_default_xfer_telemetry())
    stats_from_worker.data["remote_tokens"] = []

    # Scheduler side: carries the custom "remote_tokens" metric. The zeroed
    # transfer fields are a dummy transfer, present only for testing so that
    # the is_empty() check is bypassed and these stats are not skipped.
    dummy_transfer_with_tokens = {
        "transfer_duration": [0],
        "post_duration": [0],
        "bytes_transferred": [0],
        "num_descriptors": [0],
        "remote_tokens": [128],
    }
    stats_from_scheduler = NixlKVConnectorStats()
    stats_from_scheduler.data.update(dummy_transfer_with_tokens)

    def fake_get_kv_connector_stats():
        # Stand-in for the scheduler connector's stats method.
        return MultiKVConnectorStats(data={connector_name: stats_from_scheduler})

    scheduler.connector.get_kv_connector_stats = fake_get_kv_connector_stats

    scheduler_output = SchedulerOutput(
        scheduled_new_reqs=[],
        scheduled_cached_reqs=None,
        num_scheduled_tokens={request_id: 1},
        total_num_scheduled_tokens=1,
        scheduled_spec_decode_tokens={},
        scheduled_encoder_inputs={},
        num_common_prefix_blocks=[0],
        finished_req_ids=set(),
        free_encoder_mm_hashes=set(),
        structured_output_request_ids={},
        grammar_bitmask=None,
    )

    worker_connector_output = KVConnectorOutput(
        kv_connector_stats=MultiKVConnectorStats(
            data={connector_name: stats_from_worker}
        )
    )
    model_output = ModelRunnerOutput(
        req_ids=[request_id],
        req_id_to_index={request_id: 0},
        sampled_token_ids=[[123]],
        logprobs=None,
        prompt_logprobs_dict={},
        pooler_output=[None],
        kv_connector_output=worker_connector_output,
    )

    engine_core_outputs = scheduler.update_from_output(scheduler_output, model_output)

    # Only the first engine-core output is inspected.
    first_output = next(iter(engine_core_outputs.values()))
    aggregated = first_output.scheduler_stats.kv_connector_stats
    nixl_aggregated = aggregated[connector_name]

    # Expected: the worker's recorded transfer plus the scheduler's dummy one.
    assert nixl_aggregated.num_successful_transfers == 2
    # Expected: the worker's empty list combined with the scheduler's [128].
    assert nixl_aggregated.data["remote_tokens"] == [128]
@
pytest
.
mark
.
parametrize
(
"distributed_executor_backend"
,
[
"ray"
,
None
])
@
patch
(
"vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlWrapper"
,
...
...
vllm/v1/core/sched/scheduler.py
View file @
720394de
...
...
@@ -924,6 +924,10 @@ class Scheduler(SchedulerInterface):
kv_connector_stats
=
(
kv_connector_output
.
kv_connector_stats
if
kv_connector_output
else
None
)
if
kv_connector_stats
and
self
.
connector
:
stats
=
self
.
connector
.
get_kv_connector_stats
()
if
stats
:
kv_connector_stats
=
kv_connector_stats
.
aggregate
(
stats
)
failed_kv_load_req_ids
=
None
if
kv_connector_output
and
kv_connector_output
.
invalid_block_ids
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment