Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
4c207e0c
Unverified
Commit
4c207e0c
authored
Oct 17, 2025
by
Yan Ru Pei
Committed by
GitHub
Oct 17, 2025
Browse files
chore: remove kv metrics scraping and aggregation (#3701)
Signed-off-by:
PeaBrane
<
yanrpei@gmail.com
>
parent
2c2f7c7d
Changes
10
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
29 additions
and
527 deletions
+29
-527
lib/bindings/python/examples/metrics/README.md
lib/bindings/python/examples/metrics/README.md
+1
-5
lib/bindings/python/rust/lib.rs
lib/bindings/python/rust/lib.rs
+0
-3
lib/bindings/python/rust/llm/kv.rs
lib/bindings/python/rust/llm/kv.rs
+15
-109
lib/bindings/python/src/dynamo/_core.pyi
lib/bindings/python/src/dynamo/_core.pyi
+7
-25
lib/bindings/python/src/dynamo/llm/__init__.py
lib/bindings/python/src/dynamo/llm/__init__.py
+0
-3
lib/bindings/python/tests/test_kv_bindings.py
lib/bindings/python/tests/test_kv_bindings.py
+1
-88
lib/llm/src/kv_router.rs
lib/llm/src/kv_router.rs
+0
-2
lib/llm/src/kv_router/metrics_aggregator.rs
lib/llm/src/kv_router/metrics_aggregator.rs
+0
-198
lib/llm/src/kv_router/publisher.rs
lib/llm/src/kv_router/publisher.rs
+4
-63
lib/llm/src/mocker/engine.rs
lib/llm/src/mocker/engine.rs
+1
-31
No files found.
lib/bindings/python/examples/metrics/README.md
View file @
4c207e0c
...
@@ -290,8 +290,7 @@ graph TB
...
@@ -290,8 +290,7 @@ graph TB
NATS[NATS Server<br/>KV_METRICS_SUBJECT]
NATS[NATS Server<br/>KV_METRICS_SUBJECT]
end
end
subgraph "Aggregator Process Components"
subgraph "Other Consumers (e.g., KvWorkerMonitor)"
AGG[KvMetricsAggregator<br/>llm/src/kv_router/metrics_aggregator.rs]
SUB[NATS Subscriber<br/>component/namespace.rs]
SUB[NATS Subscriber<br/>component/namespace.rs]
end
end
...
@@ -303,7 +302,6 @@ graph TB
...
@@ -303,7 +302,6 @@ graph TB
style WATCH fill:#ce422b,color:#fff
style WATCH fill:#ce422b,color:#fff
style PROM1 fill:#ce422b,color:#fff
style PROM1 fill:#ce422b,color:#fff
style NATS fill:#27aae1,color:#fff
style NATS fill:#27aae1,color:#fff
style AGG fill:#ce422b,color:#fff
style SUB fill:#ce422b,color:#fff
style SUB fill:#ce422b,color:#fff
style SS fill:#6c757d,color:#fff
style SS fill:#6c757d,color:#fff
end
end
...
@@ -316,9 +314,7 @@ graph TB
...
@@ -316,9 +314,7 @@ graph TB
WMP -->|"tx.send(metrics)"| WATCH
WMP -->|"tx.send(metrics)"| WATCH
WATCH -->|"publish(KV_METRICS_SUBJECT, LoadEvent)"| NATS
WATCH -->|"publish(KV_METRICS_SUBJECT, LoadEvent)"| NATS
NATS -->|"subscribe_with_type LoadEvent"| SUB
NATS -->|"subscribe_with_type LoadEvent"| SUB
SUB -->|"discover endpoints"| AGG
SS -->|"Worker: gather() from PROM1"| PROM1
SS -->|"Worker: gather() from PROM1"| PROM1
SS -->|"Aggregator: scrape_stats()"| AGG
```
```
##### Method 2: Dynamic Registration - Component View
##### Method 2: Dynamic Registration - Component View
...
...
lib/bindings/python/rust/lib.rs
View file @
4c207e0c
...
@@ -146,9 +146,6 @@ fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> {
...
@@ -146,9 +146,6 @@ fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> {
m
.add_class
::
<
llm
::
kv
::
OverlapScores
>
()
?
;
m
.add_class
::
<
llm
::
kv
::
OverlapScores
>
()
?
;
m
.add_class
::
<
llm
::
kv
::
KvIndexer
>
()
?
;
m
.add_class
::
<
llm
::
kv
::
KvIndexer
>
()
?
;
m
.add_class
::
<
llm
::
kv
::
ApproxKvIndexer
>
()
?
;
m
.add_class
::
<
llm
::
kv
::
ApproxKvIndexer
>
()
?
;
m
.add_class
::
<
llm
::
kv
::
EndpointKvMetrics
>
()
?
;
m
.add_class
::
<
llm
::
kv
::
AggregatedMetrics
>
()
?
;
m
.add_class
::
<
llm
::
kv
::
KvMetricsAggregator
>
()
?
;
m
.add_class
::
<
llm
::
kv
::
KvEventPublisher
>
()
?
;
m
.add_class
::
<
llm
::
kv
::
KvEventPublisher
>
()
?
;
m
.add_class
::
<
llm
::
kv
::
RadixTree
>
()
?
;
m
.add_class
::
<
llm
::
kv
::
RadixTree
>
()
?
;
m
.add_class
::
<
llm
::
kv
::
ZmqKvEventListener
>
()
?
;
m
.add_class
::
<
llm
::
kv
::
ZmqKvEventListener
>
()
?
;
...
...
lib/bindings/python/rust/llm/kv.rs
View file @
4c207e0c
...
@@ -49,39 +49,35 @@ impl WorkerMetricsPublisher {
...
@@ -49,39 +49,35 @@ impl WorkerMetricsPublisher {
}
}
#[pyo3(signature
=
(component,
metrics_labels
=
None))]
#[pyo3(signature
=
(component,
metrics_labels
=
None))]
#[allow(unused_variables)]
fn
create_endpoint
<
'p
>
(
fn
create_endpoint
<
'p
>
(
&
self
,
&
self
,
py
:
Python
<
'p
>
,
py
:
Python
<
'p
>
,
component
:
Component
,
component
:
Component
,
metrics_labels
:
Option
<
Vec
<
(
String
,
String
)
>>
,
metrics_labels
:
Option
<
Vec
<
(
String
,
String
)
>>
,
// TODO: fully remove this
)
->
PyResult
<
Bound
<
'p
,
PyAny
>>
{
)
->
PyResult
<
Bound
<
'p
,
PyAny
>>
{
// Emit deprecation warning if metrics_labels is provided
if
metrics_labels
.is_some
()
{
let
warnings
=
py
.import
(
"warnings"
)
?
;
warnings
.call_method1
(
"warn"
,
(
"The 'metrics_labels' parameter is deprecated and no longer used. It will be removed in a future version."
,
py
.get_type
::
<
pyo3
::
exceptions
::
PyDeprecationWarning
>
(),
),
)
?
;
}
let
rs_publisher
=
self
.inner
.clone
();
let
rs_publisher
=
self
.inner
.clone
();
let
rs_component
=
component
.inner
.clone
();
let
rs_component
=
component
.inner
.clone
();
pyo3_async_runtimes
::
tokio
::
future_into_py
(
py
,
async
move
{
pyo3_async_runtimes
::
tokio
::
future_into_py
(
py
,
async
move
{
// Convert Python labels to Option<&[(&str, &str)]> expected by Rust API
let
metrics_labels_ref
:
Option
<
Vec
<
(
&
str
,
&
str
)
>>
=
if
let
Some
(
metrics_labels
)
=
metrics_labels
.as_ref
()
{
if
metrics_labels
.is_empty
()
{
None
}
else
{
Some
(
metrics_labels
.iter
()
.map
(|(
k
,
v
)|
(
k
.as_str
(),
v
.as_str
()))
.collect
(),
)
}
}
else
{
None
};
// Register Prometheus metrics first
// Register Prometheus metrics first
rs_publisher
rs_publisher
.register_prometheus_metrics
(
&
rs_component
)
.register_prometheus_metrics
(
&
rs_component
)
.map_err
(
to_pyerr
)
?
;
.map_err
(
to_pyerr
)
?
;
rs_publisher
rs_publisher
.create_endpoint
(
rs_component
,
metrics_labels_ref
.as_deref
()
)
.create_endpoint
(
rs_component
)
.await
.await
.map_err
(
to_pyerr
)
?
;
.map_err
(
to_pyerr
)
?
;
Ok
(())
Ok
(())
...
@@ -552,96 +548,6 @@ impl ApproxKvIndexer {
...
@@ -552,96 +548,6 @@ impl ApproxKvIndexer {
}
}
}
}
#[pyclass]
#[derive(Clone)]
pub
(
crate
)
struct
EndpointKvMetrics
{
#[pyo3(get,
set)]
pub
worker_id
:
WorkerId
,
#[pyo3(get,
set)]
pub
request_active_slots
:
u64
,
#[pyo3(get,
set)]
pub
request_total_slots
:
u64
,
#[pyo3(get,
set)]
pub
kv_active_blocks
:
u64
,
#[pyo3(get,
set)]
pub
kv_total_blocks
:
u64
,
#[pyo3(get,
set)]
pub
num_requests_waiting
:
u64
,
#[pyo3(get,
set)]
pub
gpu_cache_usage_perc
:
f32
,
#[pyo3(get,
set)]
pub
gpu_prefix_cache_hit_rate
:
f32
,
}
#[pyclass]
#[derive(Clone)]
pub
(
crate
)
struct
AggregatedMetrics
{
#[pyo3(get,
set)]
pub
endpoints
:
Vec
<
EndpointKvMetrics
>
,
#[pyo3(get,
set)]
pub
load_avg
:
f64
,
#[pyo3(get,
set)]
pub
load_std
:
f64
,
}
#[pyclass]
pub
(
crate
)
struct
KvMetricsAggregator
{
inner
:
Arc
<
llm_rs
::
kv_router
::
metrics_aggregator
::
KvMetricsAggregator
>
,
}
#[pymethods]
impl
KvMetricsAggregator
{
#[new]
fn
new
(
component
:
Component
)
->
PyResult
<
Self
>
{
let
runtime
=
pyo3_async_runtimes
::
tokio
::
get_runtime
();
runtime
.block_on
(
async
{
let
inner
=
llm_rs
::
kv_router
::
metrics_aggregator
::
KvMetricsAggregator
::
new
(
component
.inner
.clone
(),
component
.inner
.drt
()
.runtime
()
.child_token
(),
)
.await
;
Ok
(
Self
{
inner
:
inner
.into
(),
})
})
}
fn
get_metrics
<
'p
>
(
&
self
,
py
:
Python
<
'p
>
)
->
PyResult
<
Bound
<
'p
,
PyAny
>>
{
// TODO: update EndpointKvMetrics to match the new ForwardPassMetrics struct
let
endpoints
=
self
.inner
.get_endpoints
();
let
load_avg
=
endpoints
.load_avg
;
let
load_std
=
endpoints
.load_std
;
let
endpoint_kv_metrics
=
endpoints
.endpoints
.into_iter
()
.map
(|(
worker_id
,
endpoint
)|
{
let
metrics
=
endpoint
.data
;
let
LoadMetrics
::
EngineLoadMetrics
(
fwd_pass_metrics
)
=
metrics
else
{
panic!
(
"Endpoints do not contain forward pass metrics."
);
};
EndpointKvMetrics
{
worker_id
,
request_active_slots
:
fwd_pass_metrics
.worker_stats.request_active_slots
,
request_total_slots
:
fwd_pass_metrics
.worker_stats.request_total_slots
,
kv_active_blocks
:
fwd_pass_metrics
.kv_stats.kv_active_blocks
,
kv_total_blocks
:
fwd_pass_metrics
.kv_stats.kv_total_blocks
,
num_requests_waiting
:
fwd_pass_metrics
.worker_stats.num_requests_waiting
,
gpu_cache_usage_perc
:
fwd_pass_metrics
.kv_stats.gpu_cache_usage_perc
,
gpu_prefix_cache_hit_rate
:
fwd_pass_metrics
.kv_stats.gpu_prefix_cache_hit_rate
,
}
})
.collect
();
pyo3_async_runtimes
::
tokio
::
future_into_py
(
py
,
async
move
{
Ok
(
AggregatedMetrics
{
endpoints
:
endpoint_kv_metrics
,
load_avg
,
load_std
,
})
})
}
}
#[pyclass]
#[pyclass]
pub
(
crate
)
struct
KvRecorder
{
pub
(
crate
)
struct
KvRecorder
{
inner
:
Arc
<
llm_rs
::
kv_router
::
recorder
::
KvRecorder
>
,
inner
:
Arc
<
llm_rs
::
kv_router
::
recorder
::
KvRecorder
>
,
...
...
lib/bindings/python/src/dynamo/_core.pyi
View file @
4c207e0c
...
@@ -464,6 +464,13 @@ class WorkerMetricsPublisher:
...
@@ -464,6 +464,13 @@ class WorkerMetricsPublisher:
"""
"""
Similar to Component.create_service, but only service created through
Similar to Component.create_service, but only service created through
this method will interact with KV router of the same component.
this method will interact with KV router of the same component.
Args:
component: The component to create the endpoint for
metrics_labels: [DEPRECATED] This parameter is no longer used and will be removed in a future version
.. deprecated::
The metrics_labels parameter is deprecated and has no effect.
"""
"""
def publish(
def publish(
...
@@ -745,31 +752,6 @@ class KvRecorder:
...
@@ -745,31 +752,6 @@ class KvRecorder:
"""
"""
...
...
class AggregatedMetrics:
"""
A collection of metrics of the endpoints
"""
...
class KvMetricsAggregator:
"""
A metrics aggregator will collect KV metrics of the endpoints.
"""
...
def __init__(self, component: Component) -> None:
"""
Create a `KvMetricsAggregator` object
"""
def get_metrics(self) -> AggregatedMetrics:
"""
Return the aggregated metrics of the endpoints.
"""
...
class KvEventPublisher:
class KvEventPublisher:
"""
"""
A KV event publisher will publish KV events corresponding to the component.
A KV event publisher will publish KV events corresponding to the component.
...
...
lib/bindings/python/src/dynamo/llm/__init__.py
View file @
4c207e0c
...
@@ -5,8 +5,6 @@
...
@@ -5,8 +5,6 @@
import
logging
import
logging
from
dynamo._core
import
AggregatedMetrics
as
AggregatedMetrics
try
:
try
:
from
dynamo._core
import
BlockManager
as
BlockManager
from
dynamo._core
import
BlockManager
as
BlockManager
from
dynamo._core
import
KvbmLeader
as
KvbmLeader
from
dynamo._core
import
KvbmLeader
as
KvbmLeader
...
@@ -23,7 +21,6 @@ from dynamo._core import HttpAsyncEngine as HttpAsyncEngine
...
@@ -23,7 +21,6 @@ from dynamo._core import HttpAsyncEngine as HttpAsyncEngine
from
dynamo._core
import
HttpService
as
HttpService
from
dynamo._core
import
HttpService
as
HttpService
from
dynamo._core
import
KvEventPublisher
as
KvEventPublisher
from
dynamo._core
import
KvEventPublisher
as
KvEventPublisher
from
dynamo._core
import
KvIndexer
as
KvIndexer
from
dynamo._core
import
KvIndexer
as
KvIndexer
from
dynamo._core
import
KvMetricsAggregator
as
KvMetricsAggregator
from
dynamo._core
import
KvPushRouter
as
KvPushRouter
from
dynamo._core
import
KvPushRouter
as
KvPushRouter
from
dynamo._core
import
KvRecorder
as
KvRecorder
from
dynamo._core
import
KvRecorder
as
KvRecorder
from
dynamo._core
import
KvRouterConfig
as
KvRouterConfig
from
dynamo._core
import
KvRouterConfig
as
KvRouterConfig
...
...
lib/bindings/python/tests/test_kv_bindings.py
View file @
4c207e0c
...
@@ -19,17 +19,7 @@ from typing import List
...
@@ -19,17 +19,7 @@ from typing import List
import
pytest
import
pytest
from
dynamo.llm
import
(
from
dynamo.llm
import
ApproxKvIndexer
,
KvEventPublisher
,
KvIndexer
,
RadixTree
ApproxKvIndexer
,
ForwardPassMetrics
,
KvEventPublisher
,
KvIndexer
,
KvMetricsAggregator
,
KvStats
,
RadixTree
,
WorkerMetricsPublisher
,
WorkerStats
,
)
from
dynamo.runtime
import
Component
,
DistributedRuntime
from
dynamo.runtime
import
Component
,
DistributedRuntime
pytestmark
=
pytest
.
mark
.
pre_merge
pytestmark
=
pytest
.
mark
.
pre_merge
...
@@ -226,80 +216,3 @@ class EventPublisher:
...
@@ -226,80 +216,3 @@ class EventPublisher:
],
# block_hashes
],
# block_hashes
)
)
self
.
event_id_counter
+=
1
self
.
event_id_counter
+=
1
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
forked
async
def
test_metrics_aggregator
(
distributed_runtime
):
namespace
=
"kv_test"
component
=
"metrics"
kv_listener
=
distributed_runtime
.
namespace
(
namespace
).
component
(
component
)
await
kv_listener
.
create_service
()
# aggregator
metrics_aggregator
=
KvMetricsAggregator
(
kv_listener
)
# has nothing to aggregate as worker has not started
metrics
=
await
metrics_aggregator
.
get_metrics
()
assert
not
metrics
.
endpoints
expected_metrics
=
{
"request_active_slots"
:
0
,
"request_total_slots"
:
1024
,
"kv_active_blocks"
:
523
,
"kv_total_blocks"
:
777
,
"num_requests_waiting"
:
10
,
"gpu_cache_usage_perc"
:
0.5
,
"gpu_prefix_cache_hit_rate"
:
0.75
,
}
# need 'create_task' to put publisher task in the background
asyncio
.
create_task
(
metrics_publisher_task
(
kv_listener
,
expected_metrics
))
# needs time for publisher to spawn up
# Using shorter intervals for faster detection in normal cases
for
i
in
range
(
20
):
# Try up to 20 times (10 seconds total)
await
asyncio
.
sleep
(
0.5
)
# Wait 500ms between retries
metrics
=
await
metrics_aggregator
.
get_metrics
()
if
metrics
.
endpoints
:
break
assert
metrics
.
endpoints
,
f
"No metrics endpoints found after
{
(
i
+
1
)
*
0.5
}
s"
for
endpoint
in
metrics
.
endpoints
:
# [TODO] not really checking id for now, can't get it as create_endpoint()
# create and serve the endpoint internally
assert
endpoint
.
worker_id
!=
0
assert
endpoint
.
request_active_slots
==
expected_metrics
[
"request_active_slots"
]
assert
endpoint
.
request_total_slots
==
expected_metrics
[
"request_total_slots"
]
assert
endpoint
.
kv_active_blocks
==
expected_metrics
[
"kv_active_blocks"
]
assert
endpoint
.
kv_total_blocks
==
expected_metrics
[
"kv_total_blocks"
]
async
def
metrics_publisher_task
(
kv_listener
,
expected_metrics
):
# Construct the structured ForwardPassMetrics payload expected by the
# current Rust bindings instead of passing the individual scalar values
# directly. The API for `WorkerMetricsPublisher.publish`
# changed from a list of positional scalars to a single
# `ForwardPassMetrics` object.
metrics_publisher
=
WorkerMetricsPublisher
()
worker_stats
=
WorkerStats
(
expected_metrics
[
"request_active_slots"
],
expected_metrics
[
"request_total_slots"
],
expected_metrics
[
"num_requests_waiting"
],
0
,
# data_parallel_rank (0 = DP not enabled)
)
kv_stats
=
KvStats
(
expected_metrics
[
"kv_active_blocks"
],
expected_metrics
[
"kv_total_blocks"
],
expected_metrics
[
"gpu_cache_usage_perc"
],
expected_metrics
[
"gpu_prefix_cache_hit_rate"
],
)
metrics
=
ForwardPassMetrics
(
worker_stats
,
kv_stats
,
None
)
# Publish and expose the metrics via the endpoint so that the aggregator
# test can discover them.
metrics_publisher
.
publish
(
metrics
)
await
metrics_publisher
.
create_endpoint
(
kv_listener
)
lib/llm/src/kv_router.rs
View file @
4c207e0c
...
@@ -22,7 +22,6 @@ use serde::{Deserialize, Serialize};
...
@@ -22,7 +22,6 @@ use serde::{Deserialize, Serialize};
pub
mod
approx
;
pub
mod
approx
;
pub
mod
indexer
;
pub
mod
indexer
;
pub
mod
metrics_aggregator
;
pub
mod
protocols
;
pub
mod
protocols
;
pub
mod
publisher
;
pub
mod
publisher
;
pub
mod
recorder
;
pub
mod
recorder
;
...
@@ -42,7 +41,6 @@ use crate::{
...
@@ -42,7 +41,6 @@ use crate::{
LocalBlockHash
,
RouterRequest
,
RouterResponse
,
WorkerSelectionResult
,
WorkerWithDpRank
,
LocalBlockHash
,
RouterRequest
,
RouterResponse
,
WorkerSelectionResult
,
WorkerWithDpRank
,
},
},
scheduler
::{
KvScheduler
,
KvSchedulerError
,
PotentialLoad
,
SchedulingRequest
},
scheduler
::{
KvScheduler
,
KvSchedulerError
,
PotentialLoad
,
SchedulingRequest
},
scoring
::
ProcessedEndpoints
,
subscriber
::
start_kv_router_background
,
subscriber
::
start_kv_router_background
,
},
},
local_model
::
runtime_config
::
ModelRuntimeConfig
,
local_model
::
runtime_config
::
ModelRuntimeConfig
,
...
...
lib/llm/src/kv_router/metrics_aggregator.rs
deleted
100644 → 0
View file @
2c2f7c7d
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use
std
::
sync
::
Once
;
use
crate
::
kv_router
::
KV_METRICS_ENDPOINT
;
pub
use
crate
::
kv_router
::
protocols
::{
ForwardPassMetrics
,
LoadMetrics
,
PredictiveLoadMetrics
};
use
crate
::
kv_router
::
ProcessedEndpoints
;
use
crate
::
kv_router
::
scoring
::
Endpoint
;
use
dynamo_runtime
::
component
::
Component
;
use
dynamo_runtime
::{
Result
,
service
::
EndpointInfo
,
utils
::
Duration
};
use
tokio
::
sync
::
watch
;
use
tokio_util
::
sync
::
CancellationToken
;
static
METRICS_WAITING_MESSAGE
:
Once
=
Once
::
new
();
static
METRICS_FOUND_MESSAGE
:
Once
=
Once
::
new
();
pub
struct
EndpointCollector
{
pub
service_name
:
String
,
pub
endpoints_rx
:
watch
::
Receiver
<
ProcessedEndpoints
>
,
}
impl
EndpointCollector
{
pub
async
fn
new
(
component
:
Component
,
cancellation_token
:
CancellationToken
)
->
Self
{
let
(
watch_tx
,
watch_rx
)
=
watch
::
channel
(
ProcessedEndpoints
::
default
());
tokio
::
spawn
(
collect_endpoints_task
(
component
.clone
(),
watch_tx
,
cancellation_token
.clone
(),
"generate"
.to_string
(),
));
Self
{
service_name
:
component
.service_name
(),
endpoints_rx
:
watch_rx
,
}
}
pub
fn
get_endpoints
(
&
self
)
->
ProcessedEndpoints
{
self
.endpoints_rx
.borrow
()
.clone
()
}
pub
fn
endpoints_watcher
(
&
self
)
->
watch
::
Receiver
<
ProcessedEndpoints
>
{
self
.endpoints_rx
.clone
()
}
}
pub
struct
KvMetricsAggregator
{
pub
service_name
:
String
,
pub
endpoints_rx
:
watch
::
Receiver
<
ProcessedEndpoints
>
,
}
impl
KvMetricsAggregator
{
pub
async
fn
new
(
component
:
Component
,
cancellation_token
:
CancellationToken
)
->
Self
{
let
(
watch_tx
,
watch_rx
)
=
watch
::
channel
(
ProcessedEndpoints
::
default
());
tokio
::
spawn
(
collect_endpoints_task
(
component
.clone
(),
watch_tx
,
cancellation_token
.clone
(),
KV_METRICS_ENDPOINT
.to_string
(),
));
Self
{
service_name
:
component
.service_name
(),
endpoints_rx
:
watch_rx
,
}
}
pub
fn
get_endpoints
(
&
self
)
->
ProcessedEndpoints
{
self
.endpoints_rx
.borrow
()
.clone
()
}
pub
fn
endpoints_watcher
(
&
self
)
->
watch
::
Receiver
<
ProcessedEndpoints
>
{
self
.endpoints_rx
.clone
()
}
}
/// [gluo TODO] 'collect_endpoints' is from component/metrics,
/// should consolidate these functions into generic metrics aggregator
/// functions and shared by KvMetricsAggregator and component/metrics.
/// Collect endpoints from a component
pub
async
fn
collect_endpoints
(
component
:
&
Component
,
subject
:
&
str
,
timeout
:
Duration
,
)
->
Result
<
Vec
<
EndpointInfo
>>
{
// Collect stats from each backend
let
stream
=
component
.scrape_stats
(
timeout
)
.await
?
;
// Filter the stats by the service subject
let
endpoints
=
stream
.into_endpoints
()
.filter
(|
e
|
e
.subject
.starts_with
(
subject
))
.collect
::
<
Vec
<
_
>>
();
if
endpoints
.is_empty
()
{
// Only print it once, we poll while the worker starts
METRICS_WAITING_MESSAGE
.call_once
(||
{
tracing
::
debug!
(
"Waiting for metrics endpoint.."
);
});
}
else
{
METRICS_FOUND_MESSAGE
.call_once
(||
{
tracing
::
debug!
(
"Found metrics endpoint"
);
});
}
Ok
(
endpoints
)
}
pub
async
fn
collect_endpoints_task
(
component
:
Component
,
watch_tx
:
watch
::
Sender
<
ProcessedEndpoints
>
,
cancel
:
CancellationToken
,
subject
:
String
,
)
{
let
backoff_delay
=
Duration
::
from_millis
(
100
);
let
scrape_timeout
=
Duration
::
from_millis
(
300
);
let
endpoint
=
component
.endpoint
(
&
subject
);
let
service_subject
=
endpoint
.subject
();
// Keep track of the last sent value to avoid unnecessary updates
let
mut
last_sent
:
Option
<
ProcessedEndpoints
>
=
None
;
loop
{
tokio
::
select!
{
_
=
cancel
.cancelled
()
=>
{
break
;
}
_
=
tokio
::
time
::
sleep
(
backoff_delay
)
=>
{
tracing
::
trace!
(
"collecting endpoints for service: {}"
,
service_subject
);
let
unfiltered_endpoints
=
match
collect_endpoints
(
&
component
,
&
service_subject
,
scrape_timeout
)
.await
{
Ok
(
v
)
=>
v
,
Err
(
e
)
=>
{
tracing
::
warn!
(
"Failed to retrieve endpoints for {}: {:?}"
,
service_subject
,
e
);
continue
;
}
};
let
endpoints
:
Vec
<
Endpoint
>
=
if
subject
==
KV_METRICS_ENDPOINT
{
// Original filtering behavior
unfiltered_endpoints
.into_iter
()
.filter_map
(|
s
|
{
s
.data
?
.decode
::
<
ForwardPassMetrics
>
()
.map
(|
data
|
Endpoint
{
name
:
s
.name
,
subject
:
s
.subject
,
data
:
LoadMetrics
::
EngineLoadMetrics
(
data
),
})
.inspect_err
(|
e
|
{
tracing
::
warn!
(
"skip endpoint data that can't be parsed as ForwardPassMetrics: {:?}"
,
e
);
})
.ok
()
})
.collect
()
}
else
{
// No filtering - just use default LoadMetrics
unfiltered_endpoints
.into_iter
()
.map
(|
s
|
Endpoint
{
name
:
s
.name
,
subject
:
s
.subject
,
data
:
LoadMetrics
::
default
(),
})
.collect
()
};
tracing
::
trace!
(
"Found {} endpoints for service: {service_subject}"
,
endpoints
.len
());
let
processed
=
ProcessedEndpoints
::
new
(
endpoints
);
// Only send if different from last sent value
// This is necessary because the watch channel does not track changes
// https://docs.rs/tokio/latest/tokio/sync/watch/struct.Receiver.html#method.has_changed
let
should_send
=
match
&
last_sent
{
Some
(
last
)
=>
last
!=
&
processed
,
None
=>
true
,
};
if
should_send
{
tracing
::
trace!
(
"Endpoints changed, sending update for service: {service_subject}"
);
if
watch_tx
.send
(
processed
.clone
())
.is_err
()
{
tracing
::
error!
(
"failed to send processed endpoints; shutting down"
);
break
;
}
last_sent
=
Some
(
processed
);
}
else
{
tracing
::
trace!
(
"Endpoints unchanged, skipping update for service: {service_subject}"
);
}
}
}
}
}
lib/llm/src/kv_router/publisher.rs
View file @
4c207e0c
...
@@ -2,25 +2,18 @@
...
@@ -2,25 +2,18 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-License-Identifier: Apache-2.0
use
crate
::
kv_router
::{
use
crate
::
kv_router
::{
KV_EVENT_SUBJECT
,
KV_METRICS_ENDPOINT
,
KV_METRICS_SUBJECT
,
KV_EVENT_SUBJECT
,
KV_METRICS_SUBJECT
,
indexer
::{
RouterEvent
,
compute_block_hash_for_seq
},
indexer
::{
RouterEvent
,
compute_block_hash_for_seq
},
protocols
::
*
,
protocols
::
*
,
scoring
::
LoadEvent
,
scoring
::
LoadEvent
,
};
};
use
async_trait
::
async_trait
;
use
dynamo_runtime
::
metrics
::{
MetricsRegistry
,
prometheus_names
::
kvstats
};
use
dynamo_runtime
::
metrics
::{
MetricsRegistry
,
prometheus_names
::
kvstats
};
use
dynamo_runtime
::
traits
::{
DistributedRuntimeProvider
,
events
::
EventPublisher
};
use
dynamo_runtime
::
traits
::{
DistributedRuntimeProvider
,
events
::
EventPublisher
};
use
dynamo_runtime
::{
use
dynamo_runtime
::{
Error
,
Result
,
Result
,
component
::{
Component
,
Namespace
},
component
::{
Component
,
Namespace
},
pipeline
::{
AsyncEngine
,
AsyncEngineContextProvider
,
ManyOut
,
ResponseStream
,
SingleIn
,
network
::
Ingress
,
},
protocols
::
annotated
::
Annotated
,
transports
::
nats
::{
NatsQueue
,
QUEUE_NAME
,
Slug
},
transports
::
nats
::{
NatsQueue
,
QUEUE_NAME
,
Slug
},
};
};
use
futures
::
stream
;
use
std
::
sync
::{
Arc
,
OnceLock
};
use
std
::
sync
::{
Arc
,
OnceLock
};
use
tokio
::
sync
::
mpsc
;
use
tokio
::
sync
::
mpsc
;
use
tokio_util
::
sync
::
CancellationToken
;
use
tokio_util
::
sync
::
CancellationToken
;
...
@@ -790,15 +783,7 @@ impl WorkerMetricsPublisher {
...
@@ -790,15 +783,7 @@ impl WorkerMetricsPublisher {
Ok
(())
Ok
(())
}
}
pub
async
fn
create_endpoint
(
pub
async
fn
create_endpoint
(
&
self
,
component
:
Component
)
->
Result
<
()
>
{
&
self
,
component
:
Component
,
metrics_labels
:
Option
<&
[(
&
str
,
&
str
)]
>
,
)
->
Result
<
()
>
{
let
mut
metrics_rx
=
self
.rx
.clone
();
let
handler
=
Arc
::
new
(
KvLoadEndpointHandler
::
new
(
metrics_rx
.clone
()));
let
handler
=
Ingress
::
for_engine
(
handler
)
?
;
let
worker_id
=
component
let
worker_id
=
component
.drt
()
.drt
()
.primary_lease
()
.primary_lease
()
...
@@ -809,31 +794,13 @@ impl WorkerMetricsPublisher {
...
@@ -809,31 +794,13 @@ impl WorkerMetricsPublisher {
});
});
self
.start_nats_metrics_publishing
(
component
.namespace
()
.clone
(),
worker_id
);
self
.start_nats_metrics_publishing
(
component
.namespace
()
.clone
(),
worker_id
);
Ok
(())
let
metrics_labels
=
metrics_labels
.map
(|
v
|
{
v
.iter
()
.map
(|(
k
,
v
)|
(
k
.to_string
(),
v
.to_string
()))
.collect
::
<
Vec
<
_
>>
()
});
component
.endpoint
(
KV_METRICS_ENDPOINT
)
.endpoint_builder
()
.stats_handler
(
move
|
_
|
{
let
metrics
=
metrics_rx
.borrow_and_update
()
.clone
();
serde_json
::
to_value
(
&*
metrics
)
.unwrap
()
})
.metrics_labels
(
metrics_labels
)
.handler
(
handler
)
.start
()
.await
}
}
/// Starts a background task to publish metrics over NATS
/// Starts a background task to publish metrics over NATS
///
///
/// This task monitors metric changes (specifically kv_active_blocks and num_requests_waiting)
/// This task monitors metric changes (specifically kv_active_blocks and num_requests_waiting)
/// and publishes stable metrics to NATS after they've been unchanged for 1ms.
/// and publishes stable metrics to NATS after they've been unchanged for 1ms.
#[allow(dead_code)]
fn
start_nats_metrics_publishing
(
&
self
,
namespace
:
Namespace
,
worker_id
:
i64
)
{
fn
start_nats_metrics_publishing
(
&
self
,
namespace
:
Namespace
,
worker_id
:
i64
)
{
let
nats_rx
=
self
.rx
.clone
();
let
nats_rx
=
self
.rx
.clone
();
...
@@ -907,32 +874,6 @@ impl WorkerMetricsPublisher {
...
@@ -907,32 +874,6 @@ impl WorkerMetricsPublisher {
}
}
}
}
struct
KvLoadEndpointHandler
{
metrics_rx
:
tokio
::
sync
::
watch
::
Receiver
<
Arc
<
ForwardPassMetrics
>>
,
}
impl
KvLoadEndpointHandler
{
pub
fn
new
(
metrics_rx
:
tokio
::
sync
::
watch
::
Receiver
<
Arc
<
ForwardPassMetrics
>>
)
->
Self
{
Self
{
metrics_rx
}
}
}
#[async_trait]
impl
AsyncEngine
<
SingleIn
<
()
>
,
ManyOut
<
Annotated
<
ForwardPassMetrics
>>
,
Error
>
for
KvLoadEndpointHandler
{
async
fn
generate
(
&
self
,
request
:
SingleIn
<
()
>
,
)
->
Result
<
ManyOut
<
Annotated
<
ForwardPassMetrics
>>>
{
let
context
=
request
.context
();
let
metrics
=
self
.metrics_rx
.borrow
()
.clone
();
let
metrics
=
(
*
metrics
)
.clone
();
let
stream
=
stream
::
iter
(
vec!
[
Annotated
::
from_data
(
metrics
)]);
Ok
(
ResponseStream
::
new
(
Box
::
pin
(
stream
),
context
))
}
}
// -------------------------------------------------------------------------
// -------------------------------------------------------------------------
// Testing -----------------------------------------------------------------
// Testing -----------------------------------------------------------------
// -------------------------------------------------------------------------
// -------------------------------------------------------------------------
...
...
lib/llm/src/mocker/engine.rs
View file @
4c207e0c
...
@@ -184,7 +184,7 @@ impl MockVllmEngine {
...
@@ -184,7 +184,7 @@ impl MockVllmEngine {
tokio
::
spawn
({
tokio
::
spawn
({
let
publisher
=
metrics_publisher
.clone
();
let
publisher
=
metrics_publisher
.clone
();
async
move
{
async
move
{
if
let
Err
(
e
)
=
publisher
.create_endpoint
(
comp
.clone
()
,
None
)
.await
{
if
let
Err
(
e
)
=
publisher
.create_endpoint
(
comp
.clone
())
.await
{
tracing
::
error!
(
"Metrics endpoint failed: {e}"
);
tracing
::
error!
(
"Metrics endpoint failed: {e}"
);
}
}
}
}
...
@@ -715,36 +715,6 @@ mod integration_tests {
...
@@ -715,36 +715,6 @@ mod integration_tests {
}
}
}
}
// Use KvMetricsAggregator to get metrics more easily
let
cancel_token
=
test_component
.drt
()
.runtime
()
.child_token
();
let
metrics_aggregator
=
crate
::
kv_router
::
metrics_aggregator
::
KvMetricsAggregator
::
new
(
test_component
.clone
(),
cancel_token
,
)
.await
;
tokio
::
time
::
sleep
(
Duration
::
from_millis
(
500
))
.await
;
let
processed_endpoints
=
metrics_aggregator
.get_endpoints
();
tracing
::
info!
(
"Found {} metrics endpoints"
,
processed_endpoints
.endpoints
.len
()
);
// Verify we found at least one metrics endpoint
assert
!
(
!
processed_endpoints
.endpoints
.is_empty
(),
"Should find at least one metrics endpoint"
);
tracing
::
info!
(
"✓ Successfully found {} metrics endpoints"
,
processed_endpoints
.endpoints
.len
()
);
// Verify the metrics endpoints contain valid data
for
(
worker_id
,
endpoint
)
in
&
processed_endpoints
.endpoints
{
tracing
::
info!
(
"✓ Worker {} metrics: {:?}"
,
worker_id
,
endpoint
.data
);
}
tracing
::
info!
(
"🎉 Event verification completed!"
);
tracing
::
info!
(
"🎉 Event verification completed!"
);
// Cleanup
// Cleanup
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment