Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
f6ed01b1
Unverified
Commit
f6ed01b1
authored
Oct 20, 2025
by
Graham King
Committed by
GitHub
Oct 20, 2025
Browse files
chore: Replace ServiceConfigBuilder with add_stats_service (#3736)
Signed-off-by:
Graham King
<
grahamk@nvidia.com
>
parent
a7fed329
Changes
13
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
85 additions
and
138 deletions
+85
-138
lib/bindings/python/rust/lib.rs
lib/bindings/python/rust/lib.rs
+3
-2
lib/llm/src/block_manager/controller.rs
lib/llm/src/block_manager/controller.rs
+4
-3
lib/llm/src/entrypoint/input/endpoint.rs
lib/llm/src/entrypoint/input/endpoint.rs
+1
-2
lib/llm/src/kv_router/sequence.rs
lib/llm/src/kv_router/sequence.rs
+4
-10
lib/llm/src/mocker/engine.rs
lib/llm/src/mocker/engine.rs
+2
-6
lib/runtime/examples/hello_world/src/bin/server.rs
lib/runtime/examples/hello_world/src/bin/server.rs
+3
-6
lib/runtime/examples/service_metrics/src/bin/service_server.rs
...untime/examples/service_metrics/src/bin/service_server.rs
+3
-6
lib/runtime/examples/system_metrics/src/lib.rs
lib/runtime/examples/system_metrics/src/lib.rs
+4
-6
lib/runtime/src/component.rs
lib/runtime/src/component.rs
+42
-2
lib/runtime/src/component/service.rs
lib/runtime/src/component/service.rs
+1
-73
lib/runtime/src/metrics.rs
lib/runtime/src/metrics.rs
+12
-9
lib/runtime/src/system_status_server.rs
lib/runtime/src/system_status_server.rs
+3
-7
lib/runtime/tests/soak.rs
lib/runtime/tests/soak.rs
+3
-6
No files found.
lib/bindings/python/rust/lib.rs
View file @
f6ed01b1
...
...
@@ -664,10 +664,11 @@ impl Component {
})
}
/// NATS specific stats/metrics call
fn
create_service
<
'p
>
(
&
self
,
py
:
Python
<
'p
>
)
->
PyResult
<
Bound
<
'p
,
PyAny
>>
{
let
build
er
=
self
.inner
.
service_builder
();
let
mut
inn
er
=
self
.inner
.
clone
();
pyo3_async_runtimes
::
tokio
::
future_into_py
(
py
,
async
move
{
let
_
=
builder
.creat
e
()
.await
.map_err
(
to_pyerr
)
?
;
inner
.add_stats_servic
e
()
.await
.map_err
(
to_pyerr
)
?
;
Ok
(())
})
}
...
...
lib/llm/src/block_manager/controller.rs
View file @
f6ed01b1
...
...
@@ -39,16 +39,17 @@ pub struct Controller<Locality: LocalityProvider, Metadata: BlockMetadata> {
impl
<
Locality
:
LocalityProvider
,
Metadata
:
BlockMetadata
>
Controller
<
Locality
,
Metadata
>
{
pub
async
fn
new
(
block_manager
:
KvBlockManager
<
Locality
,
Metadata
>
,
component
:
dynamo_runtime
::
component
::
Component
,
mut
component
:
dynamo_runtime
::
component
::
Component
,
)
->
anyhow
::
Result
<
Self
>
{
let
service
=
component
.service_builder
()
.creat
e
()
.await
?
;
component
.add_stats_servic
e
()
.await
?
;
let
handler
=
ControllerHandler
::
new
(
block_manager
.clone
());
let
engine
=
Ingress
::
for_engine
(
handler
.clone
())
?
;
let
component_clone
=
component
.clone
();
let
reset_task
=
CriticalTaskExecutionHandle
::
new
(
|
_
cancel_token
|
async
move
{
servic
e
component_clon
e
.endpoint
(
"controller"
)
.endpoint_builder
()
.handler
(
engine
)
...
...
lib/llm/src/entrypoint/input/endpoint.rs
View file @
f6ed01b1
...
...
@@ -38,8 +38,7 @@ pub async fn run(
// We can only make the NATS service if we have NATS
if
distributed_runtime
.nats_client
()
.is_some
()
{
// TODO fix in next PR, ServiceConfigBuilder is silly
component
=
component
.service_builder
()
.create
()
.await
?
;
component
.add_stats_service
()
.await
?
;
}
let
endpoint
=
component
.endpoint
(
&
endpoint_id
.name
);
...
...
lib/llm/src/kv_router/sequence.rs
View file @
f6ed01b1
...
...
@@ -939,11 +939,8 @@ mod tests {
// Create namespace and shared component for both seq_managers
let
namespace
=
distributed
.namespace
(
"test_cross_instance_sync"
)
?
;
let
component
=
namespace
.component
(
"sequences"
)
?
.service_builder
()
.create
()
.await
?
;
let
mut
component
=
namespace
.component
(
"sequences"
)
?
;
component
.add_stats_service
()
.await
?
;
// Create multi-worker sequence managers with:
// - Worker 0 with dp_size=2 (dp_ranks 0 and 1)
...
...
@@ -1108,11 +1105,8 @@ mod tests {
// Create namespace and shared component for both seq_managers
let
namespace
=
distributed
.namespace
(
"test_no_token_seq_sync"
)
?
;
let
component
=
namespace
.component
(
"sequences"
)
?
.service_builder
()
.create
()
.await
?
;
let
mut
component
=
namespace
.component
(
"sequences"
)
?
;
component
.add_stats_service
()
.await
?
;
// Create multi-worker sequence managers with ALL workers [0, 1, 2]
// Both use the same component to ensure event synchronization works
...
...
lib/llm/src/mocker/engine.rs
View file @
f6ed01b1
...
...
@@ -539,12 +539,8 @@ mod integration_tests {
tracing
::
info!
(
"✓ Runtime and distributed runtime created"
);
// Create component for MockVllmEngine (needed for publishers)
let
test_component
=
distributed
.namespace
(
"test"
)
?
.component
(
MOCKER_COMPONENT
)
?
.service_builder
()
.create
()
.await
?
;
let
mut
test_component
=
distributed
.namespace
(
"test"
)
?
.component
(
MOCKER_COMPONENT
)
?
;
test_component
.add_stats_service
()
.await
?
;
tracing
::
info!
(
"✓ Test component created"
);
// Create MockVllmEngine WITH component (enables publishers)
...
...
lib/runtime/examples/hello_world/src/bin/server.rs
View file @
f6ed01b1
...
...
@@ -54,12 +54,9 @@ async fn backend(runtime: DistributedRuntime) -> Result<()> {
// // make the ingress discoverable via a component service
// // we must first create a service, then we can attach one more more endpoints
runtime
.namespace
(
DEFAULT_NAMESPACE
)
?
.component
(
"backend"
)
?
.service_builder
()
.create
()
.await
?
let
mut
component
=
runtime
.namespace
(
DEFAULT_NAMESPACE
)
?
.component
(
"backend"
)
?
;
component
.add_stats_service
()
.await
?
;
component
.endpoint
(
"generate"
)
.endpoint_builder
()
.handler
(
ingress
)
...
...
lib/runtime/examples/service_metrics/src/bin/service_server.rs
View file @
f6ed01b1
...
...
@@ -56,12 +56,9 @@ async fn backend(runtime: DistributedRuntime) -> Result<()> {
// make the ingress discoverable via a component service
// we must first create a service, then we can attach one more more endpoints
runtime
.namespace
(
DEFAULT_NAMESPACE
)
?
.component
(
"backend"
)
?
.service_builder
()
.create
()
.await
?
let
mut
component
=
runtime
.namespace
(
DEFAULT_NAMESPACE
)
?
.component
(
"backend"
)
?
;
component
.add_stats_service
()
.await
?
;
component
.endpoint
(
"generate"
)
.endpoint_builder
()
.stats_handler
(|
stats
|
{
...
...
lib/runtime/examples/system_metrics/src/lib.rs
View file @
f6ed01b1
...
...
@@ -88,13 +88,11 @@ impl AsyncEngine<SingleIn<String>, ManyOut<Annotated<String>>, Error> for Reques
pub
async
fn
backend
(
drt
:
DistributedRuntime
,
endpoint_name
:
Option
<&
str
>
)
->
Result
<
()
>
{
let
endpoint_name
=
endpoint_name
.unwrap_or
(
DEFAULT_ENDPOINT
);
let
endpoi
nt
=
drt
let
mut
compone
nt
=
drt
.namespace
(
DEFAULT_NAMESPACE
)
?
.component
(
DEFAULT_COMPONENT
)
?
.service_builder
()
.create
()
.await
?
.endpoint
(
endpoint_name
);
.component
(
DEFAULT_COMPONENT
)
?
;
component
.add_stats_service
()
.await
?
;
let
endpoint
=
component
.endpoint
(
endpoint_name
);
// Create custom metrics for system stats
let
system_metrics
=
...
...
lib/runtime/src/component.rs
View file @
f6ed01b1
...
...
@@ -371,8 +371,48 @@ impl Component {
unimplemented!
(
"collect_stats"
)
}
pub
fn
service_builder
(
&
self
)
->
service
::
ServiceConfigBuilder
{
service
::
ServiceConfigBuilder
::
from_component
(
self
.clone
())
pub
async
fn
add_stats_service
(
&
mut
self
)
->
anyhow
::
Result
<
()
>
{
let
service_name
=
self
.service_name
();
// Pre-check to save cost of creating the service, but don't hold the lock
if
self
.drt
.component_registry
.inner
.lock
()
.await
.services
.contains_key
(
&
service_name
)
{
anyhow
::
bail!
(
"Service {service_name} already exists"
);
}
let
Some
(
nats_client
)
=
self
.drt
.nats_client
()
else
{
anyhow
::
bail!
(
"Cannot create NATS service without NATS."
);
};
let
description
=
None
;
let
(
nats_service
,
stats_reg
)
=
service
::
build_nats_service
(
nats_client
,
self
,
description
)
.await
?
;
let
mut
guard
=
self
.drt.component_registry.inner
.lock
()
.await
;
if
!
guard
.services
.contains_key
(
&
service_name
)
{
// Normal case
guard
.services
.insert
(
service_name
.clone
(),
nats_service
);
guard
.stats_handlers
.insert
(
service_name
.clone
(),
stats_reg
);
drop
(
guard
);
}
else
{
drop
(
guard
);
let
_
=
nats_service
.stop
()
.await
;
return
Err
(
anyhow
::
anyhow!
(
"Service create race for {service_name}, now already exists"
));
}
// Register metrics callback. CRITICAL: Never fail service creation for metrics issues.
if
let
Err
(
err
)
=
self
.start_scraping_nats_service_component_metrics
()
{
tracing
::
debug!
(
service_name
,
error
=
%
err
,
"Metrics registration failed"
);
}
Ok
(())
}
}
...
...
lib/runtime/src/component/service.rs
View file @
f6ed01b1
...
...
@@ -12,8 +12,6 @@ use crate::component::Component;
pub
use
super
::
endpoint
::
EndpointStats
;
use
educe
::
Educe
;
type
StatsHandlerRegistry
=
Arc
<
Mutex
<
HashMap
<
String
,
EndpointStatsHandler
>>>
;
pub
type
StatsHandler
=
Box
<
dyn
FnMut
(
String
,
EndpointStats
)
->
serde_json
::
Value
+
Send
+
Sync
+
'static
>
;
...
...
@@ -23,71 +21,7 @@ pub type EndpointStatsHandler =
pub
const
PROJECT_NAME
:
&
str
=
"Dynamo"
;
const
SERVICE_VERSION
:
&
str
=
env!
(
"CARGO_PKG_VERSION"
);
#[derive(Educe,
Builder,
Dissolve)]
#[educe(Debug)]
#[builder(pattern
=
"owned"
,
build_fn(private,
name
=
"build_internal"
))]
pub
struct
ServiceConfig
{
#[builder(private)]
component
:
Component
,
/// Description
#[builder(default)]
description
:
Option
<
String
>
,
}
impl
ServiceConfigBuilder
{
/// Create the [`Component`]'s service and store it in the registry.
pub
async
fn
create
(
self
)
->
anyhow
::
Result
<
Component
>
{
let
(
component
,
description
)
=
self
.build_internal
()
?
.dissolve
();
let
service_name
=
component
.service_name
();
// Pre-check to save cost of creating the service, but don't hold the lock
if
component
.drt
.component_registry
.inner
.lock
()
.await
.services
.contains_key
(
&
service_name
)
{
anyhow
::
bail!
(
"Service {service_name} already exists"
);
}
let
Some
(
nats_client
)
=
component
.drt
.nats_client
()
else
{
anyhow
::
bail!
(
"Cannot create NATS service without NATS."
);
};
let
(
nats_service
,
stats_reg
)
=
build_nats_service
(
nats_client
,
&
component
,
description
)
.await
?
;
let
mut
guard
=
component
.drt.component_registry.inner
.lock
()
.await
;
if
!
guard
.services
.contains_key
(
&
service_name
)
{
// Normal case
guard
.services
.insert
(
service_name
.clone
(),
nats_service
);
guard
.stats_handlers
.insert
(
service_name
,
stats_reg
);
drop
(
guard
);
}
else
{
drop
(
guard
);
let
_
=
nats_service
.stop
()
.await
;
return
Err
(
anyhow
::
anyhow!
(
"Service create race for {service_name}, now already exists"
));
}
// Register metrics callback. CRITICAL: Never fail service creation for metrics issues.
if
let
Err
(
err
)
=
component
.start_scraping_nats_service_component_metrics
()
{
tracing
::
debug!
(
"Metrics registration failed for '{}': {}"
,
component
.service_name
(),
err
);
}
Ok
(
component
)
}
}
async
fn
build_nats_service
(
pub
async
fn
build_nats_service
(
nats_client
:
&
crate
::
transports
::
nats
::
Client
,
component
:
&
Component
,
description
:
Option
<
String
>
,
...
...
@@ -123,9 +57,3 @@ async fn build_nats_service(
Ok
((
nats_service
,
stats_handler_registry_clone
))
}
impl
ServiceConfigBuilder
{
pub
(
crate
)
fn
from_component
(
component
:
Component
)
->
Self
{
Self
::
default
()
.component
(
component
)
}
}
lib/runtime/src/metrics.rs
View file @
f6ed01b1
...
...
@@ -1347,17 +1347,17 @@ mod test_metricsregistry_nats {
// Setup real DRT and registry using the test-friendly constructor
let
drt
=
create_test_drt_async
()
.await
;
// Create a namespace and component
s
from the DRT
// Create a namespace and component from the DRT
let
namespace
=
drt
.namespace
(
"ns789"
)
.unwrap
();
let
component
s
=
namespace
.component
(
"comp789"
)
.unwrap
();
let
mut
component
=
namespace
.component
(
"comp789"
)
.unwrap
();
// Create a service to trigger metrics callback registration
let
_
service
=
components
.service_builder
()
.creat
e
()
.await
.unwrap
();
component
.add_stats_servic
e
()
.await
.unwrap
();
// Get component
s
output which should include NATS client metrics
// Get component output which should include NATS client metrics
// Additional checks for NATS client metrics (without checking specific values)
let
component_nats_metrics
=
super
::
test_helpers
::
extract_nats_lines
(
&
component
s
.prometheus_expfmt
()
.unwrap
());
super
::
test_helpers
::
extract_nats_lines
(
&
component
.prometheus_expfmt
()
.unwrap
());
println!
(
"Component NATS metrics count: {}"
,
component_nats_metrics
.len
()
...
...
@@ -1371,7 +1371,7 @@ mod test_metricsregistry_nats {
// Check for specific NATS client metric names (without values)
let
component_metrics
=
super
::
test_helpers
::
extract_metrics
(
&
component
s
.prometheus_expfmt
()
.unwrap
());
super
::
test_helpers
::
extract_metrics
(
&
component
.prometheus_expfmt
()
.unwrap
());
let
actual_component_nats_metrics_sorted
:
Vec
<&
str
>
=
component_metrics
.iter
()
.map
(|
line
|
{
...
...
@@ -1457,12 +1457,15 @@ mod test_metricsregistry_nats {
let
runtime
=
Runtime
::
from_current
()
?
;
let
drt
=
DistributedRuntime
::
from_settings
(
runtime
.clone
())
.await
?
;
let
namespace
=
drt
.namespace
(
"ns123"
)
.unwrap
();
let
component
=
namespace
.component
(
"comp123"
)
.unwrap
();
let
mut
component
=
namespace
.component
(
"comp123"
)
.unwrap
();
let
ingress
=
Ingress
::
for_engine
(
MessageHandler
::
new
())
.unwrap
();
let
_
backend_handle
=
tokio
::
spawn
(
async
move
{
let
service
=
component
.service_builder
()
.create
()
.await
.unwrap
();
let
endpoint
=
service
.endpoint
(
"echo"
)
.endpoint_builder
()
.handler
(
ingress
);
component
.add_stats_service
()
.await
.unwrap
();
let
endpoint
=
component
.endpoint
(
"echo"
)
.endpoint_builder
()
.handler
(
ingress
);
endpoint
.start
()
.await
.unwrap
();
});
...
...
lib/runtime/src/system_status_server.rs
View file @
f6ed01b1
...
...
@@ -615,7 +615,7 @@ mod integration_tests {
// Now create a namespace, component, and endpoint to make the system healthy
let
namespace
=
drt
.namespace
(
"ns1234"
)
.unwrap
();
let
component
=
namespace
.component
(
"comp1234"
)
.unwrap
();
let
mut
component
=
namespace
.component
(
"comp1234"
)
.unwrap
();
// Create a simple test handler
use
crate
::
pipeline
::{
async_trait
,
network
::
Ingress
,
AsyncEngine
,
AsyncEngineContextProvider
,
Error
,
ManyOut
,
SingleIn
};
...
...
@@ -641,12 +641,8 @@ mod integration_tests {
// Start the service and endpoint with a health check payload
// This will automatically register the endpoint for health monitoring
tokio
::
spawn
(
async
move
{
let
_
=
component
.service_builder
()
.create
()
.await
.unwrap
()
.endpoint
(
ENDPOINT_NAME
)
component
.add_stats_service
()
.await
.unwrap
();
let
_
=
component
.endpoint
(
ENDPOINT_NAME
)
.endpoint_builder
()
.handler
(
ingress
)
.health_check_payload
(
serde_json
::
json!
({
...
...
lib/runtime/tests/soak.rs
View file @
f6ed01b1
...
...
@@ -116,12 +116,9 @@ mod integration {
// // make the ingress discoverable via a component service
// // we must first create a service, then we can attach one more more endpoints
runtime
.namespace
(
DEFAULT_NAMESPACE
)
?
.component
(
"backend"
)
?
.service_builder
()
.create
()
.await
?
let
mut
component
=
runtime
.namespace
(
DEFAULT_NAMESPACE
)
?
.component
(
"backend"
)
?
;
component
.add_stats_service
()
.await
?
;
component
.endpoint
(
"generate"
)
.endpoint_builder
()
.handler
(
ingress
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment