Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
cf630bf7
Unverified
Commit
cf630bf7
authored
Nov 10, 2025
by
Graham King
Committed by
GitHub
Nov 10, 2025
Browse files
refactor: Make the Runtime and DistributedRuntime fields private (#4193)
Signed-off-by:
Graham King
<
grahamk@nvidia.com
>
parent
0e623146
Changes
54
Show whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
97 additions
and
128 deletions
+97
-128
lib/runtime/src/service.rs
lib/runtime/src/service.rs
+3
-2
lib/runtime/src/system_status_server.rs
lib/runtime/src/system_status_server.rs
+21
-18
lib/runtime/src/traits.rs
lib/runtime/src/traits.rs
+1
-1
lib/runtime/src/traits/events.rs
lib/runtime/src/traits/events.rs
+3
-3
lib/runtime/src/transports/etcd.rs
lib/runtime/src/transports/etcd.rs
+11
-11
lib/runtime/src/transports/etcd/connector.rs
lib/runtime/src/transports/etcd/connector.rs
+2
-4
lib/runtime/src/transports/etcd/lock.rs
lib/runtime/src/transports/etcd/lock.rs
+1
-1
lib/runtime/src/transports/nats.rs
lib/runtime/src/transports/nats.rs
+2
-1
lib/runtime/src/transports/zmq.rs
lib/runtime/src/transports/zmq.rs
+12
-10
lib/runtime/src/utils/typed_prefix_watcher.rs
lib/runtime/src/utils/typed_prefix_watcher.rs
+1
-1
lib/runtime/src/worker.rs
lib/runtime/src/worker.rs
+21
-20
lib/runtime/tests/lifecycle.rs
lib/runtime/tests/lifecycle.rs
+3
-48
lib/runtime/tests/pipeline.rs
lib/runtime/tests/pipeline.rs
+13
-7
lib/runtime/tests/soak.rs
lib/runtime/tests/soak.rs
+3
-1
No files found.
lib/runtime/src/service.rs
View file @
cf630bf7
...
@@ -8,15 +8,16 @@
...
@@ -8,15 +8,16 @@
// component's "service state"
// component's "service state"
use
crate
::{
use
crate
::{
DistributedRuntime
,
Result
,
DistributedRuntime
,
component
::
Component
,
component
::
Component
,
error
,
metrics
::{
MetricsHierarchy
,
prometheus_names
,
prometheus_names
::
nats_service
},
metrics
::{
MetricsHierarchy
,
prometheus_names
,
prometheus_names
::
nats_service
},
traits
::
*
,
traits
::
*
,
transports
::
nats
,
transports
::
nats
,
utils
::
stream
,
utils
::
stream
,
};
};
use
anyhow
::
Result
;
use
anyhow
::
anyhow
as
error
;
use
async_nats
::
Message
;
use
async_nats
::
Message
;
use
async_stream
::
try_stream
;
use
async_stream
::
try_stream
;
use
bytes
::
Bytes
;
use
bytes
::
Bytes
;
...
...
lib/runtime/src/system_status_server.rs
View file @
cf630bf7
...
@@ -81,13 +81,13 @@ pub async fn spawn_system_status_server(
...
@@ -81,13 +81,13 @@ pub async fn spawn_system_status_server(
let
server_state
=
Arc
::
new
(
SystemStatusState
::
new
(
drt
)
?
);
let
server_state
=
Arc
::
new
(
SystemStatusState
::
new
(
drt
)
?
);
let
health_path
=
server_state
let
health_path
=
server_state
.drt
()
.drt
()
.system_health
.system_health
()
.lock
()
.lock
()
.health_path
()
.health_path
()
.to_string
();
.to_string
();
let
live_path
=
server_state
let
live_path
=
server_state
.drt
()
.drt
()
.system_health
.system_health
()
.lock
()
.lock
()
.live_path
()
.live_path
()
.to_string
();
.to_string
();
...
@@ -158,9 +158,11 @@ pub async fn spawn_system_status_server(
...
@@ -158,9 +158,11 @@ pub async fn spawn_system_status_server(
#[tracing::instrument(skip_all,
level
=
"trace"
)]
#[tracing::instrument(skip_all,
level
=
"trace"
)]
async
fn
health_handler
(
state
:
Arc
<
SystemStatusState
>
)
->
impl
IntoResponse
{
async
fn
health_handler
(
state
:
Arc
<
SystemStatusState
>
)
->
impl
IntoResponse
{
// Get basic health status
// Get basic health status
let
system_health
=
state
.drt
()
.system_health
.lock
();
let
system_health
=
state
.drt
()
.system_health
();
let
(
healthy
,
endpoints
)
=
system_health
.get_health_status
();
let
system_health_lock
=
system_health
.lock
();
let
uptime
=
Some
(
system_health
.uptime
());
let
(
healthy
,
endpoints
)
=
system_health_lock
.get_health_status
();
let
uptime
=
Some
(
system_health_lock
.uptime
());
drop
(
system_health_lock
);
let
healthy_string
=
if
healthy
{
"ready"
}
else
{
"notready"
};
let
healthy_string
=
if
healthy
{
"ready"
}
else
{
"notready"
};
let
status_code
=
if
healthy
{
let
status_code
=
if
healthy
{
...
@@ -184,7 +186,7 @@ async fn health_handler(state: Arc<SystemStatusState>) -> impl IntoResponse {
...
@@ -184,7 +186,7 @@ async fn health_handler(state: Arc<SystemStatusState>) -> impl IntoResponse {
#[tracing::instrument(skip_all,
level
=
"trace"
)]
#[tracing::instrument(skip_all,
level
=
"trace"
)]
async
fn
metrics_handler
(
state
:
Arc
<
SystemStatusState
>
)
->
impl
IntoResponse
{
async
fn
metrics_handler
(
state
:
Arc
<
SystemStatusState
>
)
->
impl
IntoResponse
{
// Update the uptime gauge with current value
// Update the uptime gauge with current value
state
.drt
()
.system_health
.lock
()
.update_uptime_gauge
();
state
.drt
()
.system_health
()
.lock
()
.update_uptime_gauge
();
// Get all metrics from DistributedRuntime
// Get all metrics from DistributedRuntime
// Note: In the new hierarchy-based architecture, metrics are automatically registered
// Note: In the new hierarchy-based architecture, metrics are automatically registered
...
@@ -260,13 +262,13 @@ mod integration_tests {
...
@@ -260,13 +262,13 @@ mod integration_tests {
let
drt
=
create_test_drt_async
()
.await
;
let
drt
=
create_test_drt_async
()
.await
;
// Get uptime from SystemHealth
// Get uptime from SystemHealth
let
uptime
=
drt
.system_health
.lock
()
.uptime
();
let
uptime
=
drt
.system_health
()
.lock
()
.uptime
();
// Uptime should exist (even if close to zero)
// Uptime should exist (even if close to zero)
assert
!
(
uptime
.as_nanos
()
>
0
||
uptime
.is_zero
());
assert
!
(
uptime
.as_nanos
()
>
0
||
uptime
.is_zero
());
// Sleep briefly and check uptime increases
// Sleep briefly and check uptime increases
tokio
::
time
::
sleep
(
std
::
time
::
Duration
::
from_millis
(
100
))
.await
;
tokio
::
time
::
sleep
(
std
::
time
::
Duration
::
from_millis
(
100
))
.await
;
let
uptime_after
=
drt
.system_health
.lock
()
.uptime
();
let
uptime_after
=
drt
.system_health
()
.lock
()
.uptime
();
assert
!
(
uptime_after
>
uptime
);
assert
!
(
uptime_after
>
uptime
);
})
})
.await
;
.await
;
...
@@ -317,19 +319,19 @@ mod integration_tests {
...
@@ -317,19 +319,19 @@ mod integration_tests {
let
drt
=
create_test_drt_async
()
.await
;
let
drt
=
create_test_drt_async
()
.await
;
// Get initial uptime
// Get initial uptime
let
initial_uptime
=
drt
.system_health
.lock
()
.uptime
();
let
initial_uptime
=
drt
.system_health
()
.lock
()
.uptime
();
// Update the gauge with initial value
// Update the gauge with initial value
drt
.system_health
.lock
()
.update_uptime_gauge
();
drt
.system_health
()
.lock
()
.update_uptime_gauge
();
// Sleep for 100ms
// Sleep for 100ms
tokio
::
time
::
sleep
(
std
::
time
::
Duration
::
from_millis
(
100
))
.await
;
tokio
::
time
::
sleep
(
std
::
time
::
Duration
::
from_millis
(
100
))
.await
;
// Get uptime after sleep
// Get uptime after sleep
let
uptime_after_sleep
=
drt
.system_health
.lock
()
.uptime
();
let
uptime_after_sleep
=
drt
.system_health
()
.lock
()
.uptime
();
// Update the gauge again
// Update the gauge again
drt
.system_health
.lock
()
.update_uptime_gauge
();
drt
.system_health
()
.lock
()
.update_uptime_gauge
();
// Verify uptime increased by at least 100ms
// Verify uptime increased by at least 100ms
let
elapsed
=
uptime_after_sleep
-
initial_uptime
;
let
elapsed
=
uptime_after_sleep
-
initial_uptime
;
...
@@ -582,8 +584,8 @@ mod integration_tests {
...
@@ -582,8 +584,8 @@ mod integration_tests {
struct
TestHandler
;
struct
TestHandler
;
#[async_trait]
#[async_trait]
impl
AsyncEngine
<
SingleIn
<
String
>
,
ManyOut
<
Annotated
<
String
>>
,
Error
>
for
TestHandler
{
impl
AsyncEngine
<
SingleIn
<
String
>
,
ManyOut
<
Annotated
<
String
>>
,
anyhow
::
Error
>
for
TestHandler
{
async
fn
generate
(
&
self
,
input
:
SingleIn
<
String
>
)
->
crate
::
Result
<
ManyOut
<
Annotated
<
String
>>>
{
async
fn
generate
(
&
self
,
input
:
SingleIn
<
String
>
)
->
anyhow
::
Result
<
ManyOut
<
Annotated
<
String
>>>
{
let
(
data
,
ctx
)
=
input
.into_parts
();
let
(
data
,
ctx
)
=
input
.into_parts
();
let
response
=
Annotated
::
from_data
(
format!
(
"You responded: {}"
,
data
));
let
response
=
Annotated
::
from_data
(
format!
(
"You responded: {}"
,
data
));
Ok
(
crate
::
pipeline
::
ResponseStream
::
new
(
Ok
(
crate
::
pipeline
::
ResponseStream
::
new
(
...
@@ -733,8 +735,9 @@ mod integration_tests {
...
@@ -733,8 +735,9 @@ mod integration_tests {
// Register the endpoint and its health check payload
// Register the endpoint and its health check payload
{
{
let
system_health
=
drt
.system_health
.lock
();
let
system_health
=
drt
.system_health
();
system_health
.register_health_check_target
(
let
system_health_lock
=
system_health
.lock
();
system_health_lock
.register_health_check_target
(
endpoint
,
endpoint
,
crate
::
component
::
Instance
{
crate
::
component
::
Instance
{
component
:
"test_component"
.to_string
(),
component
:
"test_component"
.to_string
(),
...
@@ -760,7 +763,7 @@ mod integration_tests {
...
@@ -760,7 +763,7 @@ mod integration_tests {
);
);
// Set endpoint to healthy state
// Set endpoint to healthy state
drt
.system_health
drt
.system_health
()
.lock
()
.lock
()
.set_endpoint_health_status
(
endpoint
,
HealthStatus
::
Ready
);
.set_endpoint_health_status
(
endpoint
,
HealthStatus
::
Ready
);
...
@@ -777,7 +780,7 @@ mod integration_tests {
...
@@ -777,7 +780,7 @@ mod integration_tests {
// Verify the endpoint status in SystemHealth directly
// Verify the endpoint status in SystemHealth directly
let
endpoint_status
=
drt
let
endpoint_status
=
drt
.system_health
.system_health
()
.lock
()
.lock
()
.get_endpoint_health_status
(
endpoint
);
.get_endpoint_health_status
(
endpoint
);
assert_eq!
(
assert_eq!
(
...
...
lib/runtime/src/traits.rs
View file @
cf630bf7
...
@@ -16,7 +16,7 @@ pub trait DistributedRuntimeProvider {
...
@@ -16,7 +16,7 @@ pub trait DistributedRuntimeProvider {
impl
RuntimeProvider
for
DistributedRuntime
{
impl
RuntimeProvider
for
DistributedRuntime
{
fn
rt
(
&
self
)
->
&
Runtime
{
fn
rt
(
&
self
)
->
&
Runtime
{
&
self
.runtime
self
.runtime
()
}
}
}
}
...
...
lib/runtime/src/traits/events.rs
View file @
cf630bf7
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
// SPDX-License-Identifier: Apache-2.0
use
async_trait
::
async_trait
;
use
serde
::{
Deserialize
,
Serialize
};
use
std
::
fmt
::
Debug
;
use
std
::
fmt
::
Debug
;
use
crate
::
Result
;
use
anyhow
::
Result
;
use
async_trait
::
async_trait
;
use
serde
::{
Deserialize
,
Serialize
};
// #[async_trait]
// #[async_trait]
// pub trait Publisher: Debug + Clone + Send + Sync {
// pub trait Publisher: Debug + Clone + Send + Sync {
...
...
lib/runtime/src/transports/etcd.rs
View file @
cf630bf7
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
// SPDX-License-Identifier: Apache-2.0
use
crate
::{
CancellationToken
,
ErrorContext
,
Result
,
Runtime
,
error
};
use
crate
::
runtime
::
Runtime
;
use
anyhow
::{
Context
,
Result
};
use
async_nats
::
jetstream
::
kv
;
use
async_nats
::
jetstream
::
kv
;
use
derive_builder
::
Builder
;
use
derive_builder
::
Builder
;
...
@@ -19,6 +20,7 @@ use etcd_client::{
...
@@ -19,6 +20,7 @@ use etcd_client::{
};
};
pub
use
etcd_client
::{
ConnectOptions
,
KeyValue
,
LeaseClient
};
pub
use
etcd_client
::{
ConnectOptions
,
KeyValue
,
LeaseClient
};
use
tokio
::
time
::{
Duration
,
interval
};
use
tokio
::
time
::{
Duration
,
interval
};
use
tokio_util
::
sync
::
CancellationToken
;
mod
connector
;
mod
connector
;
mod
lease
;
mod
lease
;
...
@@ -139,7 +141,7 @@ impl Client {
...
@@ -139,7 +141,7 @@ impl Client {
for
resp
in
result
.op_responses
()
{
for
resp
in
result
.op_responses
()
{
tracing
::
warn!
(
response
=
?
resp
,
"kv_create etcd op response"
);
tracing
::
warn!
(
response
=
?
resp
,
"kv_create etcd op response"
);
}
}
Err
(
error
!
(
"Unable to create key. Check etcd server status"
)
)
anyhow
::
bail
!
(
"Unable to create key. Check etcd server status"
)
}
}
}
}
...
@@ -180,17 +182,15 @@ impl Client {
...
@@ -180,17 +182,15 @@ impl Client {
Some
(
response
)
=>
match
response
{
Some
(
response
)
=>
match
response
{
TxnOpResponse
::
Txn
(
response
)
=>
match
response
.succeeded
()
{
TxnOpResponse
::
Txn
(
response
)
=>
match
response
.succeeded
()
{
true
=>
Ok
(()),
true
=>
Ok
(()),
false
=>
Err
(
error
!
(
false
=>
anyhow
::
bail
!
(
"Unable to create or validate key. Check etcd server status"
"Unable to create or validate key. Check etcd server status"
)
),
),
},
},
_
=>
Err
(
error!
(
_
=>
{
"Unable to validate key operation. Check etcd server status"
anyhow
::
bail!
(
"Unable to validate key operation. Check etcd server status"
)
)),
}
},
},
None
=>
Err
(
error!
(
None
=>
anyhow
::
bail!
(
"Unable to create or validate key. Check etcd server status"
),
"Unable to create or validate key. Check etcd server status"
)),
}
}
}
}
}
}
...
@@ -372,7 +372,7 @@ impl Client {
...
@@ -372,7 +372,7 @@ impl Client {
// Get the start revision
// Get the start revision
let
mut
start_revision
=
get_response
let
mut
start_revision
=
get_response
.header
()
.header
()
.ok_or
(
error
!
(
"missing header; unable to get revision"
))
?
.ok_or
(
anyhow
::
anyhow
!
(
"missing header; unable to get revision"
))
?
.revision
();
.revision
();
tracing
::
trace!
(
"{prefix}: start_revision: {start_revision}"
);
tracing
::
trace!
(
"{prefix}: start_revision: {start_revision}"
);
start_revision
+=
1
;
start_revision
+=
1
;
...
...
lib/runtime/src/transports/etcd/connector.rs
View file @
cf630bf7
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
// SPDX-License-Identifier: Apache-2.0
use
crate
::{
Error
Context
,
Result
,
error
};
use
anyhow
::{
Context
,
Result
};
use
etcd_client
::
ConnectOptions
;
use
etcd_client
::
ConnectOptions
;
use
parking_lot
::
RwLock
;
use
parking_lot
::
RwLock
;
use
std
::{
sync
::
Arc
,
time
::
Duration
};
use
std
::{
sync
::
Arc
,
time
::
Duration
};
...
@@ -76,9 +76,7 @@ impl Connector {
...
@@ -76,9 +76,7 @@ impl Connector {
loop
{
loop
{
backoff_state
.apply_backoff
(
deadline
)
.await
;
backoff_state
.apply_backoff
(
deadline
)
.await
;
if
std
::
time
::
Instant
::
now
()
>=
deadline
{
if
std
::
time
::
Instant
::
now
()
>=
deadline
{
return
Err
(
error!
(
anyhow
::
bail!
(
"Unable to reconnect to ETCD cluster: deadline exceeded"
);
"Unable to reconnect to ETCD cluster: deadline exceeded"
));
}
}
match
Self
::
connect
(
&
self
.etcd_urls
,
&
self
.connect_options
)
.await
{
match
Self
::
connect
(
&
self
.etcd_urls
,
&
self
.connect_options
)
.await
{
...
...
lib/runtime/src/transports/etcd/lock.rs
View file @
cf630bf7
...
@@ -7,7 +7,7 @@ use std::time::Duration;
...
@@ -7,7 +7,7 @@ use std::time::Duration;
use
etcd_client
::{
Compare
,
CompareOp
,
PutOptions
,
Txn
,
TxnOp
};
use
etcd_client
::{
Compare
,
CompareOp
,
PutOptions
,
Txn
,
TxnOp
};
use
crate
::
Result
;
use
anyhow
::
Result
;
use
super
::
Client
;
use
super
::
Client
;
...
...
lib/runtime/src/transports/nats.rs
View file @
cf630bf7
...
@@ -16,9 +16,10 @@
...
@@ -16,9 +16,10 @@
//! - `NATS_AUTH_CREDENTIALS_FILE`: the path to the credentials file
//! - `NATS_AUTH_CREDENTIALS_FILE`: the path to the credentials file
//!
//!
//! Note: `NATS_AUTH_USERNAME` and `NATS_AUTH_PASSWORD` must be used together.
//! Note: `NATS_AUTH_USERNAME` and `NATS_AUTH_PASSWORD` must be used together.
use
crate
::
metrics
::
MetricsHierarchy
;
use
crate
::
traits
::
events
::
EventPublisher
;
use
crate
::
traits
::
events
::
EventPublisher
;
use
crate
::{
Result
,
metrics
::
MetricsHierarchy
};
use
anyhow
::
Result
;
use
async_nats
::
connection
::
State
;
use
async_nats
::
connection
::
State
;
use
async_nats
::{
Subscriber
,
client
,
jetstream
};
use
async_nats
::{
Subscriber
,
client
,
jetstream
};
use
async_trait
::
async_trait
;
use
async_trait
::
async_trait
;
...
...
lib/runtime/src/transports/zmq.rs
View file @
cf630bf7
...
@@ -28,7 +28,6 @@ use tokio::{
...
@@ -28,7 +28,6 @@ use tokio::{
task
::{
JoinError
,
JoinHandle
},
task
::{
JoinError
,
JoinHandle
},
};
};
use
tokio_util
::
sync
::
CancellationToken
;
use
tokio_util
::
sync
::
CancellationToken
;
use
tracing
as
log
;
// Core message types
// Core message types
#[derive(Debug,
Clone,
Serialize,
Deserialize)]
#[derive(Debug,
Clone,
Serialize,
Deserialize)]
...
@@ -116,11 +115,11 @@ impl Server {
...
@@ -116,11 +115,11 @@ impl Server {
// but we also propagate the error to the caller's cancellation token
// but we also propagate the error to the caller's cancellation token
let
watch_task
=
tokio
::
spawn
(
async
move
{
let
watch_task
=
tokio
::
spawn
(
async
move
{
let
result
=
primary_task
.await
.inspect_err
(|
e
|
{
let
result
=
primary_task
.await
.inspect_err
(|
e
|
{
lo
g
::
error!
(
"zmq server/router task failed: {}"
,
e
);
tracin
g
::
error!
(
"zmq server/router task failed: {}"
,
e
);
cancel_token
.cancel
();
cancel_token
.cancel
();
})
?
;
})
?
;
result
.inspect_err
(|
e
|
{
result
.inspect_err
(|
e
|
{
lo
g
::
error!
(
"zmq server/router task failed: {}"
,
e
);
tracin
g
::
error!
(
"zmq server/router task failed: {}"
,
e
);
cancel_token
.cancel
();
cancel_token
.cancel
();
})
})
});
});
...
@@ -156,7 +155,7 @@ impl Server {
...
@@ -156,7 +155,7 @@ impl Server {
// let port = addr.as_socket().map(|s| s.port());
// let port = addr.as_socket().map(|s| s.port());
// if let Some(port) = port {
// if let Some(port) = port {
//
lo
g::info!("Server listening on port {}", port);
//
tracin
g::info!("Server listening on port {}", port);
// }
// }
loop
{
loop
{
...
@@ -169,7 +168,7 @@ impl Server {
...
@@ -169,7 +168,7 @@ impl Server {
frames
frames
},
},
Some
(
Err
(
e
))
=>
{
Some
(
Err
(
e
))
=>
{
lo
g
::
warn!
(
"Error receiving message: {}"
,
e
);
tracin
g
::
warn!
(
"Error receiving message: {}"
,
e
);
continue
;
continue
;
}
}
None
=>
break
,
None
=>
break
,
...
@@ -177,7 +176,7 @@ impl Server {
...
@@ -177,7 +176,7 @@ impl Server {
}
}
_
=
token
.cancelled
()
=>
{
_
=
token
.cancelled
()
=>
{
lo
g
::
info!
(
"Server shutting down"
);
tracin
g
::
info!
(
"Server shutting down"
);
break
;
break
;
}
}
};
};
...
@@ -203,7 +202,7 @@ impl Server {
...
@@ -203,7 +202,7 @@ impl Server {
// first we try to send the data eagerly without blocking
// first we try to send the data eagerly without blocking
let
action
=
match
tx
.try_send
(
message
.into
())
{
let
action
=
match
tx
.try_send
(
message
.into
())
{
Ok
(
_
)
=>
{
Ok
(
_
)
=>
{
lo
g
::
trace!
(
tracin
g
::
trace!
(
request_id
,
request_id
,
"response data sent eagerly to stream: {} bytes"
,
"response data sent eagerly to stream: {} bytes"
,
message_size
message_size
...
@@ -212,11 +211,14 @@ impl Server {
...
@@ -212,11 +211,14 @@ impl Server {
}
}
Err
(
e
)
=>
match
e
{
Err
(
e
)
=>
match
e
{
mpsc
::
error
::
TrySendError
::
Closed
(
_
)
=>
{
mpsc
::
error
::
TrySendError
::
Closed
(
_
)
=>
{
lo
g
::
info!
(
request_id
,
"response stream was closed"
);
tracin
g
::
info!
(
request_id
,
"response stream was closed"
);
StreamAction
::
Close
StreamAction
::
Close
}
}
mpsc
::
error
::
TrySendError
::
Full
(
data
)
=>
{
mpsc
::
error
::
TrySendError
::
Full
(
data
)
=>
{
log
::
warn!
(
request_id
,
"response stream is full; backpressue alert"
);
tracing
::
warn!
(
request_id
,
"response stream is full; backpressure alert"
);
// todo - add timeout - we are blocking all other streams
// todo - add timeout - we are blocking all other streams
if
(
tx
.send
(
data
)
.await
)
.is_err
()
{
if
(
tx
.send
(
data
)
.await
)
.is_err
()
{
StreamAction
::
Close
StreamAction
::
Close
...
@@ -245,7 +247,7 @@ impl Server {
...
@@ -245,7 +247,7 @@ impl Server {
}
else
{
}
else
{
// increment bytes_dropped
// increment bytes_dropped
// increment messages_dropped
// increment messages_dropped
lo
g
::
trace!
(
request_id
,
"no active stream for request_id"
);
tracin
g
::
trace!
(
request_id
,
"no active stream for request_id"
);
}
}
}
}
...
...
lib/runtime/src/utils/typed_prefix_watcher.rs
View file @
cf630bf7
...
@@ -6,8 +6,8 @@
...
@@ -6,8 +6,8 @@
//! This module provides reusable patterns for watching etcd prefixes and maintaining
//! This module provides reusable patterns for watching etcd prefixes and maintaining
//! HashMap-based state that automatically updates based on etcd events.
//! HashMap-based state that automatically updates based on etcd events.
use
crate
::
Result
;
use
crate
::
transports
::
etcd
::{
Client
as
EtcdClient
,
WatchEvent
};
use
crate
::
transports
::
etcd
::{
Client
as
EtcdClient
,
WatchEvent
};
use
anyhow
::
Result
;
use
etcd_client
::
KeyValue
;
use
etcd_client
::
KeyValue
;
use
serde
::
de
::
DeserializeOwned
;
use
serde
::
de
::
DeserializeOwned
;
use
std
::
collections
::
HashMap
;
use
std
::
collections
::
HashMap
;
...
...
lib/runtime/src/worker.rs
View file @
cf630bf7
...
@@ -20,7 +20,7 @@
...
@@ -20,7 +20,7 @@
//! and release builds. In development, the default is [DEFAULT_GRACEFUL_SHUTDOWN_TIMEOUT_DEBUG] and
//! and release builds. In development, the default is [DEFAULT_GRACEFUL_SHUTDOWN_TIMEOUT_DEBUG] and
//! in release, the default is [DEFAULT_GRACEFUL_SHUTDOWN_TIMEOUT_RELEASE].
//! in release, the default is [DEFAULT_GRACEFUL_SHUTDOWN_TIMEOUT_RELEASE].
use
super
::{
CancellationToken
,
Result
,
Runtime
,
RuntimeConfig
,
error
};
use
super
::{
CancellationToken
,
Runtime
,
RuntimeConfig
};
use
futures
::
Future
;
use
futures
::
Future
;
use
once_cell
::
sync
::
OnceCell
;
use
once_cell
::
sync
::
OnceCell
;
...
@@ -30,7 +30,7 @@ use tokio::{signal, task::JoinHandle};
...
@@ -30,7 +30,7 @@ use tokio::{signal, task::JoinHandle};
static
RT
:
OnceCell
<
tokio
::
runtime
::
Runtime
>
=
OnceCell
::
new
();
static
RT
:
OnceCell
<
tokio
::
runtime
::
Runtime
>
=
OnceCell
::
new
();
static
RTHANDLE
:
OnceCell
<
tokio
::
runtime
::
Handle
>
=
OnceCell
::
new
();
static
RTHANDLE
:
OnceCell
<
tokio
::
runtime
::
Handle
>
=
OnceCell
::
new
();
static
INIT
:
OnceCell
<
Mutex
<
Option
<
tokio
::
task
::
JoinHandle
<
Result
<
()
>>>>>
=
OnceCell
::
new
();
static
INIT
:
OnceCell
<
Mutex
<
Option
<
tokio
::
task
::
JoinHandle
<
anyhow
::
Result
<
()
>>>>>
=
OnceCell
::
new
();
const
SHUTDOWN_MESSAGE
:
&
str
=
const
SHUTDOWN_MESSAGE
:
&
str
=
"Application received shutdown signal; attempting to gracefully shutdown"
;
"Application received shutdown signal; attempting to gracefully shutdown"
;
...
@@ -54,30 +54,30 @@ pub struct Worker {
...
@@ -54,30 +54,30 @@ pub struct Worker {
impl
Worker
{
impl
Worker
{
/// Create a new [`Worker`] instance from [`RuntimeConfig`] settings which is sourced from the environment
/// Create a new [`Worker`] instance from [`RuntimeConfig`] settings which is sourced from the environment
pub
fn
from_settings
()
->
Result
<
Worker
>
{
pub
fn
from_settings
()
->
anyhow
::
Result
<
Worker
>
{
let
config
=
RuntimeConfig
::
from_settings
()
?
;
let
config
=
RuntimeConfig
::
from_settings
()
?
;
Worker
::
from_config
(
config
)
Worker
::
from_config
(
config
)
}
}
/// Create a new [`Worker`] instance from a provided [`RuntimeConfig`]
/// Create a new [`Worker`] instance from a provided [`RuntimeConfig`]
pub
fn
from_config
(
config
:
RuntimeConfig
)
->
Result
<
Worker
>
{
pub
fn
from_config
(
config
:
RuntimeConfig
)
->
anyhow
::
Result
<
Worker
>
{
// if the runtime is already initialized, return an error
// if the runtime is already initialized, return an error
if
RT
.get
()
.is_some
()
||
RTHANDLE
.get
()
.is_some
()
{
if
RT
.get
()
.is_some
()
||
RTHANDLE
.get
()
.is_some
()
{
return
Err
(
error
!
(
"Worker already initialized"
));
return
Err
(
anyhow
::
anyhow
!
(
"Worker already initialized"
));
}
}
// create a new runtime and insert it into the OnceCell
// create a new runtime and insert it into the OnceCell
// there is still a potential race-condition here, two threads could have passed the first check
// there is still a potential race-condition here, two threads could have passed the first check
// but only one will succeed in inserting the runtime
// but only one will succeed in inserting the runtime
let
rt
=
RT
.try_insert
(
config
.create_runtime
()
?
)
.map_err
(|
_
|
{
let
rt
=
RT
.try_insert
(
config
.create_runtime
()
?
)
.map_err
(|
_
|
{
error
!
(
"Failed to create worker; Only a single Worker should ever be created"
)
anyhow
::
anyhow
!
(
"Failed to create worker; Only a single Worker should ever be created"
)
})
?
;
})
?
;
let
runtime
=
Runtime
::
from_handle
(
rt
.handle
()
.clone
())
?
;
let
runtime
=
Runtime
::
from_handle
(
rt
.handle
()
.clone
())
?
;
Ok
(
Worker
{
runtime
,
config
})
Ok
(
Worker
{
runtime
,
config
})
}
}
pub
fn
runtime_from_existing
()
->
Result
<
Runtime
>
{
pub
fn
runtime_from_existing
()
->
anyhow
::
Result
<
Runtime
>
{
if
let
Some
(
rt
)
=
RT
.get
()
{
if
let
Some
(
rt
)
=
RT
.get
()
{
Ok
(
Runtime
::
from_handle
(
rt
.handle
()
.clone
())
?
)
Ok
(
Runtime
::
from_handle
(
rt
.handle
()
.clone
())
?
)
}
else
if
let
Some
(
rt
)
=
RTHANDLE
.get
()
{
}
else
if
let
Some
(
rt
)
=
RTHANDLE
.get
()
{
...
@@ -87,18 +87,19 @@ impl Worker {
...
@@ -87,18 +87,19 @@ impl Worker {
}
}
}
}
pub
fn
tokio_runtime
(
&
self
)
->
Result
<&
'static
tokio
::
runtime
::
Runtime
>
{
pub
fn
tokio_runtime
(
&
self
)
->
anyhow
::
Result
<&
'static
tokio
::
runtime
::
Runtime
>
{
RT
.get
()
.ok_or_else
(||
error!
(
"Worker not initialized"
))
RT
.get
()
.ok_or_else
(||
anyhow
::
anyhow!
(
"Worker not initialized"
))
}
}
pub
fn
runtime
(
&
self
)
->
&
Runtime
{
pub
fn
runtime
(
&
self
)
->
&
Runtime
{
&
self
.runtime
&
self
.runtime
}
}
pub
fn
execute
<
F
,
Fut
>
(
self
,
f
:
F
)
->
Result
<
()
>
pub
fn
execute
<
F
,
Fut
>
(
self
,
f
:
F
)
->
anyhow
::
Result
<
()
>
where
where
F
:
FnOnce
(
Runtime
)
->
Fut
+
Send
+
'static
,
F
:
FnOnce
(
Runtime
)
->
Fut
+
Send
+
'static
,
Fut
:
Future
<
Output
=
Result
<
()
>>
+
Send
+
'static
,
Fut
:
Future
<
Output
=
anyhow
::
Result
<
()
>>
+
Send
+
'static
,
{
{
let
runtime
=
self
.runtime
.clone
();
let
runtime
=
self
.runtime
.clone
();
runtime
.secondary
()
.block_on
(
self
.execute_internal
(
f
))
??
;
runtime
.secondary
()
.block_on
(
self
.execute_internal
(
f
))
??
;
...
@@ -106,10 +107,10 @@ impl Worker {
...
@@ -106,10 +107,10 @@ impl Worker {
Ok
(())
Ok
(())
}
}
pub
async
fn
execute_async
<
F
,
Fut
>
(
self
,
f
:
F
)
->
Result
<
()
>
pub
async
fn
execute_async
<
F
,
Fut
>
(
self
,
f
:
F
)
->
anyhow
::
Result
<
()
>
where
where
F
:
FnOnce
(
Runtime
)
->
Fut
+
Send
+
'static
,
F
:
FnOnce
(
Runtime
)
->
Fut
+
Send
+
'static
,
Fut
:
Future
<
Output
=
Result
<
()
>>
+
Send
+
'static
,
Fut
:
Future
<
Output
=
anyhow
::
Result
<
()
>>
+
Send
+
'static
,
{
{
let
runtime
=
self
.runtime
.clone
();
let
runtime
=
self
.runtime
.clone
();
let
task
=
self
.execute_internal
(
f
);
let
task
=
self
.execute_internal
(
f
);
...
@@ -120,10 +121,10 @@ impl Worker {
...
@@ -120,10 +121,10 @@ impl Worker {
/// Executes the provided application/closure on the [`Runtime`].
/// Executes the provided application/closure on the [`Runtime`].
/// This is designed to be called once from main and will block the calling thread until the application completes.
/// This is designed to be called once from main and will block the calling thread until the application completes.
fn
execute_internal
<
F
,
Fut
>
(
self
,
f
:
F
)
->
JoinHandle
<
Result
<
()
>>
fn
execute_internal
<
F
,
Fut
>
(
self
,
f
:
F
)
->
JoinHandle
<
anyhow
::
Result
<
()
>>
where
where
F
:
FnOnce
(
Runtime
)
->
Fut
+
Send
+
'static
,
F
:
FnOnce
(
Runtime
)
->
Fut
+
Send
+
'static
,
Fut
:
Future
<
Output
=
Result
<
()
>>
+
Send
+
'static
,
Fut
:
Future
<
Output
=
anyhow
::
Result
<
()
>>
+
Send
+
'static
,
{
{
let
runtime
=
self
.runtime
.clone
();
let
runtime
=
self
.runtime
.clone
();
let
primary
=
runtime
.primary
();
let
primary
=
runtime
.primary
();
...
@@ -142,13 +143,13 @@ impl Worker {
...
@@ -142,13 +143,13 @@ impl Worker {
INIT
.set
(
Mutex
::
new
(
Some
(
secondary
.spawn
(
async
move
{
INIT
.set
(
Mutex
::
new
(
Some
(
secondary
.spawn
(
async
move
{
// start signal handler
// start signal handler
tokio
::
spawn
(
signal_handler
(
runtime
.
cancellation
_token
.clone
()));
tokio
::
spawn
(
signal_handler
(
runtime
.
primary
_token
()
.clone
()));
let
cancel_token
=
runtime
.child_token
();
let
cancel_token
=
runtime
.child_token
();
let
(
mut
app_tx
,
app_rx
)
=
tokio
::
sync
::
oneshot
::
channel
::
<
()
>
();
let
(
mut
app_tx
,
app_rx
)
=
tokio
::
sync
::
oneshot
::
channel
::
<
()
>
();
// spawn a task to run the application
// spawn a task to run the application
let
task
:
JoinHandle
<
Result
<
()
>>
=
primary
.spawn
(
async
move
{
let
task
:
JoinHandle
<
anyhow
::
Result
<
()
>>
=
primary
.spawn
(
async
move
{
let
_
rx
=
app_rx
;
let
_
rx
=
app_rx
;
f
(
runtime
)
.await
f
(
runtime
)
.await
});
});
...
@@ -195,9 +196,9 @@ impl Worker {
...
@@ -195,9 +196,9 @@ impl Worker {
.expect
(
"Application initialized; but another thread is awaiting it; Worker.execute() can only be called once"
)
.expect
(
"Application initialized; but another thread is awaiting it; Worker.execute() can only be called once"
)
}
}
pub
fn
from_current
()
->
Result
<
Worker
>
{
pub
fn
from_current
()
->
anyhow
::
Result
<
Worker
>
{
if
RT
.get
()
.is_some
()
||
RTHANDLE
.get
()
.is_some
()
{
if
RT
.get
()
.is_some
()
||
RTHANDLE
.get
()
.is_some
()
{
return
Err
(
error
!
(
"Worker already initialized"
));
return
Err
(
anyhow
::
anyhow
!
(
"Worker already initialized"
));
}
}
let
runtime
=
Runtime
::
from_current
()
?
;
let
runtime
=
Runtime
::
from_current
()
?
;
let
config
=
RuntimeConfig
::
from_settings
()
?
;
let
config
=
RuntimeConfig
::
from_settings
()
?
;
...
@@ -206,7 +207,7 @@ impl Worker {
...
@@ -206,7 +207,7 @@ impl Worker {
}
}
/// Catch signals and trigger a shutdown
/// Catch signals and trigger a shutdown
async
fn
signal_handler
(
cancel_token
:
CancellationToken
)
->
Result
<
()
>
{
async
fn
signal_handler
(
cancel_token
:
CancellationToken
)
->
anyhow
::
Result
<
()
>
{
let
ctrl_c
=
async
{
let
ctrl_c
=
async
{
signal
::
ctrl_c
()
.await
?
;
signal
::
ctrl_c
()
.await
?
;
anyhow
::
Ok
(())
anyhow
::
Ok
(())
...
...
lib/runtime/tests/lifecycle.rs
View file @
cf630bf7
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
// SPDX-License-Identifier: Apache-2.0
use
dynamo_runtime
::{
Result
,
Runtime
,
worker
::
Worker
};
use
anyhow
::
Result
;
use
dynamo_runtime
::{
Runtime
,
worker
::
Worker
};
async
fn
hello_world
(
_
runtime
:
Runtime
)
->
Result
<
()
>
{
async
fn
hello_world
(
_
runtime
:
Runtime
)
->
Result
<
()
>
{
Ok
(())
Ok
(())
...
@@ -12,50 +14,3 @@ fn test_lifecycle() {
...
@@ -12,50 +14,3 @@ fn test_lifecycle() {
let
worker
=
Worker
::
from_settings
()
.unwrap
();
let
worker
=
Worker
::
from_settings
()
.unwrap
();
worker
.execute
(
hello_world
)
.unwrap
();
worker
.execute
(
hello_world
)
.unwrap
();
}
}
// async fn discoverable(runtime: Runtime) -> Result<()> {
// let config = DiscoveryConfig {
// etcd_url: vec!["http://localhost:2379".to_string()],
// etcd_connect_options: None,
// };
// let client = DiscoveryClient::new(config, runtime.clone()).await?;
// println!("Primary lease id: {:x}", client.lease_id());
// let lease = client.create_lease(60).await?;
// // Keys and values
// let lock_key = "lock_key"; // Key for the lock
// let object_key = "object_key"; // Key for the object
// let object_value = "This is the object value"; // Value for the object
// let lock_value = "locked"; // Value indicating a lock
// let put_options = Some(PutOptions::new().with_lease(lease.id()));
// // Build the transaction
// let txn = Txn::new()
// .when(vec![Compare::version(lock_key, CompareOp::Equal, 0)]) // Ensure the lock does not exist
// .and_then(vec![
// TxnOp::put(object_key, object_value, put_options.clone()), // Create the object
// TxnOp::put(lock_key, lock_value, put_options), // Set the lock
// ]);
// // Execute the transaction
// let txn_response = client.etc_client().kv_client().txn(txn).await?;
// tokio::spawn(async move {
// println!("custom lease id: {:x}", lease.id());
// lease.cancel_token().cancelled().await;
// println!("custom lease revoked");
// });
// runtime.child_token().cancelled().await;
// Ok(())
// }
// #[test]
// fn test_discovery_client() {
// let runtime = Runtime::new(RuntimeConfig::default()).unwrap();
// runtime.execute(discoverable).unwrap();
// }
lib/runtime/tests/pipeline.rs
View file @
cf630bf7
...
@@ -3,17 +3,23 @@
...
@@ -3,17 +3,23 @@
#![allow(dead_code)]
#![allow(dead_code)]
use
anyhow
::
Error
;
use
futures
::{
StreamExt
,
stream
};
use
futures
::{
StreamExt
,
stream
};
use
serde
::{
Deserialize
,
Serialize
};
use
serde
::{
Deserialize
,
Serialize
};
use
std
::{
sync
::
Arc
,
time
::
Duration
};
use
std
::{
sync
::
Arc
,
time
::
Duration
};
use
dynamo_runtime
::
engine
::
ResponseStream
;
use
dynamo_runtime
::
engine
::
ResponseStream
;
use
dynamo_runtime
::{
use
dynamo_runtime
::
pipeline
::{
Error
,
AsyncEngine
,
pipeline
::{
Data
,
AsyncEngine
,
Data
,
Event
,
ManyOut
,
Operator
,
ServiceBackend
,
ServiceEngine
,
Event
,
ServiceFrontend
,
SingleIn
,
async_trait
,
*
,
ManyOut
,
},
Operator
,
ServiceBackend
,
ServiceEngine
,
ServiceFrontend
,
SingleIn
,
*
,
// TODO remove the star
};
};
mod
common
;
mod
common
;
...
@@ -46,7 +52,7 @@ pub enum Annotated<T: Data> {
...
@@ -46,7 +52,7 @@ pub enum Annotated<T: Data> {
/// to the output stream
/// to the output stream
struct
PreprocesOperator
{}
struct
PreprocesOperator
{}
#[async_trait]
#[async_trait
::async_trait
]
impl
impl
Operator
<
Operator
<
SingleIn
<
String
>
,
SingleIn
<
String
>
,
...
...
lib/runtime/tests/soak.rs
View file @
cf630bf7
...
@@ -18,7 +18,7 @@ mod integration {
...
@@ -18,7 +18,7 @@ mod integration {
pub
const
DEFAULT_NAMESPACE
:
&
str
=
"dynamo"
;
pub
const
DEFAULT_NAMESPACE
:
&
str
=
"dynamo"
;
use
dynamo_runtime
::{
use
dynamo_runtime
::{
DistributedRuntime
,
ErrorContext
,
Result
,
Runtime
,
Worker
,
logging
,
DistributedRuntime
,
Runtime
,
Worker
,
logging
,
pipeline
::{
pipeline
::{
AsyncEngine
,
AsyncEngineContextProvider
,
Error
,
ManyOut
,
PushRouter
,
ResponseStream
,
AsyncEngine
,
AsyncEngineContextProvider
,
Error
,
ManyOut
,
PushRouter
,
ResponseStream
,
SingleIn
,
async_trait
,
network
::
Ingress
,
SingleIn
,
async_trait
,
network
::
Ingress
,
...
@@ -26,6 +26,8 @@ mod integration {
...
@@ -26,6 +26,8 @@ mod integration {
protocols
::
annotated
::
Annotated
,
protocols
::
annotated
::
Annotated
,
stream
,
stream
,
};
};
use
anyhow
::{
Context
,
Result
};
use
futures
::
StreamExt
;
use
futures
::
StreamExt
;
use
std
::{
use
std
::{
sync
::
Arc
,
sync
::
Arc
,
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment