Unverified Commit cf630bf7 authored by Graham King's avatar Graham King Committed by GitHub
Browse files

refactor: Make the Runtime and DistributedRuntime fields private (#4193)


Signed-off-by: default avatarGraham King <grahamk@nvidia.com>
parent 0e623146
...@@ -8,15 +8,16 @@ ...@@ -8,15 +8,16 @@
// component's "service state" // component's "service state"
use crate::{ use crate::{
DistributedRuntime, Result, DistributedRuntime,
component::Component, component::Component,
error,
metrics::{MetricsHierarchy, prometheus_names, prometheus_names::nats_service}, metrics::{MetricsHierarchy, prometheus_names, prometheus_names::nats_service},
traits::*, traits::*,
transports::nats, transports::nats,
utils::stream, utils::stream,
}; };
use anyhow::Result;
use anyhow::anyhow as error;
use async_nats::Message; use async_nats::Message;
use async_stream::try_stream; use async_stream::try_stream;
use bytes::Bytes; use bytes::Bytes;
......
...@@ -81,13 +81,13 @@ pub async fn spawn_system_status_server( ...@@ -81,13 +81,13 @@ pub async fn spawn_system_status_server(
let server_state = Arc::new(SystemStatusState::new(drt)?); let server_state = Arc::new(SystemStatusState::new(drt)?);
let health_path = server_state let health_path = server_state
.drt() .drt()
.system_health .system_health()
.lock() .lock()
.health_path() .health_path()
.to_string(); .to_string();
let live_path = server_state let live_path = server_state
.drt() .drt()
.system_health .system_health()
.lock() .lock()
.live_path() .live_path()
.to_string(); .to_string();
...@@ -158,9 +158,11 @@ pub async fn spawn_system_status_server( ...@@ -158,9 +158,11 @@ pub async fn spawn_system_status_server(
#[tracing::instrument(skip_all, level = "trace")] #[tracing::instrument(skip_all, level = "trace")]
async fn health_handler(state: Arc<SystemStatusState>) -> impl IntoResponse { async fn health_handler(state: Arc<SystemStatusState>) -> impl IntoResponse {
// Get basic health status // Get basic health status
let system_health = state.drt().system_health.lock(); let system_health = state.drt().system_health();
let (healthy, endpoints) = system_health.get_health_status(); let system_health_lock = system_health.lock();
let uptime = Some(system_health.uptime()); let (healthy, endpoints) = system_health_lock.get_health_status();
let uptime = Some(system_health_lock.uptime());
drop(system_health_lock);
let healthy_string = if healthy { "ready" } else { "notready" }; let healthy_string = if healthy { "ready" } else { "notready" };
let status_code = if healthy { let status_code = if healthy {
...@@ -184,7 +186,7 @@ async fn health_handler(state: Arc<SystemStatusState>) -> impl IntoResponse { ...@@ -184,7 +186,7 @@ async fn health_handler(state: Arc<SystemStatusState>) -> impl IntoResponse {
#[tracing::instrument(skip_all, level = "trace")] #[tracing::instrument(skip_all, level = "trace")]
async fn metrics_handler(state: Arc<SystemStatusState>) -> impl IntoResponse { async fn metrics_handler(state: Arc<SystemStatusState>) -> impl IntoResponse {
// Update the uptime gauge with current value // Update the uptime gauge with current value
state.drt().system_health.lock().update_uptime_gauge(); state.drt().system_health().lock().update_uptime_gauge();
// Get all metrics from DistributedRuntime // Get all metrics from DistributedRuntime
// Note: In the new hierarchy-based architecture, metrics are automatically registered // Note: In the new hierarchy-based architecture, metrics are automatically registered
...@@ -260,13 +262,13 @@ mod integration_tests { ...@@ -260,13 +262,13 @@ mod integration_tests {
let drt = create_test_drt_async().await; let drt = create_test_drt_async().await;
// Get uptime from SystemHealth // Get uptime from SystemHealth
let uptime = drt.system_health.lock().uptime(); let uptime = drt.system_health().lock().uptime();
// Uptime should exist (even if close to zero) // Uptime should exist (even if close to zero)
assert!(uptime.as_nanos() > 0 || uptime.is_zero()); assert!(uptime.as_nanos() > 0 || uptime.is_zero());
// Sleep briefly and check uptime increases // Sleep briefly and check uptime increases
tokio::time::sleep(std::time::Duration::from_millis(100)).await; tokio::time::sleep(std::time::Duration::from_millis(100)).await;
let uptime_after = drt.system_health.lock().uptime(); let uptime_after = drt.system_health().lock().uptime();
assert!(uptime_after > uptime); assert!(uptime_after > uptime);
}) })
.await; .await;
...@@ -317,19 +319,19 @@ mod integration_tests { ...@@ -317,19 +319,19 @@ mod integration_tests {
let drt = create_test_drt_async().await; let drt = create_test_drt_async().await;
// Get initial uptime // Get initial uptime
let initial_uptime = drt.system_health.lock().uptime(); let initial_uptime = drt.system_health().lock().uptime();
// Update the gauge with initial value // Update the gauge with initial value
drt.system_health.lock().update_uptime_gauge(); drt.system_health().lock().update_uptime_gauge();
// Sleep for 100ms // Sleep for 100ms
tokio::time::sleep(std::time::Duration::from_millis(100)).await; tokio::time::sleep(std::time::Duration::from_millis(100)).await;
// Get uptime after sleep // Get uptime after sleep
let uptime_after_sleep = drt.system_health.lock().uptime(); let uptime_after_sleep = drt.system_health().lock().uptime();
// Update the gauge again // Update the gauge again
drt.system_health.lock().update_uptime_gauge(); drt.system_health().lock().update_uptime_gauge();
// Verify uptime increased by at least 100ms // Verify uptime increased by at least 100ms
let elapsed = uptime_after_sleep - initial_uptime; let elapsed = uptime_after_sleep - initial_uptime;
...@@ -582,8 +584,8 @@ mod integration_tests { ...@@ -582,8 +584,8 @@ mod integration_tests {
struct TestHandler; struct TestHandler;
#[async_trait] #[async_trait]
impl AsyncEngine<SingleIn<String>, ManyOut<Annotated<String>>, Error> for TestHandler { impl AsyncEngine<SingleIn<String>, ManyOut<Annotated<String>>, anyhow::Error> for TestHandler {
async fn generate(&self, input: SingleIn<String>) -> crate::Result<ManyOut<Annotated<String>>> { async fn generate(&self, input: SingleIn<String>) -> anyhow::Result<ManyOut<Annotated<String>>> {
let (data, ctx) = input.into_parts(); let (data, ctx) = input.into_parts();
let response = Annotated::from_data(format!("You responded: {}", data)); let response = Annotated::from_data(format!("You responded: {}", data));
Ok(crate::pipeline::ResponseStream::new( Ok(crate::pipeline::ResponseStream::new(
...@@ -733,8 +735,9 @@ mod integration_tests { ...@@ -733,8 +735,9 @@ mod integration_tests {
// Register the endpoint and its health check payload // Register the endpoint and its health check payload
{ {
let system_health = drt.system_health.lock(); let system_health = drt.system_health();
system_health.register_health_check_target( let system_health_lock = system_health.lock();
system_health_lock.register_health_check_target(
endpoint, endpoint,
crate::component::Instance { crate::component::Instance {
component: "test_component".to_string(), component: "test_component".to_string(),
...@@ -760,7 +763,7 @@ mod integration_tests { ...@@ -760,7 +763,7 @@ mod integration_tests {
); );
// Set endpoint to healthy state // Set endpoint to healthy state
drt.system_health drt.system_health()
.lock() .lock()
.set_endpoint_health_status(endpoint, HealthStatus::Ready); .set_endpoint_health_status(endpoint, HealthStatus::Ready);
...@@ -777,7 +780,7 @@ mod integration_tests { ...@@ -777,7 +780,7 @@ mod integration_tests {
// Verify the endpoint status in SystemHealth directly // Verify the endpoint status in SystemHealth directly
let endpoint_status = drt let endpoint_status = drt
.system_health .system_health()
.lock() .lock()
.get_endpoint_health_status(endpoint); .get_endpoint_health_status(endpoint);
assert_eq!( assert_eq!(
......
...@@ -16,7 +16,7 @@ pub trait DistributedRuntimeProvider { ...@@ -16,7 +16,7 @@ pub trait DistributedRuntimeProvider {
impl RuntimeProvider for DistributedRuntime { impl RuntimeProvider for DistributedRuntime {
fn rt(&self) -> &Runtime { fn rt(&self) -> &Runtime {
&self.runtime self.runtime()
} }
} }
......
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0 // SPDX-License-Identifier: Apache-2.0
use async_trait::async_trait;
use serde::{Deserialize, Serialize};
use std::fmt::Debug; use std::fmt::Debug;
use crate::Result; use anyhow::Result;
use async_trait::async_trait;
use serde::{Deserialize, Serialize};
// #[async_trait] // #[async_trait]
// pub trait Publisher: Debug + Clone + Send + Sync { // pub trait Publisher: Debug + Clone + Send + Sync {
......
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0 // SPDX-License-Identifier: Apache-2.0
use crate::{CancellationToken, ErrorContext, Result, Runtime, error}; use crate::runtime::Runtime;
use anyhow::{Context, Result};
use async_nats::jetstream::kv; use async_nats::jetstream::kv;
use derive_builder::Builder; use derive_builder::Builder;
...@@ -19,6 +20,7 @@ use etcd_client::{ ...@@ -19,6 +20,7 @@ use etcd_client::{
}; };
pub use etcd_client::{ConnectOptions, KeyValue, LeaseClient}; pub use etcd_client::{ConnectOptions, KeyValue, LeaseClient};
use tokio::time::{Duration, interval}; use tokio::time::{Duration, interval};
use tokio_util::sync::CancellationToken;
mod connector; mod connector;
mod lease; mod lease;
...@@ -139,7 +141,7 @@ impl Client { ...@@ -139,7 +141,7 @@ impl Client {
for resp in result.op_responses() { for resp in result.op_responses() {
tracing::warn!(response = ?resp, "kv_create etcd op response"); tracing::warn!(response = ?resp, "kv_create etcd op response");
} }
Err(error!("Unable to create key. Check etcd server status")) anyhow::bail!("Unable to create key. Check etcd server status")
} }
} }
...@@ -180,17 +182,15 @@ impl Client { ...@@ -180,17 +182,15 @@ impl Client {
Some(response) => match response { Some(response) => match response {
TxnOpResponse::Txn(response) => match response.succeeded() { TxnOpResponse::Txn(response) => match response.succeeded() {
true => Ok(()), true => Ok(()),
false => Err(error!( false => anyhow::bail!(
"Unable to create or validate key. Check etcd server status" "Unable to create or validate key. Check etcd server status"
)), ),
}, },
_ => Err(error!( _ => {
"Unable to validate key operation. Check etcd server status" anyhow::bail!("Unable to validate key operation. Check etcd server status")
)), }
}, },
None => Err(error!( None => anyhow::bail!("Unable to create or validate key. Check etcd server status"),
"Unable to create or validate key. Check etcd server status"
)),
} }
} }
} }
...@@ -372,7 +372,7 @@ impl Client { ...@@ -372,7 +372,7 @@ impl Client {
// Get the start revision // Get the start revision
let mut start_revision = get_response let mut start_revision = get_response
.header() .header()
.ok_or(error!("missing header; unable to get revision"))? .ok_or(anyhow::anyhow!("missing header; unable to get revision"))?
.revision(); .revision();
tracing::trace!("{prefix}: start_revision: {start_revision}"); tracing::trace!("{prefix}: start_revision: {start_revision}");
start_revision += 1; start_revision += 1;
......
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0 // SPDX-License-Identifier: Apache-2.0
use crate::{ErrorContext, Result, error}; use anyhow::{Context, Result};
use etcd_client::ConnectOptions; use etcd_client::ConnectOptions;
use parking_lot::RwLock; use parking_lot::RwLock;
use std::{sync::Arc, time::Duration}; use std::{sync::Arc, time::Duration};
...@@ -76,9 +76,7 @@ impl Connector { ...@@ -76,9 +76,7 @@ impl Connector {
loop { loop {
backoff_state.apply_backoff(deadline).await; backoff_state.apply_backoff(deadline).await;
if std::time::Instant::now() >= deadline { if std::time::Instant::now() >= deadline {
return Err(error!( anyhow::bail!("Unable to reconnect to ETCD cluster: deadline exceeded");
"Unable to reconnect to ETCD cluster: deadline exceeded"
));
} }
match Self::connect(&self.etcd_urls, &self.connect_options).await { match Self::connect(&self.etcd_urls, &self.connect_options).await {
......
...@@ -7,7 +7,7 @@ use std::time::Duration; ...@@ -7,7 +7,7 @@ use std::time::Duration;
use etcd_client::{Compare, CompareOp, PutOptions, Txn, TxnOp}; use etcd_client::{Compare, CompareOp, PutOptions, Txn, TxnOp};
use crate::Result; use anyhow::Result;
use super::Client; use super::Client;
......
...@@ -16,9 +16,10 @@ ...@@ -16,9 +16,10 @@
//! - `NATS_AUTH_CREDENTIALS_FILE`: the path to the credentials file //! - `NATS_AUTH_CREDENTIALS_FILE`: the path to the credentials file
//! //!
//! Note: `NATS_AUTH_USERNAME` and `NATS_AUTH_PASSWORD` must be used together. //! Note: `NATS_AUTH_USERNAME` and `NATS_AUTH_PASSWORD` must be used together.
use crate::metrics::MetricsHierarchy;
use crate::traits::events::EventPublisher; use crate::traits::events::EventPublisher;
use crate::{Result, metrics::MetricsHierarchy};
use anyhow::Result;
use async_nats::connection::State; use async_nats::connection::State;
use async_nats::{Subscriber, client, jetstream}; use async_nats::{Subscriber, client, jetstream};
use async_trait::async_trait; use async_trait::async_trait;
......
...@@ -28,7 +28,6 @@ use tokio::{ ...@@ -28,7 +28,6 @@ use tokio::{
task::{JoinError, JoinHandle}, task::{JoinError, JoinHandle},
}; };
use tokio_util::sync::CancellationToken; use tokio_util::sync::CancellationToken;
use tracing as log;
// Core message types // Core message types
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
...@@ -116,11 +115,11 @@ impl Server { ...@@ -116,11 +115,11 @@ impl Server {
// but we also propagate the error to the caller's cancellation token // but we also propagate the error to the caller's cancellation token
let watch_task = tokio::spawn(async move { let watch_task = tokio::spawn(async move {
let result = primary_task.await.inspect_err(|e| { let result = primary_task.await.inspect_err(|e| {
log::error!("zmq server/router task failed: {}", e); tracing::error!("zmq server/router task failed: {}", e);
cancel_token.cancel(); cancel_token.cancel();
})?; })?;
result.inspect_err(|e| { result.inspect_err(|e| {
log::error!("zmq server/router task failed: {}", e); tracing::error!("zmq server/router task failed: {}", e);
cancel_token.cancel(); cancel_token.cancel();
}) })
}); });
...@@ -156,7 +155,7 @@ impl Server { ...@@ -156,7 +155,7 @@ impl Server {
// let port = addr.as_socket().map(|s| s.port()); // let port = addr.as_socket().map(|s| s.port());
// if let Some(port) = port { // if let Some(port) = port {
// log::info!("Server listening on port {}", port); // tracing::info!("Server listening on port {}", port);
// } // }
loop { loop {
...@@ -169,7 +168,7 @@ impl Server { ...@@ -169,7 +168,7 @@ impl Server {
frames frames
}, },
Some(Err(e)) => { Some(Err(e)) => {
log::warn!("Error receiving message: {}", e); tracing::warn!("Error receiving message: {}", e);
continue; continue;
} }
None => break, None => break,
...@@ -177,7 +176,7 @@ impl Server { ...@@ -177,7 +176,7 @@ impl Server {
} }
_ = token.cancelled() => { _ = token.cancelled() => {
log::info!("Server shutting down"); tracing::info!("Server shutting down");
break; break;
} }
}; };
...@@ -203,7 +202,7 @@ impl Server { ...@@ -203,7 +202,7 @@ impl Server {
// first we try to send the data eagerly without blocking // first we try to send the data eagerly without blocking
let action = match tx.try_send(message.into()) { let action = match tx.try_send(message.into()) {
Ok(_) => { Ok(_) => {
log::trace!( tracing::trace!(
request_id, request_id,
"response data sent eagerly to stream: {} bytes", "response data sent eagerly to stream: {} bytes",
message_size message_size
...@@ -212,11 +211,14 @@ impl Server { ...@@ -212,11 +211,14 @@ impl Server {
} }
Err(e) => match e { Err(e) => match e {
mpsc::error::TrySendError::Closed(_) => { mpsc::error::TrySendError::Closed(_) => {
log::info!(request_id, "response stream was closed"); tracing::info!(request_id, "response stream was closed");
StreamAction::Close StreamAction::Close
} }
mpsc::error::TrySendError::Full(data) => { mpsc::error::TrySendError::Full(data) => {
log::warn!(request_id, "response stream is full; backpressue alert"); tracing::warn!(
request_id,
"response stream is full; backpressure alert"
);
// todo - add timeout - we are blocking all other streams // todo - add timeout - we are blocking all other streams
if (tx.send(data).await).is_err() { if (tx.send(data).await).is_err() {
StreamAction::Close StreamAction::Close
...@@ -245,7 +247,7 @@ impl Server { ...@@ -245,7 +247,7 @@ impl Server {
} else { } else {
// increment bytes_dropped // increment bytes_dropped
// increment messages_dropped // increment messages_dropped
log::trace!(request_id, "no active stream for request_id"); tracing::trace!(request_id, "no active stream for request_id");
} }
} }
......
...@@ -6,8 +6,8 @@ ...@@ -6,8 +6,8 @@
//! This module provides reusable patterns for watching etcd prefixes and maintaining //! This module provides reusable patterns for watching etcd prefixes and maintaining
//! HashMap-based state that automatically updates based on etcd events. //! HashMap-based state that automatically updates based on etcd events.
use crate::Result;
use crate::transports::etcd::{Client as EtcdClient, WatchEvent}; use crate::transports::etcd::{Client as EtcdClient, WatchEvent};
use anyhow::Result;
use etcd_client::KeyValue; use etcd_client::KeyValue;
use serde::de::DeserializeOwned; use serde::de::DeserializeOwned;
use std::collections::HashMap; use std::collections::HashMap;
......
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
//! and release builds. In development, the default is [DEFAULT_GRACEFUL_SHUTDOWN_TIMEOUT_DEBUG] and //! and release builds. In development, the default is [DEFAULT_GRACEFUL_SHUTDOWN_TIMEOUT_DEBUG] and
//! in release, the default is [DEFAULT_GRACEFUL_SHUTDOWN_TIMEOUT_RELEASE]. //! in release, the default is [DEFAULT_GRACEFUL_SHUTDOWN_TIMEOUT_RELEASE].
use super::{CancellationToken, Result, Runtime, RuntimeConfig, error}; use super::{CancellationToken, Runtime, RuntimeConfig};
use futures::Future; use futures::Future;
use once_cell::sync::OnceCell; use once_cell::sync::OnceCell;
...@@ -30,7 +30,7 @@ use tokio::{signal, task::JoinHandle}; ...@@ -30,7 +30,7 @@ use tokio::{signal, task::JoinHandle};
static RT: OnceCell<tokio::runtime::Runtime> = OnceCell::new(); static RT: OnceCell<tokio::runtime::Runtime> = OnceCell::new();
static RTHANDLE: OnceCell<tokio::runtime::Handle> = OnceCell::new(); static RTHANDLE: OnceCell<tokio::runtime::Handle> = OnceCell::new();
static INIT: OnceCell<Mutex<Option<tokio::task::JoinHandle<Result<()>>>>> = OnceCell::new(); static INIT: OnceCell<Mutex<Option<tokio::task::JoinHandle<anyhow::Result<()>>>>> = OnceCell::new();
const SHUTDOWN_MESSAGE: &str = const SHUTDOWN_MESSAGE: &str =
"Application received shutdown signal; attempting to gracefully shutdown"; "Application received shutdown signal; attempting to gracefully shutdown";
...@@ -54,30 +54,30 @@ pub struct Worker { ...@@ -54,30 +54,30 @@ pub struct Worker {
impl Worker { impl Worker {
/// Create a new [`Worker`] instance from [`RuntimeConfig`] settings which is sourced from the environment /// Create a new [`Worker`] instance from [`RuntimeConfig`] settings which is sourced from the environment
pub fn from_settings() -> Result<Worker> { pub fn from_settings() -> anyhow::Result<Worker> {
let config = RuntimeConfig::from_settings()?; let config = RuntimeConfig::from_settings()?;
Worker::from_config(config) Worker::from_config(config)
} }
/// Create a new [`Worker`] instance from a provided [`RuntimeConfig`] /// Create a new [`Worker`] instance from a provided [`RuntimeConfig`]
pub fn from_config(config: RuntimeConfig) -> Result<Worker> { pub fn from_config(config: RuntimeConfig) -> anyhow::Result<Worker> {
// if the runtime is already initialized, return an error // if the runtime is already initialized, return an error
if RT.get().is_some() || RTHANDLE.get().is_some() { if RT.get().is_some() || RTHANDLE.get().is_some() {
return Err(error!("Worker already initialized")); return Err(anyhow::anyhow!("Worker already initialized"));
} }
// create a new runtime and insert it into the OnceCell // create a new runtime and insert it into the OnceCell
// there is still a potential race-condition here, two threads cou have passed the first check // there is still a potential race-condition here, two threads cou have passed the first check
// but only one will succeed in inserting the runtime // but only one will succeed in inserting the runtime
let rt = RT.try_insert(config.create_runtime()?).map_err(|_| { let rt = RT.try_insert(config.create_runtime()?).map_err(|_| {
error!("Failed to create worker; Only a single Worker should ever be created") anyhow::anyhow!("Failed to create worker; Only a single Worker should ever be created")
})?; })?;
let runtime = Runtime::from_handle(rt.handle().clone())?; let runtime = Runtime::from_handle(rt.handle().clone())?;
Ok(Worker { runtime, config }) Ok(Worker { runtime, config })
} }
pub fn runtime_from_existing() -> Result<Runtime> { pub fn runtime_from_existing() -> anyhow::Result<Runtime> {
if let Some(rt) = RT.get() { if let Some(rt) = RT.get() {
Ok(Runtime::from_handle(rt.handle().clone())?) Ok(Runtime::from_handle(rt.handle().clone())?)
} else if let Some(rt) = RTHANDLE.get() { } else if let Some(rt) = RTHANDLE.get() {
...@@ -87,18 +87,19 @@ impl Worker { ...@@ -87,18 +87,19 @@ impl Worker {
} }
} }
pub fn tokio_runtime(&self) -> Result<&'static tokio::runtime::Runtime> { pub fn tokio_runtime(&self) -> anyhow::Result<&'static tokio::runtime::Runtime> {
RT.get().ok_or_else(|| error!("Worker not initialized")) RT.get()
.ok_or_else(|| anyhow::anyhow!("Worker not initialized"))
} }
pub fn runtime(&self) -> &Runtime { pub fn runtime(&self) -> &Runtime {
&self.runtime &self.runtime
} }
pub fn execute<F, Fut>(self, f: F) -> Result<()> pub fn execute<F, Fut>(self, f: F) -> anyhow::Result<()>
where where
F: FnOnce(Runtime) -> Fut + Send + 'static, F: FnOnce(Runtime) -> Fut + Send + 'static,
Fut: Future<Output = Result<()>> + Send + 'static, Fut: Future<Output = anyhow::Result<()>> + Send + 'static,
{ {
let runtime = self.runtime.clone(); let runtime = self.runtime.clone();
runtime.secondary().block_on(self.execute_internal(f))??; runtime.secondary().block_on(self.execute_internal(f))??;
...@@ -106,10 +107,10 @@ impl Worker { ...@@ -106,10 +107,10 @@ impl Worker {
Ok(()) Ok(())
} }
pub async fn execute_async<F, Fut>(self, f: F) -> Result<()> pub async fn execute_async<F, Fut>(self, f: F) -> anyhow::Result<()>
where where
F: FnOnce(Runtime) -> Fut + Send + 'static, F: FnOnce(Runtime) -> Fut + Send + 'static,
Fut: Future<Output = Result<()>> + Send + 'static, Fut: Future<Output = anyhow::Result<()>> + Send + 'static,
{ {
let runtime = self.runtime.clone(); let runtime = self.runtime.clone();
let task = self.execute_internal(f); let task = self.execute_internal(f);
...@@ -120,10 +121,10 @@ impl Worker { ...@@ -120,10 +121,10 @@ impl Worker {
/// Executes the provided application/closure on the [`Runtime`]. /// Executes the provided application/closure on the [`Runtime`].
/// This is designed to be called once from main and will block the calling thread until the application completes. /// This is designed to be called once from main and will block the calling thread until the application completes.
fn execute_internal<F, Fut>(self, f: F) -> JoinHandle<Result<()>> fn execute_internal<F, Fut>(self, f: F) -> JoinHandle<anyhow::Result<()>>
where where
F: FnOnce(Runtime) -> Fut + Send + 'static, F: FnOnce(Runtime) -> Fut + Send + 'static,
Fut: Future<Output = Result<()>> + Send + 'static, Fut: Future<Output = anyhow::Result<()>> + Send + 'static,
{ {
let runtime = self.runtime.clone(); let runtime = self.runtime.clone();
let primary = runtime.primary(); let primary = runtime.primary();
...@@ -142,13 +143,13 @@ impl Worker { ...@@ -142,13 +143,13 @@ impl Worker {
INIT.set(Mutex::new(Some(secondary.spawn(async move { INIT.set(Mutex::new(Some(secondary.spawn(async move {
// start signal handler // start signal handler
tokio::spawn(signal_handler(runtime.cancellation_token.clone())); tokio::spawn(signal_handler(runtime.primary_token().clone()));
let cancel_token = runtime.child_token(); let cancel_token = runtime.child_token();
let (mut app_tx, app_rx) = tokio::sync::oneshot::channel::<()>(); let (mut app_tx, app_rx) = tokio::sync::oneshot::channel::<()>();
// spawn a task to run the application // spawn a task to run the application
let task: JoinHandle<Result<()>> = primary.spawn(async move { let task: JoinHandle<anyhow::Result<()>> = primary.spawn(async move {
let _rx = app_rx; let _rx = app_rx;
f(runtime).await f(runtime).await
}); });
...@@ -195,9 +196,9 @@ impl Worker { ...@@ -195,9 +196,9 @@ impl Worker {
.expect("Application initialized; but another thread is awaiting it; Worker.execute() can only be called once") .expect("Application initialized; but another thread is awaiting it; Worker.execute() can only be called once")
} }
pub fn from_current() -> Result<Worker> { pub fn from_current() -> anyhow::Result<Worker> {
if RT.get().is_some() || RTHANDLE.get().is_some() { if RT.get().is_some() || RTHANDLE.get().is_some() {
return Err(error!("Worker already initialized")); return Err(anyhow::anyhow!("Worker already initialized"));
} }
let runtime = Runtime::from_current()?; let runtime = Runtime::from_current()?;
let config = RuntimeConfig::from_settings()?; let config = RuntimeConfig::from_settings()?;
...@@ -206,7 +207,7 @@ impl Worker { ...@@ -206,7 +207,7 @@ impl Worker {
} }
/// Catch signals and trigger a shutdown /// Catch signals and trigger a shutdown
async fn signal_handler(cancel_token: CancellationToken) -> Result<()> { async fn signal_handler(cancel_token: CancellationToken) -> anyhow::Result<()> {
let ctrl_c = async { let ctrl_c = async {
signal::ctrl_c().await?; signal::ctrl_c().await?;
anyhow::Ok(()) anyhow::Ok(())
......
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0 // SPDX-License-Identifier: Apache-2.0
use dynamo_runtime::{Result, Runtime, worker::Worker}; use anyhow::Result;
use dynamo_runtime::{Runtime, worker::Worker};
async fn hello_world(_runtime: Runtime) -> Result<()> { async fn hello_world(_runtime: Runtime) -> Result<()> {
Ok(()) Ok(())
...@@ -12,50 +14,3 @@ fn test_lifecycle() { ...@@ -12,50 +14,3 @@ fn test_lifecycle() {
let worker = Worker::from_settings().unwrap(); let worker = Worker::from_settings().unwrap();
worker.execute(hello_world).unwrap(); worker.execute(hello_world).unwrap();
} }
// async fn discoverable(runtime: Runtime) -> Result<()> {
// let config = DiscoveryConfig {
// etcd_url: vec!["http://localhost:2379".to_string()],
// etcd_connect_options: None,
// };
// let client = DiscoveryClient::new(config, runtime.clone()).await?;
// println!("Primary lease id: {:x}", client.lease_id());
// let lease = client.create_lease(60).await?;
// // Keys and values
// let lock_key = "lock_key"; // Key for the lock
// let object_key = "object_key"; // Key for the object
// let object_value = "This is the object value"; // Value for the object
// let lock_value = "locked"; // Value indicating a lock
// let put_options = Some(PutOptions::new().with_lease(lease.id()));
// // Build the transaction
// let txn = Txn::new()
// .when(vec![Compare::version(lock_key, CompareOp::Equal, 0)]) // Ensure the lock does not exist
// .and_then(vec![
// TxnOp::put(object_key, object_value, put_options.clone()), // Create the object
// TxnOp::put(lock_key, lock_value, put_options), // Set the lock
// ]);
// // Execute the transaction
// let txn_response = client.etc_client().kv_client().txn(txn).await?;
// tokio::spawn(async move {
// println!("custom lease id: {:x}", lease.id());
// lease.cancel_token().cancelled().await;
// println!("custom lease revoked");
// });
// runtime.child_token().cancelled().await;
// Ok(())
// }
// #[test]
// fn test_discovery_client() {
// let runtime = Runtime::new(RuntimeConfig::default()).unwrap();
// runtime.execute(discoverable).unwrap();
// }
...@@ -3,17 +3,23 @@ ...@@ -3,17 +3,23 @@
#![allow(dead_code)] #![allow(dead_code)]
use anyhow::Error;
use futures::{StreamExt, stream}; use futures::{StreamExt, stream};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::{sync::Arc, time::Duration}; use std::{sync::Arc, time::Duration};
use dynamo_runtime::engine::ResponseStream; use dynamo_runtime::engine::ResponseStream;
use dynamo_runtime::{ use dynamo_runtime::pipeline::{
Error, AsyncEngine,
pipeline::{ Data,
AsyncEngine, Data, Event, ManyOut, Operator, ServiceBackend, ServiceEngine, Event,
ServiceFrontend, SingleIn, async_trait, *, ManyOut,
}, Operator,
ServiceBackend,
ServiceEngine,
ServiceFrontend,
SingleIn,
*, // TODO remove the star
}; };
mod common; mod common;
...@@ -46,7 +52,7 @@ pub enum Annotated<T: Data> { ...@@ -46,7 +52,7 @@ pub enum Annotated<T: Data> {
/// to the output stream /// to the output stream
struct PreprocesOperator {} struct PreprocesOperator {}
#[async_trait] #[async_trait::async_trait]
impl impl
Operator< Operator<
SingleIn<String>, SingleIn<String>,
......
...@@ -18,7 +18,7 @@ mod integration { ...@@ -18,7 +18,7 @@ mod integration {
pub const DEFAULT_NAMESPACE: &str = "dynamo"; pub const DEFAULT_NAMESPACE: &str = "dynamo";
use dynamo_runtime::{ use dynamo_runtime::{
DistributedRuntime, ErrorContext, Result, Runtime, Worker, logging, DistributedRuntime, Runtime, Worker, logging,
pipeline::{ pipeline::{
AsyncEngine, AsyncEngineContextProvider, Error, ManyOut, PushRouter, ResponseStream, AsyncEngine, AsyncEngineContextProvider, Error, ManyOut, PushRouter, ResponseStream,
SingleIn, async_trait, network::Ingress, SingleIn, async_trait, network::Ingress,
...@@ -26,6 +26,8 @@ mod integration { ...@@ -26,6 +26,8 @@ mod integration {
protocols::annotated::Annotated, protocols::annotated::Annotated,
stream, stream,
}; };
use anyhow::{Context, Result};
use futures::StreamExt; use futures::StreamExt;
use std::{ use std::{
sync::Arc, sync::Arc,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment