"docs/guides/dynamo_deploy/metrics.md" did not exist on "5375af2c52d2d5a218e3936d9c23918da5855b17"
Unverified Commit 09b26bf6 authored by mohammedabdulwahhab, committed by GitHub
Browse files

fix: refactor to use service discovery (#4092)


Signed-off-by: mohammedabdulwahhab <furkhan324@berkeley.edu>
parent 04f7579b
......@@ -10,7 +10,7 @@ use crate::transports::nats::DRTNatsClientPrometheusMetrics;
use crate::{
ErrorContext,
component::{self, ComponentBuilder, Endpoint, InstanceSource, Namespace},
discovery::DiscoveryClient,
discovery::Discovery,
metrics::PrometheusUpdateCallback,
metrics::{MetricsHierarchy, MetricsRegistry},
service::ServiceClient,
......@@ -92,12 +92,13 @@ impl DistributedRuntime {
let nats_client_for_metrics = nats_client.clone();
// Initialize discovery client with mock implementation
// TODO: Replace MockDiscoveryClient with KeyValueStoreDiscoveryClient or KubeDiscoveryClient
// Initialize discovery backed by KV store
let discovery_client = {
use crate::discovery::{MockDiscoveryClient, SharedMockRegistry};
let registry = SharedMockRegistry::new();
Arc::new(MockDiscoveryClient::new(None, registry)) as Arc<dyn DiscoveryClient>
use crate::discovery::KVStoreDiscovery;
Arc::new(KVStoreDiscovery::new(
store.clone(),
runtime.primary_token(),
)) as Arc<dyn Discovery>
};
let distributed_runtime = Self {
......@@ -242,9 +243,9 @@ impl DistributedRuntime {
Namespace::new(self.clone(), name.into(), self.is_static)
}
/// TODO: Return discovery client when KeyValueDiscoveryClient or KubeDiscoveryClient is implemented
pub fn discovery_client(&self) -> Result<Arc<dyn DiscoveryClient>> {
Err(error!("Discovery client not implemented!"))
/// Returns the discovery interface for service registration and discovery
pub fn discovery(&self) -> Arc<dyn Discovery> {
self.discovery_client.clone()
}
pub(crate) fn service_client(&self) -> Option<ServiceClient> {
......
......@@ -9,25 +9,22 @@
use std::sync::Arc;
use crate::component::{INSTANCE_ROOT_PATH, Instance};
use crate::storage::key_value_store::{KeyValueStore, KeyValueStoreManager};
use crate::transports::etcd::Client as EtcdClient;
use crate::component::Instance;
use crate::discovery::{Discovery, DiscoveryQuery};
pub async fn list_all_instances(client: &KeyValueStoreManager) -> anyhow::Result<Vec<Instance>> {
let Some(bucket) = client.get_bucket(INSTANCE_ROOT_PATH).await? else {
return Ok(vec![]);
};
pub async fn list_all_instances(
discovery_client: Arc<dyn Discovery>,
) -> anyhow::Result<Vec<Instance>> {
let discovery_instances = discovery_client.list(DiscoveryQuery::AllEndpoints).await?;
let mut instances: Vec<Instance> = discovery_instances
.into_iter()
.filter_map(|di| match di {
crate::discovery::DiscoveryInstance::Endpoint(instance) => Some(instance),
_ => None, // Ignore all other variants (ModelCard, etc.)
})
.collect();
let entries = bucket.entries().await?;
let mut instances = Vec::with_capacity(entries.len());
for (name, bytes) in entries.into_iter() {
match serde_json::from_slice::<Instance>(&bytes) {
Ok(instance) => instances.push(instance),
Err(err) => {
tracing::warn!(%err, key = name, "Failed to parse instance from storage");
}
}
}
instances.sort();
Ok(instances)
......
......@@ -96,8 +96,8 @@ pub struct DistributedRuntime {
tcp_server: Arc<OnceCell<Arc<transports::tcp::server::TcpStreamServer>>>,
system_status_server: Arc<OnceLock<Arc<system_status_server::SystemStatusServerInfo>>>,
// Service discovery client
discovery_client: Arc<dyn discovery::DiscoveryClient>,
// Service discovery interface
discovery_client: Arc<dyn discovery::Discovery>,
// local registry for components
// the registry allows us to share runtime resources across instances of the same component object.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment