Unverified Commit 80cac7c1 authored by Tzu-Ling Kan's avatar Tzu-Ling Kan Committed by GitHub
Browse files

feat: Remove Component from public (#6403)


Signed-off-by: tzulingk@nvidia.com <tzulingk@nvidia.com>
parent cb55766c
......@@ -47,6 +47,7 @@ use async_nats::{
rustls::quic,
service::{Service, ServiceExt},
};
use dashmap::DashMap;
use derive_builder::Builder;
use derive_getters::Getters;
use educe::Educe;
......@@ -425,6 +426,13 @@ pub struct Namespace {
/// This hierarchy's own metrics registry
#[builder(default = "crate::MetricsRegistry::new()")]
metrics_registry: crate::MetricsRegistry,
/// Cache for components to avoid duplicate registrations and metrics collisions.
/// When the same component is requested multiple times, we return the cached instance
/// to ensure all endpoints share the same Component and MetricsRegistry.
/// Uses DashMap, a sharded concurrent map: reads only take a per-shard read lock,
/// and concurrent inserts are synchronized per shard.
#[builder(default = "Arc::new(DashMap::new())")]
component_cache: Arc<DashMap<String, Component>>,
}
impl DistributedRuntimeProvider for Namespace {
......@@ -469,14 +477,34 @@ impl Namespace {
}
/// Create a [`Component`] in the namespace who's endpoints can be discovered with etcd
///
/// Components are cached by name to ensure that multiple calls with the same name
/// return the same Component instance. This prevents duplicate metrics registrations
/// and ensures all endpoints share the same Component's MetricsRegistry.
pub fn component(&self, name: impl Into<String>) -> anyhow::Result<Component> {
let name = name.into();
// Fast path: Check if component exists in cache
// DashMap provides lock-free reads via internal sharding
if let Some(cached) = self.component_cache.get(&name) {
return Ok(cached.value().clone());
}
// Slow path: Create new component
let component = ComponentBuilder::from_runtime(self.runtime.clone())
.name(name)
.name(&name)
.namespace(self.clone())
.build()?;
// Attach component registry so scrapes traverse separate registries (avoids collisions).
self.get_metrics_registry()
.add_child_registry(component.get_metrics_registry());
// Cache the component for future calls
// DashMap handles race conditions internally - if another thread
// inserted the same key concurrently, we just use our created component
self.component_cache.insert(name, component.clone());
Ok(component)
}
......
......@@ -407,7 +407,7 @@ def test_router_decisions_trtllm_attention_dp(
# Get runtime and create endpoint
runtime = get_runtime(request_plane=request_plane)
# Use the namespace from the vLLM workers
# Use the namespace from the TRT-LLM workers
endpoint = runtime.endpoint(f"{trtllm_workers.namespace}.tensorrt_llm.generate")
_test_router_decisions(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment