Unverified commit 5b24b429, authored by Yan Ru Pei and committed by GitHub
Browse files

chore: no need to arc wrap client (#4741)


Signed-off-by: PeaBrane <yanrpei@gmail.com>
parent 0651a4fe
......@@ -1031,7 +1031,7 @@ pub async fn create_worker_selection_pipeline_chat(
// Create worker monitor if busy_threshold is set
// Note: C bindings don't register with ModelManager, so HTTP endpoint won't see this
let worker_monitor = busy_threshold.map(|t| KvWorkerMonitor::new(Arc::new(client.clone()), t));
let worker_monitor = busy_threshold.map(|t| KvWorkerMonitor::new(client.clone(), t));
let engine = build_routed_pipeline::<
NvCreateChatCompletionRequest,
......
......@@ -531,7 +531,7 @@ impl ModelManager {
pub fn get_or_create_worker_monitor(
&self,
model: &str,
client: Arc<Client>,
client: Client,
threshold: f64,
) -> KvWorkerMonitor {
let mut monitors = self.worker_monitors.write();
......
......@@ -405,11 +405,8 @@ impl ModelWatcher {
// Get or create the worker monitor for this model
// This allows dynamic threshold updates via the ModelManager
let worker_monitor = self.router_config.busy_threshold.map(|threshold| {
self.manager.get_or_create_worker_monitor(
card.name(),
Arc::new(client.clone()),
threshold,
)
self.manager
.get_or_create_worker_monitor(card.name(), client.clone(), threshold)
});
// Add chat engine only if the model supports chat
......
......@@ -55,11 +55,11 @@ impl WorkerLoadState {
/// Worker monitor for tracking KV cache usage and busy states.
///
/// All fields are `Arc`, so cloning shares state. This allows multiple pipelines
/// Cloning shares state via internal Arc-wrapped fields. This allows multiple pipelines
/// (e.g., chat and completions) to share the same monitor instance.
#[derive(Clone)]
pub struct KvWorkerMonitor {
client: Arc<Client>,
client: Client,
worker_load_states: Arc<RwLock<HashMap<u64, WorkerLoadState>>>,
/// Threshold stored as parts-per-10000 (e.g., 8500 = 0.85)
busy_threshold: Arc<AtomicU32>,
......@@ -72,7 +72,7 @@ impl KvWorkerMonitor {
///
/// The threshold (0.0-1.0) controls when workers are considered busy based on
/// KV cache utilization. It can be dynamically updated via `set_threshold()`.
pub fn new(client: Arc<Client>, threshold: f64) -> Self {
pub fn new(client: Client, threshold: f64) -> Self {
Self {
client,
worker_load_states: Arc::new(RwLock::new(HashMap::new())),
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment