Unverified commit 5b24b429, authored by Yan Ru Pei, committed by GitHub
Browse files

chore: no need to arc wrap client (#4741)


Signed-off-by: PeaBrane <yanrpei@gmail.com>
parent 0651a4fe
...@@ -1031,7 +1031,7 @@ pub async fn create_worker_selection_pipeline_chat( ...@@ -1031,7 +1031,7 @@ pub async fn create_worker_selection_pipeline_chat(
// Create worker monitor if busy_threshold is set // Create worker monitor if busy_threshold is set
// Note: C bindings don't register with ModelManager, so HTTP endpoint won't see this // Note: C bindings don't register with ModelManager, so HTTP endpoint won't see this
let worker_monitor = busy_threshold.map(|t| KvWorkerMonitor::new(Arc::new(client.clone()), t)); let worker_monitor = busy_threshold.map(|t| KvWorkerMonitor::new(client.clone(), t));
let engine = build_routed_pipeline::< let engine = build_routed_pipeline::<
NvCreateChatCompletionRequest, NvCreateChatCompletionRequest,
......
...@@ -531,7 +531,7 @@ impl ModelManager { ...@@ -531,7 +531,7 @@ impl ModelManager {
pub fn get_or_create_worker_monitor( pub fn get_or_create_worker_monitor(
&self, &self,
model: &str, model: &str,
client: Arc<Client>, client: Client,
threshold: f64, threshold: f64,
) -> KvWorkerMonitor { ) -> KvWorkerMonitor {
let mut monitors = self.worker_monitors.write(); let mut monitors = self.worker_monitors.write();
......
...@@ -405,11 +405,8 @@ impl ModelWatcher { ...@@ -405,11 +405,8 @@ impl ModelWatcher {
// Get or create the worker monitor for this model // Get or create the worker monitor for this model
// This allows dynamic threshold updates via the ModelManager // This allows dynamic threshold updates via the ModelManager
let worker_monitor = self.router_config.busy_threshold.map(|threshold| { let worker_monitor = self.router_config.busy_threshold.map(|threshold| {
self.manager.get_or_create_worker_monitor( self.manager
card.name(), .get_or_create_worker_monitor(card.name(), client.clone(), threshold)
Arc::new(client.clone()),
threshold,
)
}); });
// Add chat engine only if the model supports chat // Add chat engine only if the model supports chat
......
...@@ -55,11 +55,11 @@ impl WorkerLoadState { ...@@ -55,11 +55,11 @@ impl WorkerLoadState {
/// Worker monitor for tracking KV cache usage and busy states. /// Worker monitor for tracking KV cache usage and busy states.
/// ///
/// All fields are `Arc`, so cloning shares state. This allows multiple pipelines /// Cloning shares state via internal Arc-wrapped fields. This allows multiple pipelines
/// (e.g., chat and completions) to share the same monitor instance. /// (e.g., chat and completions) to share the same monitor instance.
#[derive(Clone)] #[derive(Clone)]
pub struct KvWorkerMonitor { pub struct KvWorkerMonitor {
client: Arc<Client>, client: Client,
worker_load_states: Arc<RwLock<HashMap<u64, WorkerLoadState>>>, worker_load_states: Arc<RwLock<HashMap<u64, WorkerLoadState>>>,
/// Threshold stored as parts-per-10000 (e.g., 8500 = 0.85) /// Threshold stored as parts-per-10000 (e.g., 8500 = 0.85)
busy_threshold: Arc<AtomicU32>, busy_threshold: Arc<AtomicU32>,
...@@ -72,7 +72,7 @@ impl KvWorkerMonitor { ...@@ -72,7 +72,7 @@ impl KvWorkerMonitor {
/// ///
/// The threshold (0.0-1.0) controls when workers are considered busy based on /// The threshold (0.0-1.0) controls when workers are considered busy based on
/// KV cache utilization. It can be dynamically updated via `set_threshold()`. /// KV cache utilization. It can be dynamically updated via `set_threshold()`.
pub fn new(client: Arc<Client>, threshold: f64) -> Self { pub fn new(client: Client, threshold: f64) -> Self {
Self { Self {
client, client,
worker_load_states: Arc::new(RwLock::new(HashMap::new())), worker_load_states: Arc::new(RwLock::new(HashMap::new())),
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment