"vscode:/vscode.git/clone" did not exist on "76e2727b5c630fdad3b054c717e7ae4bdd5e2d8e"
Unverified commit ddab4fc7, authored by Simo Lin, committed by GitHub
Browse files

[router] fix cache aware routing strategy and lock contention (#10773)

parent d21c3522
...@@ -219,7 +219,7 @@ jobs: ...@@ -219,7 +219,7 @@ jobs:
--decode http://127.0.0.7:30007 \ --decode http://127.0.0.7:30007 \
--decode http://127.0.0.8:30008 \ --decode http://127.0.0.8:30008 \
--host 127.0.0.9 \ --host 127.0.0.9 \
--log-level warning \ --log-level warn \
--port 8000 & --port 8000 &
ROUTER_PID=$! ROUTER_PID=$!
......
...@@ -129,11 +129,14 @@ impl CacheAwarePolicy { ...@@ -129,11 +129,14 @@ impl CacheAwarePolicy {
// Use "default" for unknown/empty model_ids for backward compatibility // Use "default" for unknown/empty model_ids for backward compatibility
let model_id = worker.model_id(); let model_id = worker.model_id();
let tree_key = if model_id.is_empty() || model_id == "unknown" { let tree_key = if model_id.is_empty() || model_id == "unknown" {
"default".to_string() "default"
} else { } else {
model_id.to_string() model_id
}; };
model_workers.entry(tree_key).or_default().push(worker); model_workers
.entry(tree_key.to_string())
.or_default()
.push(worker);
} }
// Initialize tree for each model // Initialize tree for each model
...@@ -153,11 +156,11 @@ impl CacheAwarePolicy { ...@@ -153,11 +156,11 @@ impl CacheAwarePolicy {
// use a default tree. This preserves existing behavior for single-model routers. // use a default tree. This preserves existing behavior for single-model routers.
let model_id = worker.model_id(); let model_id = worker.model_id();
let tree_key = if model_id.is_empty() || model_id == "unknown" { let tree_key = if model_id.is_empty() || model_id == "unknown" {
"default".to_string() "default"
} else { } else {
model_id.to_string() model_id
}; };
let tree = trees.entry(tree_key).or_insert_with(Tree::new); let tree = trees.entry(tree_key.to_string()).or_insert_with(Tree::new);
tree.insert("", worker.url()); tree.insert("", worker.url());
} }
} }
...@@ -176,11 +179,11 @@ impl CacheAwarePolicy { ...@@ -176,11 +179,11 @@ impl CacheAwarePolicy {
// Use same logic as add_worker for consistency // Use same logic as add_worker for consistency
let model_id = worker.model_id(); let model_id = worker.model_id();
let tree_key = if model_id.is_empty() || model_id == "unknown" { let tree_key = if model_id.is_empty() || model_id == "unknown" {
"default".to_string() "default"
} else { } else {
model_id.to_string() model_id
}; };
if let Some(tree) = trees.get_mut(&tree_key) { if let Some(tree) = trees.get_mut(tree_key) {
tree.remove_tenant(worker.url()); tree.remove_tenant(worker.url());
} }
} }
...@@ -222,17 +225,14 @@ impl LoadBalancingPolicy for CacheAwarePolicy { ...@@ -222,17 +225,14 @@ impl LoadBalancingPolicy for CacheAwarePolicy {
return None; return None;
} }
// Group workers by model (using "default" for unknown/empty model_ids) // Determine the model for this set of workers (router pre-filters by model)
let mut model_workers: HashMap<String, Vec<usize>> = HashMap::new(); // All workers should be from the same model
for idx in &healthy_indices { let first_model = workers[healthy_indices[0]].model_id();
let model_id = workers[*idx].model_id(); let model_id = if first_model.is_empty() || first_model == "unknown" {
let tree_key = if model_id.is_empty() || model_id == "unknown" { "default"
"default".to_string() } else {
} else { first_model
model_id.to_string() };
};
model_workers.entry(tree_key).or_default().push(*idx);
}
// Get current load statistics // Get current load statistics
let loads: Vec<usize> = workers.iter().map(|w| w.load()).collect(); let loads: Vec<usize> = workers.iter().map(|w| w.load()).collect();
...@@ -267,13 +267,18 @@ impl LoadBalancingPolicy for CacheAwarePolicy { ...@@ -267,13 +267,18 @@ impl LoadBalancingPolicy for CacheAwarePolicy {
// Even in imbalanced mode, update the tree to maintain cache state // Even in imbalanced mode, update the tree to maintain cache state
if let Some(text) = request_text { if let Some(text) = request_text {
if let Ok(mut trees) = self.trees.lock() { if let Ok(mut trees) = self.trees.lock() {
let model_id = workers[min_load_idx].model_id(); // Avoid allocation if tree already exists
let tree_key = if model_id.is_empty() || model_id == "unknown" { let tree = if let Some(tree) = trees.get_mut(model_id) {
"default".to_string() tree
} else { } else {
model_id.to_string() // Create new tree and initialize with all workers
let new_tree = Tree::new();
// Initialize with all healthy workers like OLD version does
for &idx in &healthy_indices {
new_tree.insert("", workers[idx].url());
}
trees.entry(model_id.to_string()).or_insert(new_tree)
}; };
let tree = trees.entry(tree_key).or_insert_with(Tree::new);
tree.insert(text, workers[min_load_idx].url()); tree.insert(text, workers[min_load_idx].url());
} }
} }
...@@ -290,84 +295,54 @@ impl LoadBalancingPolicy for CacheAwarePolicy { ...@@ -290,84 +295,54 @@ impl LoadBalancingPolicy for CacheAwarePolicy {
let text = request_text.unwrap_or(""); let text = request_text.unwrap_or("");
if let Ok(mut trees) = self.trees.lock() { if let Ok(mut trees) = self.trees.lock() {
let mut best_match_idx: Option<usize> = None; // Avoid allocation if tree already exists
let mut best_match_rate: f32 = 0.0; let tree = if let Some(tree) = trees.get_mut(model_id) {
tree
// Find best match across all models } else {
for (model_id, worker_indices) in &model_workers { // Create new tree and initialize with all workers
let tree = trees.entry(model_id.clone()).or_insert_with(Tree::new); let new_tree = Tree::new();
// Initialize with all healthy workers like OLD version does
let (matched_text, matched_worker) = tree.prefix_match(text); for &idx in &healthy_indices {
let match_rate = if text.is_empty() { new_tree.insert("", workers[idx].url());
0.0
} else {
matched_text.chars().count() as f32 / text.chars().count() as f32
};
// Check if this model has the best match
if match_rate > best_match_rate {
// Find the worker index for this URL
if let Some(idx) = worker_indices
.iter()
.find(|&&idx| workers[idx].url() == matched_worker)
{
best_match_idx = Some(*idx);
best_match_rate = match_rate;
}
} }
} trees.entry(model_id.to_string()).or_insert(new_tree)
};
let (matched_text, matched_worker) = tree.prefix_match(text);
let match_rate = if text.is_empty() {
0.0
} else {
matched_text.chars().count() as f32 / text.chars().count() as f32
};
// Select worker based on cache threshold let selected_url = if match_rate > self.config.cache_threshold {
let selected_idx = if let (Some(idx), true) = (
best_match_idx,
best_match_rate > self.config.cache_threshold,
) {
RouterMetrics::record_cache_hit(); RouterMetrics::record_cache_hit();
idx matched_worker.to_string()
} else { } else {
RouterMetrics::record_cache_miss(); RouterMetrics::record_cache_miss();
tree.get_smallest_tenant()
};
// Find model with smallest tree (most cache capacity) // Find the index of the selected worker
let mut smallest_tree_model = String::new(); if let Some(selected_idx) = workers.iter().position(|w| w.url() == selected_url) {
let mut smallest_tree_size = usize::MAX; // Only proceed if the worker is healthy - use direct check like OLD version
if workers[selected_idx].is_healthy() {
// Update the tree with this request
tree.insert(text, &selected_url);
for model_id in model_workers.keys() { // Increment processed counter
let tree = trees.entry(model_id.clone()).or_insert_with(Tree::new); workers[selected_idx].increment_processed();
let size = tree.get_used_size_per_tenant().values().sum::<usize>(); RouterMetrics::record_processed_request(&selected_url);
if size < smallest_tree_size {
smallest_tree_size = size;
smallest_tree_model = model_id.clone();
}
}
// Select least loaded worker from model with most cache capacity return Some(selected_idx);
if let Some(worker_indices) = model_workers.get(&smallest_tree_model) {
worker_indices
.iter()
.min_by_key(|&&idx| workers[idx].load())
.copied()
.unwrap_or(healthy_indices[0])
} else {
healthy_indices[0]
} }
};
// Update the tree with this request
let model_id = workers[selected_idx].model_id();
let tree_key = if model_id.is_empty() || model_id == "unknown" {
"default".to_string()
} else { } else {
model_id.to_string() // Selected worker no longer exists, remove it from tree
}; tree.remove_tenant(&selected_url);
let tree = trees.entry(tree_key).or_insert_with(Tree::new); debug!("Removed stale worker {} from cache tree", selected_url);
tree.insert(text, workers[selected_idx].url()); }
// Increment processed counter
workers[selected_idx].increment_processed();
RouterMetrics::record_processed_request(workers[selected_idx].url());
RouterMetrics::record_policy_decision(self.name(), workers[selected_idx].url());
return Some(selected_idx); // Fallback to first healthy worker
return healthy_indices.first().copied();
} }
// Fallback to first healthy worker if tree operations fail // Fallback to first healthy worker if tree operations fail
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment