//! Load balancing policies for SGLang router //! //! This module provides a unified abstraction for routing policies that work //! across both regular and prefill-decode (PD) routing modes. use crate::core::Worker; use std::fmt::Debug; mod cache_aware; mod factory; mod power_of_two; mod random; mod round_robin; pub use cache_aware::CacheAwarePolicy; pub use factory::PolicyFactory; pub use power_of_two::PowerOfTwoPolicy; pub use random::RandomPolicy; pub use round_robin::RoundRobinPolicy; /// Core trait for load balancing policies /// /// This trait provides a unified interface for implementing routing algorithms /// that can work with both regular single-worker selection and PD dual-worker selection. pub trait LoadBalancingPolicy: Send + Sync + Debug { /// Select a single worker from the available workers /// /// This is used for regular routing mode where requests go to a single worker. fn select_worker( &self, workers: &[Box], request_text: Option<&str>, ) -> Option; /// Select a pair of workers (prefill and decode) for PD routing /// /// Returns indices of (prefill_worker, decode_worker) from their respective arrays. /// Default implementation uses select_worker for each array independently. fn select_worker_pair( &self, prefill_workers: &[Box], decode_workers: &[Box], request_text: Option<&str>, ) -> Option<(usize, usize)> { // Default implementation: independently select from each pool let prefill_idx = self.select_worker(prefill_workers, request_text)?; let decode_idx = self.select_worker(decode_workers, request_text)?; Some((prefill_idx, decode_idx)) } /// Update policy state after request completion /// /// This is called when a request completes (successfully or not) to allow /// policies to update their internal state. fn on_request_complete(&self, _worker_url: &str, _success: bool) { // Default: no-op for stateless policies } /// Get policy name for metrics and debugging fn name(&self) -> &'static str; /// Update worker load information /// /// This is called periodically with current load information for load-aware policies. fn update_loads(&self, _loads: &std::collections::HashMap) { // Default: no-op for policies that don't use load information } /// Reset any internal state /// /// This is useful for policies that maintain state (e.g., round-robin counters). fn reset(&self) { // Default: no-op for stateless policies } /// Get as Any for downcasting fn as_any(&self) -> &dyn std::any::Any; } /// Configuration for cache-aware policy #[derive(Debug, Clone)] pub struct CacheAwareConfig { pub cache_threshold: f32, pub balance_abs_threshold: usize, pub balance_rel_threshold: f32, pub eviction_interval_secs: u64, pub max_tree_size: usize, } impl Default for CacheAwareConfig { fn default() -> Self { Self { cache_threshold: 0.5, balance_abs_threshold: 32, balance_rel_threshold: 1.1, eviction_interval_secs: 30, max_tree_size: 10000, } } } /// Helper function to filter healthy workers and return their indices pub(crate) fn get_healthy_worker_indices(workers: &[Box]) -> Vec { workers .iter() .enumerate() .filter(|(_, w)| w.is_healthy()) .map(|(idx, _)| idx) .collect() } #[cfg(test)] mod tests { use super::*; use crate::core::{BasicWorker, WorkerType}; #[test] fn test_get_healthy_worker_indices() { let workers: Vec> = vec![ Box::new(BasicWorker::new( "http://w1:8000".to_string(), WorkerType::Regular, )), Box::new(BasicWorker::new( "http://w2:8000".to_string(), WorkerType::Regular, )), Box::new(BasicWorker::new( "http://w3:8000".to_string(), WorkerType::Regular, )), ]; // All healthy initially let indices = get_healthy_worker_indices(&workers); assert_eq!(indices, vec![0, 1, 2]); // Mark one unhealthy workers[1].set_healthy(false); let indices = get_healthy_worker_indices(&workers); assert_eq!(indices, vec![0, 2]); } }