//! Router implementations use async_trait::async_trait; use axum::{ body::Body, extract::Request, http::{HeaderMap, StatusCode}, response::{IntoResponse, Response}, }; use std::fmt::Debug; use crate::protocols::spec::{ ChatCompletionRequest, CompletionRequest, GenerateRequest, RerankRequest, ResponsesRequest, }; pub mod factory; pub mod grpc; pub mod header_utils; pub mod http; pub mod router_manager; pub use factory::RouterFactory; // Re-export HTTP routers for convenience (keeps routers::openai_router path working) pub use http::{openai_router, pd_router, pd_types, router}; /// Worker management trait for administrative operations /// /// This trait is separate from RouterTrait to allow Send futures /// for use in service discovery and other background tasks #[async_trait] pub trait WorkerManagement: Send + Sync { /// Add a worker to the router async fn add_worker(&self, worker_url: &str) -> Result; /// Remove a worker from the router fn remove_worker(&self, worker_url: &str); /// Get all worker URLs fn get_worker_urls(&self) -> Vec; } /// Core trait for all router implementations /// /// This trait provides a unified interface for routing requests, /// regardless of whether it's a regular router or PD router. #[async_trait] pub trait RouterTrait: Send + Sync + Debug + WorkerManagement { /// Get a reference to self as Any for downcasting fn as_any(&self) -> &dyn std::any::Any; /// Route a health check request async fn health(&self, req: Request) -> Response; /// Route a health generate request async fn health_generate(&self, req: Request) -> Response; /// Get server information async fn get_server_info(&self, req: Request) -> Response; /// Get available models async fn get_models(&self, req: Request) -> Response; /// Get model information async fn get_model_info(&self, req: Request) -> Response; /// Route a generate request async fn route_generate( &self, headers: Option<&HeaderMap>, body: &GenerateRequest, model_id: Option<&str>, ) -> Response; /// Route a chat completion request async fn route_chat( &self, headers: Option<&HeaderMap>, body: &ChatCompletionRequest, model_id: Option<&str>, ) -> Response; /// Route a completion request async fn route_completion( &self, headers: Option<&HeaderMap>, body: &CompletionRequest, model_id: Option<&str>, ) -> Response; /// Route a responses request async fn route_responses( &self, headers: Option<&HeaderMap>, body: &ResponsesRequest, model_id: Option<&str>, ) -> Response; async fn route_embeddings(&self, headers: Option<&HeaderMap>, body: Body) -> Response; async fn route_rerank( &self, headers: Option<&HeaderMap>, body: &RerankRequest, model_id: Option<&str>, ) -> Response; /// Flush cache on all workers async fn flush_cache(&self) -> Response; /// Get worker loads (for monitoring) async fn get_worker_loads(&self) -> Response; /// Get router type name fn router_type(&self) -> &'static str; /// Check if this is a PD router fn is_pd_mode(&self) -> bool { self.router_type() == "pd" } /// Server liveness check - is the server process running fn liveness(&self) -> Response { // Simple liveness check - if we can respond, we're alive (StatusCode::OK, "OK").into_response() } /// Server readiness check - is the server ready to handle requests fn readiness(&self) -> Response; }