//! Router implementations

use async_trait::async_trait;
4
5
6
7
8
9
use axum::{
    body::Body,
    extract::Request,
    http::{HeaderMap, StatusCode},
    response::{IntoResponse, Response},
};
10
11
use std::fmt::Debug;

12
13
14
use crate::protocols::spec::{
    ChatCompletionRequest, CompletionRequest, GenerateRequest, ResponsesRequest,
};
15

16
pub mod factory;
17
pub mod grpc;
18
pub mod header_utils;
19
pub mod http;
20
21

pub use factory::RouterFactory;
22
23
// Re-export HTTP routers for convenience (keeps routers::openai_router path working)
pub use http::{openai_router, pd_router, pd_types, router};
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44

/// Worker management trait for administrative operations.
///
/// Declares the operations for adding, removing, and listing workers by URL.
/// This trait is separate from `RouterTrait` to allow `Send` futures
/// for use in service discovery and other background tasks. `RouterTrait`
/// lists it as a supertrait, so every router also implements it.
#[async_trait]
pub trait WorkerManagement: Send + Sync {
    /// Add the worker at `worker_url` to the router.
    ///
    /// Returns `Ok` carrying a `String` on success and `Err` carrying a
    /// `String` on failure.
    /// NOTE(review): both strings are presumably human-readable messages —
    /// confirm against the implementors.
    async fn add_worker(&self, worker_url: &str) -> Result<String, String>;

    /// Remove the worker at `worker_url` from the router.
    ///
    /// Returns nothing, so a failure cannot be reported to the caller
    /// through this signature.
    fn remove_worker(&self, worker_url: &str);

    /// Get the URLs of all workers as owned strings.
    fn get_worker_urls(&self) -> Vec<String>;
}

/// Core trait for all router implementations
///
/// This trait provides a unified interface for routing requests,
/// regardless of whether it's a regular router or PD router.
45
#[async_trait]
46
47
48
pub trait RouterTrait: Send + Sync + Debug + WorkerManagement {
    /// Get a reference to self as Any for downcasting
    fn as_any(&self) -> &dyn std::any::Any;
49

50
    /// Route a health check request
51
    async fn health(&self, req: Request<Body>) -> Response;
52
53

    /// Route a health generate request
54
    async fn health_generate(&self, req: Request<Body>) -> Response;
55
56

    /// Get server information
57
    async fn get_server_info(&self, req: Request<Body>) -> Response;
58
59

    /// Get available models
60
    async fn get_models(&self, req: Request<Body>) -> Response;
61
62

    /// Get model information
63
    async fn get_model_info(&self, req: Request<Body>) -> Response;
64
65

    /// Route a generate request
66
67
    async fn route_generate(&self, headers: Option<&HeaderMap>, body: &GenerateRequest)
        -> Response;
68
69
70
71

    /// Route a chat completion request
    async fn route_chat(
        &self,
72
73
74
        headers: Option<&HeaderMap>,
        body: &ChatCompletionRequest,
    ) -> Response;
75
76
77
78

    /// Route a completion request
    async fn route_completion(
        &self,
79
80
81
        headers: Option<&HeaderMap>,
        body: &CompletionRequest,
    ) -> Response;
82

83
84
85
86
87
88
89
    /// Route a responses request
    async fn route_responses(
        &self,
        headers: Option<&HeaderMap>,
        body: &ResponsesRequest,
    ) -> Response;

90
91
92
93
    async fn route_embeddings(&self, headers: Option<&HeaderMap>, body: Body) -> Response;

    async fn route_rerank(&self, headers: Option<&HeaderMap>, body: Body) -> Response;

94
    /// Flush cache on all workers
95
    async fn flush_cache(&self) -> Response;
96
97

    /// Get worker loads (for monitoring)
98
    async fn get_worker_loads(&self) -> Response;
99
100
101
102
103
104
105
106
107
108

    /// Get router type name
    fn router_type(&self) -> &'static str;

    /// Check if this is a PD router
    fn is_pd_mode(&self) -> bool {
        self.router_type() == "pd"
    }

    /// Server liveness check - is the server process running
109
    fn liveness(&self) -> Response {
110
        // Simple liveness check - if we can respond, we're alive
111
        (StatusCode::OK, "OK").into_response()
112
113
114
    }

    /// Server readiness check - is the server ready to handle requests
115
    fn readiness(&self) -> Response;
116
}