//! Router implementations

use async_trait::async_trait;
4
5
6
7
8
9
use axum::{
    body::Body,
    extract::Request,
    http::{HeaderMap, StatusCode},
    response::{IntoResponse, Response},
};
10
11
use std::fmt::Debug;

12
use crate::protocols::spec::{
13
    ChatCompletionRequest, CompletionRequest, EmbeddingRequest, GenerateRequest, RerankRequest,
14
    ResponsesGetParams, ResponsesRequest,
15
};
16

17
pub mod factory;
18
pub mod grpc;
19
pub mod header_utils;
20
pub mod http;
21
pub mod router_manager;
22
23

pub use factory::RouterFactory;
24

25
26
// Re-export HTTP routers for convenience (keeps routers::openai_router path working)
pub use http::{openai_router, pd_router, pd_types, router};
27
28
29
30
31

/// Core trait for all router implementations
///
/// This trait provides a unified interface for routing requests,
/// regardless of whether it's a regular router or PD router.
32
#[async_trait]
33
pub trait RouterTrait: Send + Sync + Debug {
34
35
    /// Get a reference to self as Any for downcasting
    fn as_any(&self) -> &dyn std::any::Any;
36

37
    /// Route a health check request
38
    async fn health(&self, req: Request<Body>) -> Response;
39
40

    /// Route a health generate request
41
    async fn health_generate(&self, req: Request<Body>) -> Response;
42
43

    /// Get server information
44
    async fn get_server_info(&self, req: Request<Body>) -> Response;
45
46

    /// Get available models
47
    async fn get_models(&self, req: Request<Body>) -> Response;
48
49

    /// Get model information
50
    async fn get_model_info(&self, req: Request<Body>) -> Response;
51
52

    /// Route a generate request
53
54
55
56
57
58
    async fn route_generate(
        &self,
        headers: Option<&HeaderMap>,
        body: &GenerateRequest,
        model_id: Option<&str>,
    ) -> Response;
59
60
61
62

    /// Route a chat completion request
    async fn route_chat(
        &self,
63
64
        headers: Option<&HeaderMap>,
        body: &ChatCompletionRequest,
65
        model_id: Option<&str>,
66
    ) -> Response;
67
68
69
70

    /// Route a completion request
    async fn route_completion(
        &self,
71
72
        headers: Option<&HeaderMap>,
        body: &CompletionRequest,
73
        model_id: Option<&str>,
74
    ) -> Response;
75

76
77
78
79
80
    /// Route a responses request
    async fn route_responses(
        &self,
        headers: Option<&HeaderMap>,
        body: &ResponsesRequest,
81
        model_id: Option<&str>,
82
83
    ) -> Response;

84
    /// Retrieve a stored/background response by id
85
86
87
88
89
90
    async fn get_response(
        &self,
        headers: Option<&HeaderMap>,
        response_id: &str,
        params: &ResponsesGetParams,
    ) -> Response;
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116

    /// Cancel a background response by id
    async fn cancel_response(&self, headers: Option<&HeaderMap>, response_id: &str) -> Response;

    /// Delete a response by id
    async fn delete_response(&self, _headers: Option<&HeaderMap>, _response_id: &str) -> Response {
        (
            StatusCode::NOT_IMPLEMENTED,
            "Responses delete endpoint not implemented",
        )
            .into_response()
    }

    /// List input items of a response by id
    async fn list_response_input_items(
        &self,
        _headers: Option<&HeaderMap>,
        _response_id: &str,
    ) -> Response {
        (
            StatusCode::NOT_IMPLEMENTED,
            "Responses list input items endpoint not implemented",
        )
            .into_response()
    }

117
118
119
120
121
122
123
    /// Route embedding requests (OpenAI-compatible /v1/embeddings)
    async fn route_embeddings(
        &self,
        headers: Option<&HeaderMap>,
        body: &EmbeddingRequest,
        model_id: Option<&str>,
    ) -> Response;
124

125
126
127
128
129
130
    async fn route_rerank(
        &self,
        headers: Option<&HeaderMap>,
        body: &RerankRequest,
        model_id: Option<&str>,
    ) -> Response;
131

132
    /// Flush cache on all workers
133
    async fn flush_cache(&self) -> Response;
134
135

    /// Get worker loads (for monitoring)
136
    async fn get_worker_loads(&self) -> Response;
137
138
139
140
141
142
143
144
145
146

    /// Get router type name
    fn router_type(&self) -> &'static str;

    /// Check if this is a PD router
    fn is_pd_mode(&self) -> bool {
        self.router_type() == "pd"
    }

    /// Server liveness check - is the server process running
147
    fn liveness(&self) -> Response {
148
        // Simple liveness check - if we can respond, we're alive
149
        (StatusCode::OK, "OK").into_response()
150
151
152
    }

    /// Server readiness check - is the server ready to handle requests
153
    fn readiness(&self) -> Response;
154
}