server.rs 5.46 KB
Newer Older
1
use crate::router::PolicyConfig;
2
use crate::router::Router;
3
use actix_web::{get, post, web, App, HttpRequest, HttpResponse, HttpServer, Responder};
4
use bytes::Bytes;
5
use env_logger::Builder;
Byron Hsu's avatar
Byron Hsu committed
6
use log::{info, LevelFilter};
7
use std::collections::HashMap;
8
use std::io::Write;
9
10
11

#[derive(Debug)]
pub struct AppState {
12
    router: Router,
13
14
15
    client: reqwest::Client,
}

16
impl AppState {
17
18
19
20
    pub fn new(
        worker_urls: Vec<String>,
        client: reqwest::Client,
        policy_config: PolicyConfig,
21
    ) -> Result<Self, String> {
22
        // Create router based on policy
23
24
        let router = Router::new(worker_urls, policy_config)?;
        Ok(Self { router, client })
25
26
27
    }
}

28
29
#[get("/health")]
async fn health(data: web::Data<AppState>) -> impl Responder {
30
    data.router.route_to_first(&data.client, "/health").await
31
32
33
34
}

/// Handles `GET /health_generate` by delegating to `Router::route_to_first` with the same path.
#[get("/health_generate")]
async fn health_generate(data: web::Data<AppState>) -> impl Responder {
    let state = data.get_ref();
    let response = state
        .router
        .route_to_first(&state.client, "/health_generate")
        .await;
    response
}

40
41
#[get("/get_server_info")]
async fn get_server_info(data: web::Data<AppState>) -> impl Responder {
42
43
44
    data.router
        .route_to_first(&data.client, "/get_server_info")
        .await
45
46
}

47
#[get("/v1/models")]
48
async fn v1_models(data: web::Data<AppState>) -> impl Responder {
49
    data.router.route_to_first(&data.client, "/v1/models").await
50
51
}

52
53
#[get("/get_model_info")]
async fn get_model_info(data: web::Data<AppState>) -> impl Responder {
54
55
56
    data.router
        .route_to_first(&data.client, "/get_model_info")
        .await
57
}
58

59
60
#[post("/generate")]
async fn generate(req: HttpRequest, body: Bytes, data: web::Data<AppState>) -> impl Responder {
61
    data.router
62
        .route_generate_request(&data.client, &req, &body, "/generate")
63
64
65
66
67
68
69
70
71
72
        .await
}

/// Handles `POST /v1/chat/completions`.
///
/// Passes the shared HTTP client, the incoming request and its raw body to
/// `Router::route_generate_request` together with the `/v1/chat/completions`
/// path, and returns whatever that call yields.
#[post("/v1/chat/completions")]
async fn v1_chat_completions(
    req: HttpRequest,
    body: Bytes,
    data: web::Data<AppState>,
) -> impl Responder {
    let state = data.get_ref();
    state
        .router
        .route_generate_request(&state.client, &req, &body, "/v1/chat/completions")
        .await
}

#[post("/v1/completions")]
async fn v1_completions(
    req: HttpRequest,
    body: Bytes,
    data: web::Data<AppState>,
) -> impl Responder {
    data.router
84
        .route_generate_request(&data.client, &req, &body, "/v1/completions")
85
        .await
86
87
}

88
89
90
91
92
93
94
95
96
97
98
99
#[post("/add_worker")]
async fn add_worker(
    query: web::Query<HashMap<String, String>>,
    data: web::Data<AppState>,
) -> impl Responder {
    let worker_url = match query.get("url") {
        Some(url) => url.to_string(),
        None => {
            return HttpResponse::BadRequest()
                .body("Worker URL required. Provide 'url' query parameter")
        }
    };
100

101
    match data.router.add_worker(&worker_url).await {
102
103
104
        Ok(message) => HttpResponse::Ok().body(message),
        Err(error) => HttpResponse::BadRequest().body(error),
    }
105
106
}

107
108
109
110
111
112
113
114
115
/// Handles `POST /remove_worker`, reading the worker address from the `url` query parameter.
///
/// Responds with `400 Bad Request` and an explanatory body when `url` is absent
/// (the same message `/add_worker` uses). Otherwise the URL is handed to
/// `Router::remove_worker` and a `200 OK` confirmation naming the URL is returned.
#[post("/remove_worker")]
async fn remove_worker(
    query: web::Query<HashMap<String, String>>,
    data: web::Data<AppState>,
) -> impl Responder {
    let worker_url = match query.get("url") {
        Some(url) => url.to_string(),
        None => {
            // Tell the caller what is missing instead of returning an empty 400.
            return HttpResponse::BadRequest()
                .body("Worker URL required. Provide 'url' query parameter");
        }
    };
    data.router.remove_worker(&worker_url);
    HttpResponse::Ok().body(format!("Successfully removed worker: {}", worker_url))
}

120
121
122
123
124
125
pub struct ServerConfig {
    pub host: String,
    pub port: u16,
    pub worker_urls: Vec<String>,
    pub policy_config: PolicyConfig,
    pub verbose: bool,
126
    pub max_payload_size: usize,
127
128
129
}

pub async fn startup(config: ServerConfig) -> std::io::Result<()> {
130
    // Initialize logger
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
    Builder::new()
        .format(|buf, record| {
            use chrono::Local;
            writeln!(
                buf,
                "[Router (Rust)] {} - {} - {}",
                Local::now().format("%Y-%m-%d %H:%M:%S"),
                record.level(),
                record.args()
            )
        })
        .filter(
            None,
            if config.verbose {
                LevelFilter::Debug
            } else {
                LevelFilter::Info
            },
        )
        .init();

152
153
154
155
156
157
158
159
    info!("🚧 Initializing router on {}:{}", config.host, config.port);
    info!("🚧 Initializing workers on {:?}", config.worker_urls);
    info!("🚧 Policy Config: {:?}", config.policy_config);
    info!(
        "🚧 Max payload size: {} MB",
        config.max_payload_size / (1024 * 1024)
    );

160
161
162
163
    let client = reqwest::Client::builder()
        .build()
        .expect("Failed to create HTTP client");

164
165
166
167
168
169
170
    let app_state = web::Data::new(
        AppState::new(
            config.worker_urls.clone(),
            client,
            config.policy_config.clone(),
        )
        .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?,
171
    );
172

173
174
175
    info!("✅ Serving router on {}:{}", config.host, config.port);
    info!("✅ Serving workers on {:?}", config.worker_urls);

176
177
178
    HttpServer::new(move || {
        App::new()
            .app_data(app_state.clone())
179
180
            .app_data(web::JsonConfig::default().limit(config.max_payload_size))
            .app_data(web::PayloadConfig::default().limit(config.max_payload_size))
181
            .service(generate)
182
183
184
            .service(v1_chat_completions)
            .service(v1_completions)
            .service(v1_models)
185
            .service(get_model_info)
186
187
            .service(health)
            .service(health_generate)
188
            .service(get_server_info)
189
            .service(add_worker)
190
            .service(remove_worker)
191
    })
192
    .bind((config.host, config.port))?
193
194
    .run()
    .await
195
}