Unverified commit f4d7ab7a, authored by Yinghai Lu, committed by GitHub
Browse files

[sgl-router] improvement to avoid hang (#4482)


Co-authored-by: Yineng Zhang <me@zhyncs.com>
Co-authored-by: Byron Hsu <byronhsu1230@gmail.com>
parent c38ca4fc
use crate::router::PolicyConfig; use crate::router::PolicyConfig;
use crate::router::Router; use crate::router::Router;
use actix_web::{get, post, web, App, HttpRequest, HttpResponse, HttpServer, Responder}; use actix_web::{
error, get, post, web, App, Error, HttpRequest, HttpResponse, HttpServer, Responder,
};
use bytes::Bytes; use bytes::Bytes;
use env_logger::Builder; use env_logger::Builder;
use futures_util::StreamExt;
use log::{info, LevelFilter}; use log::{info, LevelFilter};
use std::collections::HashMap; use std::collections::HashMap;
use std::io::Write; use std::io::Write;
use std::time::Duration;
#[derive(Debug)] #[derive(Debug)]
pub struct AppState { pub struct AppState {
...@@ -25,6 +29,22 @@ impl AppState { ...@@ -25,6 +29,22 @@ impl AppState {
} }
} }
/// Reads and discards the request body, then answers `404 Not Found`.
///
/// Draining stops at the first chunk that yields an error; that error is
/// printed to stdout and the 404 response is still returned.
// NOTE(review): presumably the body is consumed so unread data does not stall
// the connection — confirm against the motivating issue.
async fn sink_handler(_req: HttpRequest, mut payload: web::Payload) -> Result<HttpResponse, Error> {
    loop {
        match payload.next().await {
            // A chunk arrived intact: throw it away and keep reading.
            Some(Ok(_)) => continue,
            // The stream reported a failure: report it and give up draining.
            Some(Err(read_err)) => {
                println!("Error while draining payload: {:?}", read_err);
                break;
            }
            // End of stream: the body has been fully consumed.
            None => break,
        }
    }

    Ok(HttpResponse::NotFound().finish())
}
// Custom error handler for JSON payload errors.
fn json_error_handler(_err: error::JsonPayloadError, _req: &HttpRequest) -> Error {
error::ErrorPayloadTooLarge("Payload too large")
}
#[get("/health")] #[get("/health")]
async fn health(req: HttpRequest, data: web::Data<AppState>) -> impl Responder { async fn health(req: HttpRequest, data: web::Data<AppState>) -> impl Responder {
data.router data.router
...@@ -162,6 +182,7 @@ pub async fn startup(config: ServerConfig) -> std::io::Result<()> { ...@@ -162,6 +182,7 @@ pub async fn startup(config: ServerConfig) -> std::io::Result<()> {
); );
let client = reqwest::Client::builder() let client = reqwest::Client::builder()
.pool_idle_timeout(Some(Duration::from_secs(50)))
.build() .build()
.expect("Failed to create HTTP client"); .expect("Failed to create HTTP client");
...@@ -180,7 +201,11 @@ pub async fn startup(config: ServerConfig) -> std::io::Result<()> { ...@@ -180,7 +201,11 @@ pub async fn startup(config: ServerConfig) -> std::io::Result<()> {
HttpServer::new(move || { HttpServer::new(move || {
App::new() App::new()
.app_data(app_state.clone()) .app_data(app_state.clone())
.app_data(web::JsonConfig::default().limit(config.max_payload_size)) .app_data(
web::JsonConfig::default()
.limit(config.max_payload_size)
.error_handler(json_error_handler),
)
.app_data(web::PayloadConfig::default().limit(config.max_payload_size)) .app_data(web::PayloadConfig::default().limit(config.max_payload_size))
.service(generate) .service(generate)
.service(v1_chat_completions) .service(v1_chat_completions)
...@@ -192,6 +217,8 @@ pub async fn startup(config: ServerConfig) -> std::io::Result<()> { ...@@ -192,6 +217,8 @@ pub async fn startup(config: ServerConfig) -> std::io::Result<()> {
.service(get_server_info) .service(get_server_info)
.service(add_worker) .service(add_worker)
.service(remove_worker) .service(remove_worker)
// Default handler for unmatched routes.
.default_service(web::route().to(sink_handler))
}) })
.bind((config.host, config.port))? .bind((config.host, config.port))?
.run() .run()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment