Unverified commit c4943867, authored by Liangsheng Yin, committed by GitHub
Browse files

minor fix (#7245)

parent 53a525bf
...@@ -391,7 +391,7 @@ class Scheduler( ...@@ -391,7 +391,7 @@ class Scheduler(
self.forward_ct = 0 self.forward_ct = 0
self.forward_ct_decode = 0 self.forward_ct_decode = 0
self.num_generated_tokens = 0 self.num_generated_tokens = 0
self.num_prefill_tokens = 0 self.last_prefill_tokens = 0
self.last_decode_stats_tic = time.perf_counter() self.last_decode_stats_tic = time.perf_counter()
self.last_prefill_stats_tic = time.perf_counter() self.last_prefill_stats_tic = time.perf_counter()
self.return_health_check_ct = 0 self.return_health_check_ct = 0
...@@ -1194,8 +1194,8 @@ class Scheduler( ...@@ -1194,8 +1194,8 @@ class Scheduler(
): ):
gap_latency = time.perf_counter() - self.last_prefill_stats_tic gap_latency = time.perf_counter() - self.last_prefill_stats_tic
self.last_prefill_stats_tic = time.perf_counter() self.last_prefill_stats_tic = time.perf_counter()
self.last_input_throughput = self.num_prefill_tokens / gap_latency self.last_input_throughput = self.last_prefill_tokens / gap_latency
self.num_prefill_tokens = 0 self.last_prefill_tokens = adder.log_input_tokens
num_used = self.max_total_num_tokens - ( num_used = self.max_total_num_tokens - (
self.token_to_kv_pool_allocator.available_size() self.token_to_kv_pool_allocator.available_size()
......
...@@ -60,6 +60,17 @@ pub async fn generate( ...@@ -60,6 +60,17 @@ pub async fn generate(
.await .await
} }
/// Handles `POST /v1/completions`.
///
/// Takes the JSON request body out of its extractor, boxes it, and passes it
/// to `app_state.generate` together with the `/v1/completions` route path.
/// Whatever `generate` resolves to is returned to the client unchanged.
#[post("/v1/completions")]
pub async fn completions(
    _req: HttpRequest,
    req: web::Json<GenerateReqInput>,
    app_state: web::Data<LBState>,
) -> Result<HttpResponse, actix_web::Error> {
    // Unwrap the extractor first so the delegation below reads as one call.
    let payload = Box::new(req.into_inner());
    app_state.generate("/v1/completions", payload).await
}
#[post("/v1/chat/completions")] #[post("/v1/chat/completions")]
pub async fn chat_completions( pub async fn chat_completions(
_req: HttpRequest, _req: HttpRequest,
...@@ -162,6 +173,7 @@ pub async fn startup(lb_config: LBConfig, lb_state: LBState) -> std::io::Result< ...@@ -162,6 +173,7 @@ pub async fn startup(lb_config: LBConfig, lb_state: LBState) -> std::io::Result<
.service(get_loads) .service(get_loads)
.service(generate) .service(generate)
.service(chat_completions) .service(chat_completions)
.service(completions)
}) })
.bind((lb_config.host, lb_config.port))? .bind((lb_config.host, lb_config.port))?
.run() .run()
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment