minor fix (#7245)

c4943867 · Liangsheng Yin · GitHub · 53a525bf · c4943867 · c4943867
Unverified commit c4943867, authored Jun 16, 2025 by Liangsheng Yin; committed by GitHub on Jun 16, 2025
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 15 additions and 3 deletions

python/sglang/srt/managers/scheduler.py python/sglang/srt/managers/scheduler.py +3 -3

sgl-pdlb/src/server.rs sgl-pdlb/src/server.rs +12 -0

No files found.
--- a/python/sglang/srt/managers/scheduler.py
+++ b/python/sglang/srt/managers/scheduler.py
@@ -391,7 +391,7 @@ class Scheduler(
        self.forward_ct = 0
        self.forward_ct_decode = 0
        self.num_generated_tokens = 0
-        self.num_prefill_tokens = 0
+        self.last_prefill_tokens = 0
        self.last_decode_stats_tic = time.perf_counter()
        self.last_prefill_stats_tic = time.perf_counter()
        self.return_health_check_ct = 0
@@ -1194,8 +1194,8 @@ class Scheduler(
    ):
        gap_latency = time.perf_counter() - self.last_prefill_stats_tic
        self.last_prefill_stats_tic = time.perf_counter()
-        self.last_input_throughput = self.num_prefill_tokens / gap_latency
+        self.last_input_throughput = self.last_prefill_tokens / gap_latency
-        self.num_prefill_tokens = 0
+        self.last_prefill_tokens = adder.log_input_tokens
        num_used = self.max_total_num_tokens - (
            self.token_to_kv_pool_allocator.available_size()

--- a/sgl-pdlb/src/server.rs
+++ b/sgl-pdlb/src/server.rs
@@ -60,6 +60,17 @@ pub async fn generate(
        .await
 }
+#[post("/v1/completions")]
+pub async fn completions(
+    _req: HttpRequest,
+    req: web::Json<GenerateReqInput>,
+    app_state: web::Data<LBState>,
+) -> Result<HttpResponse, actix_web::Error> {
+    app_state
+        .generate("/v1/completions", Box::new(req.into_inner()))
+        .await
+}
 #[post("/v1/chat/completions")]
 pub async fn chat_completions(
    _req: HttpRequest,
@@ -162,6 +173,7 @@ pub async fn startup(lb_config: LBConfig, lb_state: LBState) -> std::io::Result<
            .service(get_loads)
            .service(generate)
            .service(chat_completions)
+            .service(completions)
    })
    .bind((lb_config.host, lb_config.port))?
    .run()