Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
c4943867
"vscode:/vscode.git/clone" did not exist on "9d853977d26850db9e9e6b689a6e25da0fa291c2"
Unverified
Commit
c4943867
authored
Jun 16, 2025
by
Liangsheng Yin
Committed by
GitHub
Jun 16, 2025
Browse files
minor fix (#7245)
parent
53a525bf
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
15 additions
and
3 deletions
+15
-3
python/sglang/srt/managers/scheduler.py
python/sglang/srt/managers/scheduler.py
+3
-3
sgl-pdlb/src/server.rs
sgl-pdlb/src/server.rs
+12
-0
No files found.
python/sglang/srt/managers/scheduler.py
View file @
c4943867
...
...
@@ -391,7 +391,7 @@ class Scheduler(
self
.
forward_ct
=
0
self
.
forward_ct_decode
=
0
self
.
num_generated_tokens
=
0
self
.
num
_prefill_tokens
=
0
self
.
last
_prefill_tokens
=
0
self
.
last_decode_stats_tic
=
time
.
perf_counter
()
self
.
last_prefill_stats_tic
=
time
.
perf_counter
()
self
.
return_health_check_ct
=
0
...
...
@@ -1194,8 +1194,8 @@ class Scheduler(
):
gap_latency
=
time
.
perf_counter
()
-
self
.
last_prefill_stats_tic
self
.
last_prefill_stats_tic
=
time
.
perf_counter
()
self
.
last_input_throughput
=
self
.
num
_prefill_tokens
/
gap_latency
self
.
num
_prefill_tokens
=
0
self
.
last_input_throughput
=
self
.
last
_prefill_tokens
/
gap_latency
self
.
last
_prefill_tokens
=
adder
.
log_input_tokens
num_used
=
self
.
max_total_num_tokens
-
(
self
.
token_to_kv_pool_allocator
.
available_size
()
...
...
sgl-pdlb/src/server.rs
View file @
c4943867
...
...
@@ -60,6 +60,17 @@ pub async fn generate(
.await
}
#[post(
"/v1/completions"
)]
pub
async
fn
completions
(
_
req
:
HttpRequest
,
req
:
web
::
Json
<
GenerateReqInput
>
,
app_state
:
web
::
Data
<
LBState
>
,
)
->
Result
<
HttpResponse
,
actix_web
::
Error
>
{
app_state
.generate
(
"/v1/completions"
,
Box
::
new
(
req
.into_inner
()))
.await
}
#[post(
"/v1/chat/completions"
)]
pub
async
fn
chat_completions
(
_
req
:
HttpRequest
,
...
...
@@ -162,6 +173,7 @@ pub async fn startup(lb_config: LBConfig, lb_state: LBState) -> std::io::Result<
.service
(
get_loads
)
.service
(
generate
)
.service
(
chat_completions
)
.service
(
completions
)
})
.bind
((
lb_config
.host
,
lb_config
.port
))
?
.run
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment