Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
f7655790
Unverified
Commit
f7655790
authored
Apr 14, 2025
by
Liangsheng Yin
Committed by
GitHub
Apr 14, 2025
Browse files
Fix typo: infight -> inflight (#5357)
parent
f58b929a
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
14 additions
and
14 deletions
+14
-14
python/sglang/srt/disaggregation/prefill.py
python/sglang/srt/disaggregation/prefill.py
+10
-10
python/sglang/srt/managers/scheduler.py
python/sglang/srt/managers/scheduler.py
+4
-4
No files found.
python/sglang/srt/disaggregation/prefill.py
View file @
f7655790
...
...
@@ -10,9 +10,9 @@ Life cycle of a request in the prefill server
2. Waiting Queue
a. Use PrefillAdder to pop requests
b. Run forward
c. Add the request to Infight Queue
c. Add the request to Inf
l
ight Queue
3. Infight Queue
3. Inf
l
ight Queue
a. Poll (non-blocking) the sender of the request
b. Once the transfer has finished, return the request
"""
...
...
@@ -162,7 +162,7 @@ class SchedulerDisaggregationPrefillMixin:
self
:
Scheduler
,
batch
:
ScheduleBatch
,
result
:
GenerationBatchResult
)
->
None
:
"""
Transfer kv for prefill completed requests and add it into disagg_prefill_infight_queue
Transfer kv for prefill completed requests and add it into disagg_prefill_inf
l
ight_queue
Adapted from process_batch_result_prefill
"""
...
...
@@ -175,7 +175,7 @@ class SchedulerDisaggregationPrefillMixin:
req
.
output_ids
.
append
(
next_token_id
)
self
.
tree_cache
.
cache_unfinished_req
(
req
)
# update the tree and lock
self
.
send_kv_chunk
(
req
,
token_id
=
next_token_id
)
self
.
disagg_prefill_infight_queue
.
append
(
req
)
self
.
disagg_prefill_inf
l
ight_queue
.
append
(
req
)
else
:
# being chunked reqs' prefill is not finished
req
.
is_chunked
-=
1
...
...
@@ -187,22 +187,22 @@ class SchedulerDisaggregationPrefillMixin:
self
.
current_stream
.
synchronize
()
batch
.
next_batch_sampling_info
.
sampling_info_done
.
set
()
def
process_disagg_prefill_infight_queue
(
self
:
Scheduler
)
->
None
:
def
process_disagg_prefill_inf
l
ight_queue
(
self
:
Scheduler
)
->
None
:
"""
Poll the requests in the middle of transfer. If done, return the request.
"""
assert
len
(
self
.
disagg_prefill_infight_queue
)
>
0
assert
len
(
self
.
disagg_prefill_inf
l
ight_queue
)
>
0
done_reqs
=
[]
polls
=
poll_and_all_reduce
(
[
req
.
disagg_kv_sender
for
req
in
self
.
disagg_prefill_infight_queue
],
[
req
.
disagg_kv_sender
for
req
in
self
.
disagg_prefill_inf
l
ight_queue
],
self
.
tp_worker
.
get_tp_cpu_group
(),
)
undone_reqs
:
List
[
Req
]
=
[]
# Check .poll() for the reqs in disagg_prefill_infight_queue. If Success, respond to the client and remove it from the queue
for
req
,
poll
in
zip
(
self
.
disagg_prefill_infight_queue
,
polls
):
# Check .poll() for the reqs in disagg_prefill_inf
l
ight_queue. If Success, respond to the client and remove it from the queue
for
req
,
poll
in
zip
(
self
.
disagg_prefill_inf
l
ight_queue
,
polls
):
if
poll
in
[
KVPoll
.
WaitingForInput
,
KVPoll
.
Transferring
]:
undone_reqs
.
append
(
req
)
elif
poll
==
KVPoll
.
Success
:
# transfer done
...
...
@@ -215,7 +215,7 @@ class SchedulerDisaggregationPrefillMixin:
# Stream requests which have finished transfer
self
.
stream_output
(
done_reqs
,
False
,
None
)
self
.
disagg_prefill_infight_queue
=
undone_reqs
self
.
disagg_prefill_inf
l
ight_queue
=
undone_reqs
def
process_prefill_chunk
(
self
:
Scheduler
)
->
None
:
if
self
.
last_batch
and
self
.
last_batch
.
forward_mode
.
is_extend
():
...
...
python/sglang/srt/managers/scheduler.py
View file @
f7655790
...
...
@@ -594,7 +594,7 @@ class Scheduler(
gloo_group
=
self
.
tp_worker
.
get_attention_tp_cpu_group
(),
)
# The prefill requests that are in the middle of kv sending
self
.
disagg_prefill_infight_queue
:
List
[
Req
]
=
[]
self
.
disagg_prefill_inf
l
ight_queue
:
List
[
Req
]
=
[]
@
DynamicGradMode
()
def
event_loop_normal
(
self
):
...
...
@@ -674,10 +674,10 @@ class Scheduler(
result
=
self
.
run_batch
(
batch
)
self
.
process_batch_result_disagg_prefill
(
batch
,
result
)
if
len
(
self
.
disagg_prefill_infight_queue
)
>
0
:
self
.
process_disagg_prefill_infight_queue
()
if
len
(
self
.
disagg_prefill_inf
l
ight_queue
)
>
0
:
self
.
process_disagg_prefill_inf
l
ight_queue
()
if
batch
is
None
and
len
(
self
.
disagg_prefill_infight_queue
)
==
0
:
if
batch
is
None
and
len
(
self
.
disagg_prefill_inf
l
ight_queue
)
==
0
:
self
.
check_memory
()
self
.
new_token_ratio
=
self
.
init_new_token_ratio
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment