Unverified commit 0cae873f, authored by pansicheng, committed by GitHub
Browse files

check_offload_progress more frequently (#11656)

parent a8b91f6b
...@@ -1008,6 +1008,9 @@ class SchedulerDisaggregationDecodeMixin: ...@@ -1008,6 +1008,9 @@ class SchedulerDisaggregationDecodeMixin:
return new_batch return new_batch
def process_decode_queue(self: Scheduler): def process_decode_queue(self: Scheduler):
if self.server_args.disaggregation_decode_enable_offload_kvcache:
self.decode_offload_manager.check_offload_progress()
# try to resume retracted requests if there are enough space for another `num_reserved_decode_tokens` decode steps # try to resume retracted requests if there are enough space for another `num_reserved_decode_tokens` decode steps
resumed_reqs = self.disagg_decode_prealloc_queue.resume_retracted_reqs() resumed_reqs = self.disagg_decode_prealloc_queue.resume_retracted_reqs()
self.waiting_queue.extend(resumed_reqs) self.waiting_queue.extend(resumed_reqs)
...@@ -1030,6 +1033,3 @@ class SchedulerDisaggregationDecodeMixin: ...@@ -1030,6 +1033,3 @@ class SchedulerDisaggregationDecodeMixin:
self.disagg_decode_transfer_queue.pop_transferred() self.disagg_decode_transfer_queue.pop_transferred()
) # the requests which kv has arrived ) # the requests which kv has arrived
self.waiting_queue.extend(alloc_reqs) self.waiting_queue.extend(alloc_reqs)
if self.server_args.disaggregation_decode_enable_offload_kvcache:
self.decode_offload_manager.check_offload_progress()
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment