Unverified Commit 1bebd315 authored by Ziming Huang's avatar Ziming Huang Committed by GitHub
Browse files

Fix num_tokens_pre_allocated in disaggregation log (#7714)

parent d3c275b1
...@@ -416,6 +416,12 @@ class DecodePreallocQueue: ...@@ -416,6 +416,12 @@ class DecodePreallocQueue:
return preallocated_reqs return preallocated_reqs
@property
def num_tokens_pre_allocated(self) -> int:
    """Return the combined ``fill_ids`` length of all queued transfer requests.

    Walks ``self.transfer_queue.queue`` and adds up
    ``len(decode_req.req.fill_ids)`` for each entry. The value is
    recomputed on every access, so it always reflects the queue's current
    contents; an empty queue yields ``0``.

    NOTE(review): ``fill_ids`` is presumably the per-request token id list,
    making this a token count -- confirm against the request type.
    """
    return sum(
        len(decode_req.req.fill_ids) for decode_req in self.transfer_queue.queue
    )
def _allocatable_tokens( def _allocatable_tokens(
self, retractable_tokens: Optional[int] = None, count_retracted: bool = True self, retractable_tokens: Optional[int] = None, count_retracted: bool = True
) -> int: ) -> int:
......
...@@ -707,9 +707,6 @@ class Scheduler( ...@@ -707,9 +707,6 @@ class Scheduler(
transfer_backend=self.transfer_backend, transfer_backend=self.transfer_backend,
) )
# Metric for pre-allocation
self.num_tokens_pre_allocated = 0
elif self.disaggregation_mode == DisaggregationMode.PREFILL: elif self.disaggregation_mode == DisaggregationMode.PREFILL:
# *2 for the headroom. # *2 for the headroom.
buffer_size = self.max_running_requests * 2 buffer_size = self.max_running_requests * 2
...@@ -1372,7 +1369,7 @@ class Scheduler( ...@@ -1372,7 +1369,7 @@ class Scheduler(
msg += f"accept len: {spec_accept_length:.2f}, " msg += f"accept len: {spec_accept_length:.2f}, "
if self.disaggregation_mode == DisaggregationMode.DECODE: if self.disaggregation_mode == DisaggregationMode.DECODE:
msg += f"pre-allocated usage: {self.num_tokens_pre_allocated / self.max_total_num_tokens:.2f}, " msg += f"pre-allocated usage: {self.disagg_decode_prealloc_queue.num_tokens_pre_allocated / self.max_total_num_tokens:.2f}, "
msg += f"#retracted-req: {len(self.disagg_decode_prealloc_queue.retracted_queue)}, " msg += f"#retracted-req: {len(self.disagg_decode_prealloc_queue.retracted_queue)}, "
msg += ( msg += (
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment