"docs/vscode:/vscode.git/clone" did not exist on "6f36dd6366067b3a896c43e49e4719422b818e02"
Unverified commit f7b1d8c5, authored by Qiaolin Yu, committed by GitHub
Browse files

Fix acc len and gen throughput metrics when enabling overlap-spec (#11823)


Co-authored-by: Liangsheng Yin <lsyincs@gmail.com>
parent bfc3b3f7
...@@ -208,7 +208,7 @@ class SchedulerOutputProcessorMixin: ...@@ -208,7 +208,7 @@ class SchedulerOutputProcessorMixin:
next_token_ids = result.next_token_ids.tolist() next_token_ids = result.next_token_ids.tolist()
accept_lens = result.accept_lens.tolist() accept_lens = result.accept_lens.tolist()
result.num_accepted_tokens = sum(accept_lens) result.num_accepted_tokens = sum(accept_lens) - len(batch.reqs)
predict_tokens = [] predict_tokens = []
stride = self.draft_worker.speculative_num_draft_tokens stride = self.draft_worker.speculative_num_draft_tokens
...@@ -244,7 +244,7 @@ class SchedulerOutputProcessorMixin: ...@@ -244,7 +244,7 @@ class SchedulerOutputProcessorMixin:
accept_lens_list = result.accept_lens.tolist() accept_lens_list = result.accept_lens.tolist()
self.num_generated_tokens += len(batch.reqs) self.num_generated_tokens += len(batch.reqs)
if not self.spec_algorithm.is_none(): if not batch.spec_algorithm.is_none():
self.update_spec_metrics(batch.batch_size(), result.num_accepted_tokens) self.update_spec_metrics(batch.batch_size(), result.num_accepted_tokens)
self.token_to_kv_pool_allocator.free_group_begin() self.token_to_kv_pool_allocator.free_group_begin()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment