Unverified commit 43f80884, authored by Qiaolin Yu, committed by GitHub
Browse files

Fix accept rate in speculative decoding metrics (#11572)

parent 60b05032
......@@ -259,7 +259,7 @@ class SchedulerMetricsMixin:
)
# Calculate acceptance rate: accepted tokens / total draft tokens
total_draft_tokens = self.spec_num_total_forward_ct * (
-self.server_args.speculative_num_steps or 1
+(self.server_args.speculative_num_steps or 0) + 1
)
spec_accept_rate = (
self.spec_num_total_accepted_tokens / total_draft_tokens
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment