"docs/git@developer.sourcefind.cn:change/sglang.git" did not exist on "e70fa279bc2c3e44da94edc81affec60b9681e52"
Unverified commit 817e46f4, authored by Scott Lee and committed by GitHub
Browse files

Refactor spec decoding metrics calculation into separate `TokenizerManager`...

Refactor spec decoding metrics calculation into separate `TokenizerManager` utility function (#11586)
parent 5a33c3aa
...@@ -1394,37 +1394,7 @@ class TokenizerManager(TokenizerCommunicatorMixin): ...@@ -1394,37 +1394,7 @@ class TokenizerManager(TokenizerCommunicatorMixin):
state.finished = recv_obj.finished_reasons[i] is not None state.finished = recv_obj.finished_reasons[i] is not None
if state.finished: if state.finished:
if self.server_args.speculative_algorithm: if self.server_args.speculative_algorithm:
meta_info["spec_verify_ct"] = recv_obj.spec_verify_ct[i] self._calculate_spec_decoding_metrics(meta_info, recv_obj, i)
if (
recv_obj.spec_verify_ct[i] > 0
and self.server_args.speculative_num_steps is not None
and not isinstance(recv_obj, BatchEmbeddingOutput)
and hasattr(recv_obj, "spec_accepted_tokens")
# Checks that `spec_accepted_tokens[i]` will exist.
and len(recv_obj.spec_accepted_tokens) > i
):
total_draft_tokens = (
recv_obj.spec_verify_ct[i]
* self.server_args.speculative_num_steps
)
accepted_tokens = recv_obj.spec_accepted_tokens[i]
# Calculate per-request acceptance rate and average acceptance length.
if total_draft_tokens > 0:
# Calculate acceptance rate: accepted / (steps * lookahead)
meta_info["spec_accept_rate"] = (
accepted_tokens / total_draft_tokens
)
meta_info["spec_accept_length"] = (
recv_obj.completion_tokens[i]
/ recv_obj.spec_verify_ct[i]
)
else:
meta_info["spec_accept_rate"] = 0.0
meta_info["spec_accept_length"] = 0
else:
meta_info["spec_accept_rate"] = 0.0
meta_info["spec_accept_length"] = 0
state.finished_time = time.time() state.finished_time = time.time()
meta_info["e2e_latency"] = state.finished_time - state.created_time meta_info["e2e_latency"] = state.finished_time - state.created_time
...@@ -1572,6 +1542,43 @@ class TokenizerManager(TokenizerCommunicatorMixin): ...@@ -1572,6 +1542,43 @@ class TokenizerManager(TokenizerCommunicatorMixin):
ret.append(None) ret.append(None)
return ret return ret
def _calculate_spec_decoding_metrics(
    self,
    meta_info: Dict[str, Any],
    recv_obj: Union[
        BatchStrOutput,
        BatchEmbeddingOutput,
        BatchMultimodalOutput,
        BatchTokenIDOutput,
    ],
    i: int,
) -> None:
    """Fill ``meta_info`` with the speculative decoding metrics of request ``i``.

    Three keys are always written, in this order:

    * ``spec_accept_rate`` -- defaults to ``0.0``.
    * ``spec_accept_length`` -- defaults to ``0``.
    * ``spec_verify_ct`` -- copied from ``recv_obj.spec_verify_ct[i]``.

    The two defaults are overwritten only when every precondition holds:
    the verify count is positive, ``speculative_num_steps`` is configured,
    ``recv_obj`` is not a ``BatchEmbeddingOutput``, it exposes
    ``spec_accepted_tokens`` with an entry at index ``i``, and the product
    ``verify count * speculative_num_steps`` is positive.

    Args:
        meta_info: Per-request metadata dictionary, updated in place.
        recv_obj: Batch output object holding the per-request counters.
        i: Index of the request inside the batch.
    """
    # Publish the fallback values first so they are present on every
    # early-exit path below.
    meta_info["spec_accept_rate"] = 0.0
    meta_info["spec_accept_length"] = 0

    verify_ct = recv_obj.spec_verify_ct[i]
    meta_info["spec_verify_ct"] = verify_ct

    # Guard clauses, checked in the same order as the preconditions are
    # listed in the docstring.
    if not verify_ct > 0:
        return

    num_steps = self.server_args.speculative_num_steps
    if num_steps is None:
        return

    if isinstance(recv_obj, BatchEmbeddingOutput):
        return

    if not hasattr(recv_obj, "spec_accepted_tokens"):
        return

    accepted_by_request = recv_obj.spec_accepted_tokens
    # Make sure `accepted_by_request[i]` exists before indexing it.
    if not len(accepted_by_request) > i:
        return

    draft_token_total = verify_ct * num_steps
    accepted_count = accepted_by_request[i]

    if draft_token_total > 0:
        # Acceptance rate: accepted tokens over (verify count * num steps).
        meta_info["spec_accept_rate"] = accepted_count / draft_token_total
        # Acceptance length: completion tokens per verify count.
        meta_info["spec_accept_length"] = recv_obj.completion_tokens[i] / verify_ct
def collect_metrics(self, state: ReqState, recv_obj: BatchStrOutput, i: int): def collect_metrics(self, state: ReqState, recv_obj: BatchStrOutput, i: int):
completion_tokens = ( completion_tokens = (
recv_obj.completion_tokens[i] recv_obj.completion_tokens[i]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment