Unverified commit 68be0f85, authored by Csrayz, committed by GitHub
Browse files

[Metrics] Add request_id to FinishedRequestStats to enable correlation between...


[Metrics] Add request_id to FinishedRequestStats to enable correlation between metrics and requests (#39710)

Enables external `StatLogger` plugins to correlate per-request metrics
with request-level context. This is also a prerequisite for Prometheus
exemplars (#30972).
Signed-off-by: Csrayz <33659823+Csrayz@users.noreply.github.com>
parent 60995c05
......@@ -26,6 +26,7 @@ def test_prefill_kv_computed_with_cache():
# Case 1: With prefix cache (1200 tokens cached)
iteration_stats.update_from_finished_request(
finish_reason=FinishReason.STOP,
request_id="test-req-001",
num_prompt_tokens=10000,
max_tokens_param=100,
req_stats=req_stats,
......@@ -35,6 +36,7 @@ def test_prefill_kv_computed_with_cache():
finished_req = iteration_stats.finished_requests[0]
assert finished_req.num_prompt_tokens == 10000
assert finished_req.num_cached_tokens == 1200
assert finished_req.request_id == "test-req-001"
# Verify calculation: prefill KV = prompt tokens - cached tokens
prefill_kv_computed = finished_req.num_prompt_tokens - max(
......@@ -55,6 +57,7 @@ def test_prefill_kv_computed_no_cache():
# Case 2: No prefix cache
iteration_stats.update_from_finished_request(
finish_reason=FinishReason.STOP,
request_id="test-req-002",
num_prompt_tokens=2000,
max_tokens_param=100,
req_stats=req_stats,
......@@ -64,6 +67,7 @@ def test_prefill_kv_computed_no_cache():
finished_req = iteration_stats.finished_requests[0]
assert finished_req.num_prompt_tokens == 2000
assert finished_req.num_cached_tokens == 0
assert finished_req.request_id == "test-req-002"
# Verify calculation: prefill KV = full prompt when no cache
prefill_kv_computed = finished_req.num_prompt_tokens - max(
......@@ -84,6 +88,7 @@ def test_prefill_kv_computed_edge_cases():
# Case 3: Negative num_cached_tokens (shouldn't happen, but handle gracefully)
iteration_stats.update_from_finished_request(
finish_reason=FinishReason.STOP,
request_id="test-req-003",
num_prompt_tokens=100,
max_tokens_param=10,
req_stats=req_stats,
......@@ -96,11 +101,13 @@ def test_prefill_kv_computed_edge_cases():
finished_req.num_cached_tokens, 0
)
assert prefill_kv_computed == 100 # Should treat negative as 0
assert finished_req.request_id == "test-req-003"
# Case 4: All tokens cached (shouldn't happen in practice)
iteration_stats2 = IterationStats()
iteration_stats2.update_from_finished_request(
finish_reason=FinishReason.STOP,
request_id="test-req-004",
num_prompt_tokens=100,
max_tokens_param=10,
req_stats=req_stats,
......@@ -112,6 +119,7 @@ def test_prefill_kv_computed_edge_cases():
finished_req2.num_cached_tokens, 0
)
assert prefill_kv_computed2 == 0 # All cached, nothing computed
assert finished_req2.request_id == "test-req-004"
def test_prompt_token_stats_all_computed():
......
......@@ -799,6 +799,7 @@ class OutputProcessor:
assert req_state.stats is not None
iteration_stats.update_from_finished_request(
finish_reason=finish_reason,
request_id=req_state.external_req_id,
num_prompt_tokens=req_state.prompt_len,
max_tokens_param=req_state.max_tokens_param,
req_stats=req_state.stats,
......
......@@ -225,6 +225,7 @@ class FinishedRequestStats:
"""Stats associated with a finished request."""
finish_reason: "FinishReason"
request_id: str | None = None
e2e_latency: float = 0.0
num_prompt_tokens: int = 0
num_generation_tokens: int = 0
......@@ -427,6 +428,7 @@ class IterationStats:
def update_from_finished_request(
self,
finish_reason: "FinishReason",
request_id: str,
num_prompt_tokens: int,
max_tokens_param: int | None,
req_stats: RequestStateStats,
......@@ -458,6 +460,7 @@ class IterationStats:
finished_req = FinishedRequestStats(
finish_reason=finish_reason,
request_id=request_id,
e2e_latency=e2e_latency,
num_prompt_tokens=num_prompt_tokens,
num_generation_tokens=req_stats.num_generation_tokens,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment