Unverified commit 9dbf7a2d, authored by Russell Bryant, committed by GitHub
Browse files

[V1] Remove log noise when idle (#16735)


Signed-off-by: Russell Bryant <rbryant@redhat.com>
parent 607029e5
...@@ -40,6 +40,8 @@ class LoggingStatLogger(StatLoggerBase): ...@@ -40,6 +40,8 @@ class LoggingStatLogger(StatLoggerBase):
# TODO: Make the interval configurable. # TODO: Make the interval configurable.
self.prefix_caching_metrics = PrefixCachingMetrics() self.prefix_caching_metrics = PrefixCachingMetrics()
self.spec_decoding_metrics = SpecDecodingMetrics() self.spec_decoding_metrics = SpecDecodingMetrics()
self.last_prompt_throughput: float = 0.0
self.last_generation_throughput: float = 0.0
def _reset(self, now): def _reset(self, now):
self.last_log_time = now self.last_log_time = now
...@@ -83,8 +85,17 @@ class LoggingStatLogger(StatLoggerBase): ...@@ -83,8 +85,17 @@ class LoggingStatLogger(StatLoggerBase):
scheduler_stats = self.last_scheduler_stats scheduler_stats = self.last_scheduler_stats
log_fn = logger.info
if not any(
(prompt_throughput, generation_throughput,
self.last_prompt_throughput, self.last_generation_throughput)):
# Avoid log noise on an idle production system
log_fn = logger.debug
self.last_generation_throughput = generation_throughput
self.last_prompt_throughput = prompt_throughput
# Format and print output. # Format and print output.
logger.info( log_fn(
"Engine %03d: " "Engine %03d: "
"Avg prompt throughput: %.1f tokens/s, " "Avg prompt throughput: %.1f tokens/s, "
"Avg generation throughput: %.1f tokens/s, " "Avg generation throughput: %.1f tokens/s, "
...@@ -101,7 +112,7 @@ class LoggingStatLogger(StatLoggerBase): ...@@ -101,7 +112,7 @@ class LoggingStatLogger(StatLoggerBase):
) )
if scheduler_stats.spec_decoding_stats is not None: if scheduler_stats.spec_decoding_stats is not None:
self.spec_decoding_metrics.log() self.spec_decoding_metrics.log(log_fn=log_fn)
class PrometheusStatLogger(StatLoggerBase): class PrometheusStatLogger(StatLoggerBase):
......
...@@ -43,14 +43,14 @@ class SpecDecodingMetrics: ...@@ -43,14 +43,14 @@ class SpecDecodingMetrics:
self.num_accepted_tokens.append( self.num_accepted_tokens.append(
spec_decoding_stats.num_accepted_tokens) spec_decoding_stats.num_accepted_tokens)
def log(self): def log(self, log_fn=logger.info):
num_draft_tokens = np.sum(self.num_draft_tokens) num_draft_tokens = np.sum(self.num_draft_tokens)
num_accepted_tokens = np.sum(self.num_accepted_tokens) num_accepted_tokens = np.sum(self.num_accepted_tokens)
draft_acceptance_rate = (num_accepted_tokens / num_draft_tokens * draft_acceptance_rate = (num_accepted_tokens / num_draft_tokens *
100 if num_draft_tokens > 0 else float("nan")) 100 if num_draft_tokens > 0 else float("nan"))
logger.info( log_fn(
"SpecDecoding metrics: " "SpecDecoding metrics: "
"Draft acceptance rate: %.1f%%, " "Draft acceptance rate: %.1f%%, "
"Accepted: %d tokens, " "Accepted: %d tokens, "
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment