[V1] Remove log noise when idle (#16735)

Signed-off-by: Russell Bryant <rbryant@redhat.com>

[V1] Remove log noise when idle (#16735)
Signed-off-by: Russell Bryant <rbryant@redhat.com>
9dbf7a2d · Russell Bryant · GitHub · 607029e5 · 9dbf7a2d · 9dbf7a2d
Unverified commit 9dbf7a2d, authored Apr 17, 2025 by Russell Bryant; committed by GitHub on Apr 16, 2025
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 15 additions and 4 deletions

vllm/v1/metrics/loggers.py vllm/v1/metrics/loggers.py +13 -2

vllm/v1/spec_decode/metrics.py vllm/v1/spec_decode/metrics.py +2 -2

No files found.
--- a/vllm/v1/metrics/loggers.py
+++ b/vllm/v1/metrics/loggers.py
@@ -40,6 +40,8 @@ class LoggingStatLogger(StatLoggerBase):
        # TODO: Make the interval configurable.
        self.prefix_caching_metrics = PrefixCachingMetrics()
        self.spec_decoding_metrics = SpecDecodingMetrics()
+        self.last_prompt_throughput: float = 0.0
+        self.last_generation_throughput: float = 0.0
    def _reset(self, now):
        self.last_log_time = now
@@ -83,8 +85,17 @@ class LoggingStatLogger(StatLoggerBase):
        scheduler_stats = self.last_scheduler_stats
+        log_fn = logger.info
+        if not any(
+            (prompt_throughput, generation_throughput,
+             self.last_prompt_throughput, self.last_generation_throughput)):
+            # Avoid log noise on an idle production system
+            log_fn = logger.debug
+        self.last_generation_throughput = generation_throughput
+        self.last_prompt_throughput = prompt_throughput
        # Format and print output.
-        logger.info(
+        log_fn(
            "Engine %03d: "
            "Avg prompt throughput: %.1f tokens/s, "
            "Avg generation throughput: %.1f tokens/s, "
@@ -101,7 +112,7 @@ class LoggingStatLogger(StatLoggerBase):
        )
        if scheduler_stats.spec_decoding_stats is not None:
-            self.spec_decoding_metrics.log()
+            self.spec_decoding_metrics.log(log_fn=log_fn)
 class PrometheusStatLogger(StatLoggerBase):

--- a/vllm/v1/spec_decode/metrics.py
+++ b/vllm/v1/spec_decode/metrics.py
@@ -43,14 +43,14 @@ class SpecDecodingMetrics:
        self.num_accepted_tokens.append(
            spec_decoding_stats.num_accepted_tokens)
-    def log(self):
+    def log(self, log_fn=logger.info):
        num_draft_tokens = np.sum(self.num_draft_tokens)
        num_accepted_tokens = np.sum(self.num_accepted_tokens)
        draft_acceptance_rate = (num_accepted_tokens / num_draft_tokens *
                                 100 if num_draft_tokens > 0 else float("nan"))
-        logger.info(
+        log_fn(
            "SpecDecoding metrics: "
            "Draft acceptance rate: %.1f%%, "
            "Accepted: %d tokens, "