"...git@developer.sourcefind.cn:2222/OpenDAS/vllm_cscc.git" did not exist on "1dfea5f4a95df8d14b46433a479a28d56e60494c"
Unverified Commit 872db2be authored by Nick Hill's avatar Nick Hill Committed by GitHub
Browse files

[V1] Simplify stats logging (#14082)


Signed-off-by: default avatarNick Hill <nhill@redhat.com>
parent 2dfdfed8
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
import asyncio import asyncio
import logging
import os import os
from collections.abc import AsyncGenerator, Mapping from collections.abc import AsyncGenerator, Mapping
from typing import Optional, Union from typing import Optional, Union
...@@ -57,10 +58,9 @@ class AsyncLLM(EngineClient): ...@@ -57,10 +58,9 @@ class AsyncLLM(EngineClient):
self.log_stats = log_stats self.log_stats = log_stats
self.stat_loggers: list[StatLoggerBase] = [] self.stat_loggers: list[StatLoggerBase] = []
if self.log_stats: if self.log_stats:
self.stat_loggers.extend([ if logger.isEnabledFor(logging.INFO):
LoggingStatLogger(), self.stat_loggers.append(LoggingStatLogger())
PrometheusStatLogger(vllm_config), self.stat_loggers.append(PrometheusStatLogger(vllm_config))
])
# Tokenizer (+ ensure liveness if running in another process). # Tokenizer (+ ensure liveness if running in another process).
self.tokenizer = init_tokenizer_from_configs( self.tokenizer = init_tokenizer_from_configs(
...@@ -287,7 +287,7 @@ class AsyncLLM(EngineClient): ...@@ -287,7 +287,7 @@ class AsyncLLM(EngineClient):
# 4) Logging. # 4) Logging.
# TODO(rob): make into a coroutine and launch it in # TODO(rob): make into a coroutine and launch it in
# background thread once Prometheus overhead is non-trivial. # background thread once Prometheus overhead is non-trivial.
self._log_stats( self._record_stats(
scheduler_stats=outputs.scheduler_stats, scheduler_stats=outputs.scheduler_stats,
iteration_stats=iteration_stats, iteration_stats=iteration_stats,
) )
...@@ -306,7 +306,7 @@ class AsyncLLM(EngineClient): ...@@ -306,7 +306,7 @@ class AsyncLLM(EngineClient):
if self.log_requests: if self.log_requests:
logger.info("Aborted request %s.", request_id) logger.info("Aborted request %s.", request_id)
def _log_stats( def _record_stats(
self, self,
scheduler_stats: Optional[SchedulerStats], scheduler_stats: Optional[SchedulerStats],
iteration_stats: Optional[IterationStats], iteration_stats: Optional[IterationStats],
...@@ -316,8 +316,8 @@ class AsyncLLM(EngineClient): ...@@ -316,8 +316,8 @@ class AsyncLLM(EngineClient):
assert scheduler_stats is not None assert scheduler_stats is not None
assert iteration_stats is not None assert iteration_stats is not None
for logger in self.stat_loggers: for stat_logger in self.stat_loggers:
logger.log(scheduler_stats=scheduler_stats, stat_logger.record(scheduler_stats=scheduler_stats,
iteration_stats=iteration_stats) iteration_stats=iteration_stats)
def encode( def encode(
...@@ -354,7 +354,8 @@ class AsyncLLM(EngineClient): ...@@ -354,7 +354,8 @@ class AsyncLLM(EngineClient):
scheduler_outputs=None, scheduler_outputs=None,
model_output=None, model_output=None,
) -> None: ) -> None:
logger.debug("Called do_log_stats.") for stat_logger in self.stat_loggers:
stat_logger.log()
async def check_health(self) -> None: async def check_health(self) -> None:
logger.debug("Called check_health.") logger.debug("Called check_health.")
......
...@@ -316,19 +316,10 @@ class EngineCoreProc(EngineCore): ...@@ -316,19 +316,10 @@ class EngineCoreProc(EngineCore):
# Loop until process is sent a SIGINT or SIGTERM # Loop until process is sent a SIGINT or SIGTERM
while True: while True:
# 1) Poll the input queue until there is work to do. # 1) Poll the input queue until there is work to do.
if not self.scheduler.has_unfinished_requests(): while not self.scheduler.has_unfinished_requests():
while True:
try:
req = self.input_queue.get(timeout=POLLING_TIMEOUT_S)
self._handle_client_request(*req)
break
except queue.Empty:
logger.debug("EngineCore busy loop waiting.") logger.debug("EngineCore busy loop waiting.")
# Break out the loop so we can log_stats in step(). req = self.input_queue.get()
if self.log_stats: self._handle_client_request(*req)
break
except BaseException:
raise
# 2) Handle any new client requests. # 2) Handle any new client requests.
while not self.input_queue.empty(): while not self.input_queue.empty():
......
...@@ -21,15 +21,19 @@ _LOCAL_LOGGING_INTERVAL_SEC = 5.0 ...@@ -21,15 +21,19 @@ _LOCAL_LOGGING_INTERVAL_SEC = 5.0
class StatLoggerBase(ABC): class StatLoggerBase(ABC):
@abstractmethod @abstractmethod
def log(self, scheduler_stats: SchedulerStats, def record(self, scheduler_stats: SchedulerStats,
iteration_stats: IterationStats): iteration_stats: IterationStats):
... ...
def log(self): # noqa
pass
class LoggingStatLogger(StatLoggerBase): class LoggingStatLogger(StatLoggerBase):
def __init__(self): def __init__(self):
self._reset(time.monotonic()) self._reset(time.monotonic())
self.last_scheduler_stats = SchedulerStats()
def _reset(self, now): def _reset(self, now):
self.last_log_time = now self.last_log_time = now
...@@ -41,11 +45,6 @@ class LoggingStatLogger(StatLoggerBase): ...@@ -41,11 +45,6 @@ class LoggingStatLogger(StatLoggerBase):
# Prefix cache metrics. TODO: Make the interval configurable. # Prefix cache metrics. TODO: Make the interval configurable.
self.prefix_caching_metrics = PrefixCachingMetrics() self.prefix_caching_metrics = PrefixCachingMetrics()
def _local_interval_elapsed(self, now: float) -> bool:
# Log every _LOCAL_LOGGING_INTERVAL_SEC.
elapsed_time = now - self.last_log_time
return elapsed_time > _LOCAL_LOGGING_INTERVAL_SEC
def _track_iteration_stats(self, iteration_stats: IterationStats): def _track_iteration_stats(self, iteration_stats: IterationStats):
# Save tracked stats for token counters. # Save tracked stats for token counters.
self.num_prompt_tokens.append(iteration_stats.num_prompt_tokens) self.num_prompt_tokens.append(iteration_stats.num_prompt_tokens)
...@@ -56,7 +55,7 @@ class LoggingStatLogger(StatLoggerBase): ...@@ -56,7 +55,7 @@ class LoggingStatLogger(StatLoggerBase):
# Compute summary metrics for tracked stats # Compute summary metrics for tracked stats
return float(np.sum(tracked_stats) / (now - self.last_log_time)) return float(np.sum(tracked_stats) / (now - self.last_log_time))
def log(self, scheduler_stats: SchedulerStats, def record(self, scheduler_stats: SchedulerStats,
iteration_stats: IterationStats): iteration_stats: IterationStats):
"""Log Stats to standard output.""" """Log Stats to standard output."""
...@@ -64,16 +63,18 @@ class LoggingStatLogger(StatLoggerBase): ...@@ -64,16 +63,18 @@ class LoggingStatLogger(StatLoggerBase):
self.prefix_caching_metrics.observe(scheduler_stats.prefix_cache_stats) self.prefix_caching_metrics.observe(scheduler_stats.prefix_cache_stats)
now = time.monotonic() self.last_scheduler_stats = scheduler_stats
if not self._local_interval_elapsed(now):
return
def log(self):
now = time.monotonic()
prompt_throughput = self._get_throughput(self.num_prompt_tokens, now) prompt_throughput = self._get_throughput(self.num_prompt_tokens, now)
generation_throughput = self._get_throughput( generation_throughput = self._get_throughput(
self.num_generation_tokens, now) self.num_generation_tokens, now)
self._reset(now) self._reset(now)
scheduler_stats = self.last_scheduler_stats
# Format and print output. # Format and print output.
logger.info( logger.info(
"Avg prompt throughput: %.1f tokens/s, " "Avg prompt throughput: %.1f tokens/s, "
...@@ -274,7 +275,7 @@ class PrometheusStatLogger(StatLoggerBase): ...@@ -274,7 +275,7 @@ class PrometheusStatLogger(StatLoggerBase):
labelnames=metrics_info.keys()).labels(**metrics_info) labelnames=metrics_info.keys()).labels(**metrics_info)
info_gauge.set(1) info_gauge.set(1)
def log(self, scheduler_stats: SchedulerStats, def record(self, scheduler_stats: SchedulerStats,
iteration_stats: IterationStats): iteration_stats: IterationStats):
"""Log to prometheus.""" """Log to prometheus."""
self.gauge_scheduler_running.set(scheduler_stats.num_running_reqs) self.gauge_scheduler_running.set(scheduler_stats.num_running_reqs)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment