Unverified Commit d0215a58 authored by Iskren Ivov Chernev, committed by GitHub

Ensure metrics are logged regardless of requests (#2347)

parent 937e7b7d
@@ -506,3 +506,9 @@ class AsyncLLMEngine:
             max_log_len=engine_args.max_log_len,
             start_engine_loop=start_engine_loop)
         return engine
+
+    async def do_log_stats(self) -> None:
+        if self.engine_use_ray:
+            await self.engine.do_log_stats.remote()
+        else:
+            self.engine.do_log_stats()
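
The new AsyncLLMEngine.do_log_stats forwards the call to the underlying LLMEngine, going through a Ray actor handle (await ... .remote()) when the engine runs inside Ray and calling the method directly otherwise. Below is a minimal, self-contained sketch of that remote-or-local dispatch pattern; StatsKeeper and AsyncWrapper are hypothetical stand-ins, and the Ray branch assumes Ray is installed and that actor method calls return awaitable object refs.

# Sketch of the remote-or-local dispatch pattern behind do_log_stats.
# StatsKeeper is a hypothetical stand-in for LLMEngine.
import asyncio

class StatsKeeper:
    def do_log_stats(self) -> None:
        print("stats logged")

class AsyncWrapper:
    def __init__(self, use_ray: bool) -> None:
        self.use_ray = use_ray
        if use_ray:
            import ray                                    # assumption: Ray available
            self.inner = ray.remote(StatsKeeper).remote()  # actor handle
        else:
            self.inner = StatsKeeper()                     # plain in-process object

    async def do_log_stats(self) -> None:
        if self.use_ray:
            await self.inner.do_log_stats.remote()         # remote actor call
        else:
            self.inner.do_log_stats()                      # direct call

# Local path needs nothing beyond the standard library:
asyncio.run(AsyncWrapper(use_ray=False).do_log_stats())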
@@ -641,6 +641,9 @@ class LLMEngine:
         return self._process_model_outputs(output, scheduler_outputs)
 
+    def do_log_stats(self) -> None:
+        self._log_system_stats(False, 0)
+
     def _log_system_stats(
         self,
         prompt_run: bool,
...
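
LLMEngine.do_log_stats simply calls _log_system_stats(False, 0), a logging pass with no prompt run and zero newly batched tokens. The sketch below shows the general interval-throttled pattern such a logger typically follows, which is why a forced call with no new work is cheap; the class, field names, and the 5-second interval are assumptions, not vLLM's exact implementation.

# Sketch of an interval-throttled stats logger (names and interval are assumptions).
import time

LOGGING_INTERVAL_SEC = 5.0

class EngineStats:
    def __init__(self) -> None:
        self.last_log_time = time.monotonic()
        self.tokens_since_log = 0

    def log_system_stats(self, prompt_run: bool, num_batched_tokens: int) -> None:
        self.tokens_since_log += num_batched_tokens
        now = time.monotonic()
        elapsed = now - self.last_log_time
        if elapsed < LOGGING_INTERVAL_SEC:
            return  # called too recently; just accumulate counters
        print(f"prompt_run={prompt_run}, "
              f"avg throughput={self.tokens_since_log / elapsed:.1f} tokens/s")
        self.last_log_time = now
        self.tokens_since_log = 0

stats = EngineStats()
stats.log_system_stats(False, 0)  # periodic call with no new tokens; logs only once the interval has elapsed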
@@ -6,6 +6,7 @@
 import asyncio
 import codecs
 import json
 import time
+from contextlib import asynccontextmanager
 from http import HTTPStatus
 from typing import AsyncGenerator, Dict, List, Optional, Tuple, Union
@@ -38,11 +39,28 @@
 TIMEOUT_KEEP_ALIVE = 5  # seconds
 
 logger = init_logger(__name__)
 served_model = None
-app = fastapi.FastAPI()
+engine_args = None
 engine = None
 response_role = None
+
+
+@asynccontextmanager
+async def lifespan(app: fastapi.FastAPI):
+
+    async def _force_log():
+        while True:
+            await asyncio.sleep(10)
+            await engine.do_log_stats()
+
+    if not engine_args.disable_log_stats:
+        asyncio.create_task(_force_log())
+
+    yield
+
+
+app = fastapi.FastAPI(lifespan=lifespan)
 
 def parse_args():
     parser = argparse.ArgumentParser(
         description="vLLM OpenAI-Compatible RESTful API server.")
...
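
Taken together, the lifespan hook starts a background coroutine when the server boots: every 10 seconds it calls engine.do_log_stats(), so throughput and cache statistics keep appearing in the log even when no requests are in flight. The standalone sketch below shows the same FastAPI lifespan plus periodic-task pattern; the names are hypothetical, and keeping a handle to the task so it can be cancelled on shutdown is an extra precaution not present in the diff above.

# Standalone sketch of the lifespan + periodic background task pattern.
import asyncio
from contextlib import asynccontextmanager

from fastapi import FastAPI

async def periodic_report(interval: float = 10.0) -> None:
    while True:
        await asyncio.sleep(interval)
        print("stats heartbeat")  # stand-in for engine.do_log_stats()

@asynccontextmanager
async def lifespan(app: FastAPI):
    task = asyncio.create_task(periodic_report())  # starts with the server
    try:
        yield                                      # server handles requests
    finally:
        task.cancel()                              # stop the loop on shutdown

app = FastAPI(lifespan=lifespan)

@app.get("/ping")
async def ping():
    return {"ok": True}

# Run with: uvicorn this_module:app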