Unverified Commit 20133cfe authored by Frαnçois's avatar Frαnçois Committed by GitHub
Browse files

[Frontend] enable custom logging for the uvicorn server (OpenAI API server) (#18403)


Signed-off-by: François Paupier <francois.paupier@gmail.com>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
parent ebb1ec93
...@@ -5,6 +5,7 @@ import atexit ...@@ -5,6 +5,7 @@ import atexit
import gc import gc
import importlib import importlib
import inspect import inspect
import json
import multiprocessing import multiprocessing
import os import os
import signal import signal
...@@ -16,7 +17,6 @@ from collections.abc import AsyncIterator ...@@ -16,7 +17,6 @@ from collections.abc import AsyncIterator
from contextlib import asynccontextmanager from contextlib import asynccontextmanager
from functools import partial from functools import partial
from http import HTTPStatus from http import HTTPStatus
from json import JSONDecodeError
from typing import Annotated, Any, Optional from typing import Annotated, Any, Optional
import prometheus_client import prometheus_client
...@@ -930,7 +930,7 @@ async def invocations(raw_request: Request): ...@@ -930,7 +930,7 @@ async def invocations(raw_request: Request):
""" """
try: try:
body = await raw_request.json() body = await raw_request.json()
except JSONDecodeError as e: except json.JSONDecodeError as e:
raise HTTPException(status_code=HTTPStatus.BAD_REQUEST.value, raise HTTPException(status_code=HTTPStatus.BAD_REQUEST.value,
detail=f"JSON decode error: {e}") from e detail=f"JSON decode error: {e}") from e
...@@ -1003,6 +1003,18 @@ if envs.VLLM_ALLOW_RUNTIME_LORA_UPDATING: ...@@ -1003,6 +1003,18 @@ if envs.VLLM_ALLOW_RUNTIME_LORA_UPDATING:
return Response(status_code=200, content=response) return Response(status_code=200, content=response)
def load_log_config(log_config_file: Optional[str]) -> Optional[dict]:
    """Load a logging configuration dictionary from a JSON file.

    Loading is best effort: any failure is reported with a warning and
    ``None`` is returned instead of raising.

    Args:
        log_config_file: Path to a JSON file holding the logging
            configuration. ``None`` or an empty string means "no file".

    Returns:
        The parsed configuration dictionary, or ``None`` when no path was
        given, the file could not be read or parsed, or its top-level JSON
        value is not an object.
    """
    if not log_config_file:
        return None
    try:
        # JSON text is UTF-8; do not depend on the platform's locale encoding.
        with open(log_config_file, encoding="utf-8") as f:
            log_config = json.load(f)
    except Exception as e:
        # Deliberately broad: a bad logging config must not raise here.
        logger.warning("Failed to load log config from file %s: error %s",
                       log_config_file, e)
        return None
    if not isinstance(log_config, dict):
        # Valid JSON that is not an object (list, string, number, ...) does
        # not satisfy the declared return type, so treat it as a load failure.
        logger.warning("Failed to load log config from file %s: error %s",
                       log_config_file,
                       "top-level JSON value must be an object")
        return None
    return log_config
def build_app(args: Namespace) -> FastAPI: def build_app(args: Namespace) -> FastAPI:
if args.disable_fastapi_docs: if args.disable_fastapi_docs:
app = FastAPI(openapi_url=None, app = FastAPI(openapi_url=None,
...@@ -1324,6 +1336,11 @@ async def run_server_worker(listen_address, ...@@ -1324,6 +1336,11 @@ async def run_server_worker(listen_address,
server_index = client_config.get("client_index", 0) if client_config else 0 server_index = client_config.get("client_index", 0) if client_config else 0
# Load logging config for uvicorn if specified
log_config = load_log_config(args.log_config_file)
if log_config is not None:
uvicorn_kwargs['log_config'] = log_config
async with build_async_engine_client(args, client_config) as engine_client: async with build_async_engine_client(args, client_config) as engine_client:
app = build_app(args) app = build_app(args)
......
...@@ -11,6 +11,7 @@ import ssl ...@@ -11,6 +11,7 @@ import ssl
from collections.abc import Sequence from collections.abc import Sequence
from typing import Optional, Union, get_args from typing import Optional, Union, get_args
import vllm.envs as envs
from vllm.engine.arg_utils import AsyncEngineArgs, optional_type from vllm.engine.arg_utils import AsyncEngineArgs, optional_type
from vllm.entrypoints.chat_utils import (ChatTemplateContentFormatOption, from vllm.entrypoints.chat_utils import (ChatTemplateContentFormatOption,
validate_chat_template) validate_chat_template)
...@@ -243,6 +244,13 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: ...@@ -243,6 +244,13 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
" into OpenAI API format, the name register in this plugin can be used " " into OpenAI API format, the name register in this plugin can be used "
"in ``--tool-call-parser``.") "in ``--tool-call-parser``.")
parser.add_argument(
"--log-config-file",
type=str,
default=envs.VLLM_LOGGING_CONFIG_PATH,
help="Path to logging config JSON file for both vllm and uvicorn",
)
parser = AsyncEngineArgs.add_cli_args(parser) parser = AsyncEngineArgs.add_cli_args(parser)
parser.add_argument('--max-log-len', parser.add_argument('--max-log-len',
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment