Unverified commit 7fe25588, authored by Nick Hill, committed by GitHub
Browse files

[Misc] Log vLLM logo when starting server (#32796)


Signed-off-by: Nick Hill <nickhill123@gmail.com>
parent dc917cce
...@@ -30,6 +30,7 @@ from grpc_reflection.v1alpha import reflection ...@@ -30,6 +30,7 @@ from grpc_reflection.v1alpha import reflection
from vllm import SamplingParams, TextPrompt, TokensPrompt from vllm import SamplingParams, TextPrompt, TokensPrompt
from vllm.engine.arg_utils import AsyncEngineArgs from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.entrypoints.utils import log_version_and_model
from vllm.grpc import vllm_engine_pb2, vllm_engine_pb2_grpc from vllm.grpc import vllm_engine_pb2, vllm_engine_pb2_grpc
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.outputs import RequestOutput from vllm.outputs import RequestOutput
...@@ -408,8 +409,8 @@ async def serve_grpc(args: argparse.Namespace): ...@@ -408,8 +409,8 @@ async def serve_grpc(args: argparse.Namespace):
Args: Args:
args: Parsed command line arguments args: Parsed command line arguments
""" """
logger.info("vLLM gRPC server version %s", VLLM_VERSION) log_version_and_model(logger, VLLM_VERSION, args.model)
logger.info("args: %s", args) logger.info("vLLM gRPC server args: %s", args)
start_time = time.time() start_time = time.time()
......
...@@ -63,6 +63,7 @@ from vllm.entrypoints.serve.tokenize.serving import OpenAIServingTokenization ...@@ -63,6 +63,7 @@ from vllm.entrypoints.serve.tokenize.serving import OpenAIServingTokenization
from vllm.entrypoints.utils import ( from vllm.entrypoints.utils import (
cli_env_setup, cli_env_setup,
log_non_default_args, log_non_default_args,
log_version_and_model,
process_lora_modules, process_lora_modules,
sanitize_message, sanitize_message,
) )
...@@ -867,7 +868,7 @@ def setup_server(args): ...@@ -867,7 +868,7 @@ def setup_server(args):
"""Validate API server args, set up signal handler, create socket """Validate API server args, set up signal handler, create socket
ready to serve.""" ready to serve."""
logger.info("vLLM API server version %s", VLLM_VERSION) log_version_and_model(logger, VLLM_VERSION, args.model)
log_non_default_args(args) log_non_default_args(args)
if args.tool_parser_plugin and len(args.tool_parser_plugin) > 3: if args.tool_parser_plugin and len(args.tool_parser_plugin) > 3:
......
...@@ -6,6 +6,8 @@ import dataclasses ...@@ -6,6 +6,8 @@ import dataclasses
import functools import functools
import os import os
from argparse import Namespace from argparse import Namespace
from logging import Logger
from string import Template
from typing import TYPE_CHECKING, Any from typing import TYPE_CHECKING, Any
import regex as re import regex as re
...@@ -13,8 +15,9 @@ from fastapi import Request ...@@ -13,8 +15,9 @@ from fastapi import Request
from fastapi.responses import JSONResponse, StreamingResponse from fastapi.responses import JSONResponse, StreamingResponse
from starlette.background import BackgroundTask, BackgroundTasks from starlette.background import BackgroundTask, BackgroundTasks
from vllm import envs
from vllm.engine.arg_utils import EngineArgs from vllm.engine.arg_utils import EngineArgs
from vllm.logger import init_logger from vllm.logger import current_formatter_type, init_logger
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.utils.argparse_utils import FlexibleArgumentParser from vllm.utils.argparse_utils import FlexibleArgumentParser
...@@ -295,3 +298,28 @@ def process_lora_modules( ...@@ -295,3 +298,28 @@ def process_lora_modules(
def sanitize_message(message: str) -> str:
    """Strip memory addresses out of object reprs embedded in ``message``.

    Default object reprs look like ``<Foo object at 0x7f3a...>``. The
    `` at 0x...`` part is dropped so the address does not leak.

    Args:
        message: Text that may contain default object reprs.

    Returns:
        ``message`` with every `` at 0x<hex>>`` occurrence collapsed to ``>``.
    """
    # Accept upper-case hex digits as well: CPython renders the address with
    # the platform's ``%p``, which emits upper-case digits on some platforms
    # (e.g. Windows), and those addresses must be stripped too.
    return re.sub(r" at 0x[0-9a-fA-F]+>", ">", message)
def log_version_and_model(lgr: Logger, version: str, model_name: str) -> None:
    """Log the server version and model name at INFO level, with a logo if possible.

    A plain one-line message is logged when ``VLLM_DISABLE_LOG_LOGO`` is set
    or when ``current_formatter_type`` reports no formatter type for ``lgr``.
    Otherwise a multi-line logo is logged, with ANSI colors only when the
    formatter type is ``"color"``.

    Args:
        lgr: Logger the message is emitted on.
        version: Version string filled into the first ``%s`` placeholder.
        model_name: Model name filled into the second ``%s`` placeholder.
    """
    # Skip the formatter lookup entirely when the logo is disabled.
    style = None if envs.VLLM_DISABLE_LOG_LOGO else current_formatter_type(lgr)
    if style is None:
        lgr.info("vLLM server version %s, serving model %s", version, model_name)
        return

    ansi_codes = {
        "w": "\033[97;1m",  # white
        "o": "\033[93m",  # orange
        "b": "\033[94m",  # blue
        "r": "\033[0m",  # reset
    }
    # Monochrome logo (no ANSI escape codes) for any non-color formatter.
    palette = ansi_codes if style == "color" else dict.fromkeys(ansi_codes, "")

    # NOTE(review): the logo's internal spacing may have been collapsed in
    # this copy of the file -- confirm the art still lines up when rendered.
    logo_text = (
        "\n ${w}█ █ █▄ ▄█${r}\n"
        " ${o}▄▄${r} ${b}▄█${r} ${w}█ █ █ ▀▄▀ █${r} version ${w}%s${r}\n"
        " ${o}█${r}${b}▄█▀${r} ${w}█ █ █ █${r} model ${w}%s${r}\n"
        " ${b}▀▀${r} ${w}▀▀▀▀▀ ▀▀▀▀▀ ▀ ▀${r}\n"
    )
    # The ``%s`` placeholders survive substitution and are filled lazily by
    # the logging call below.
    banner = Template(logo_text).substitute(palette)
    lgr.info(banner, version, model_name)
...@@ -249,6 +249,7 @@ if TYPE_CHECKING: ...@@ -249,6 +249,7 @@ if TYPE_CHECKING:
VLLM_USE_V2_MODEL_RUNNER: bool = False VLLM_USE_V2_MODEL_RUNNER: bool = False
VLLM_LOG_MODEL_INSPECTION: bool = False VLLM_LOG_MODEL_INSPECTION: bool = False
VLLM_DEBUG_MFU_METRICS: bool = False VLLM_DEBUG_MFU_METRICS: bool = False
VLLM_DISABLE_LOG_LOGO: bool = False
def get_default_cache_root(): def get_default_cache_root():
...@@ -1610,6 +1611,8 @@ environment_variables: dict[str, Callable[[], Any]] = { ...@@ -1610,6 +1611,8 @@ environment_variables: dict[str, Callable[[], Any]] = {
"VLLM_DEBUG_MFU_METRICS": lambda: bool( "VLLM_DEBUG_MFU_METRICS": lambda: bool(
int(os.getenv("VLLM_DEBUG_MFU_METRICS", "0")) int(os.getenv("VLLM_DEBUG_MFU_METRICS", "0"))
), ),
# Disable logging of vLLM logo at server startup time.
"VLLM_DISABLE_LOG_LOGO": lambda: bool(int(os.getenv("VLLM_DISABLE_LOG_LOGO", "0"))),
} }
# --8<-- [end:env-vars-definition] # --8<-- [end:env-vars-definition]
......
...@@ -17,6 +17,7 @@ from types import MethodType ...@@ -17,6 +17,7 @@ from types import MethodType
from typing import Any, Literal, cast from typing import Any, Literal, cast
import vllm.envs as envs import vllm.envs as envs
from vllm.logging_utils import ColoredFormatter, NewLineFormatter
_FORMAT = ( _FORMAT = (
f"{envs.VLLM_LOGGING_PREFIX}%(levelname)s %(asctime)s " f"{envs.VLLM_LOGGING_PREFIX}%(levelname)s %(asctime)s "
...@@ -224,6 +225,18 @@ def suppress_logging(level: int = logging.INFO) -> Generator[None, Any, None]: ...@@ -224,6 +225,18 @@ def suppress_logging(level: int = logging.INFO) -> Generator[None, Any, None]:
logging.disable(current_level) logging.disable(current_level)
def current_formatter_type(lgr: Logger) -> Literal["color", "newline", None]:
    """Report which vLLM formatter, if any, applies to records logged on ``lgr``.

    Walks from ``lgr`` towards the root. At each logger whose only handler is
    the one named ``"vllm"``, that handler's formatter is classified. The walk
    stops at the first logger with ``propagate`` disabled, because records are
    not passed to handlers of loggers above it.

    Args:
        lgr: Logger to start the lookup from.

    Returns:
        ``"color"`` for a ``ColoredFormatter``, ``"newline"`` for a
        ``NewLineFormatter``, or ``None`` when no such formatter is reachable.
    """
    node: Logger | None = lgr
    while node is not None:
        handlers = node.handlers
        if len(handlers) == 1 and handlers[0].name == "vllm":
            formatter = handlers[0].formatter
            # NOTE(review): the ColoredFormatter check stays first in case it
            # subclasses NewLineFormatter -- confirm against logging_utils.
            if isinstance(formatter, ColoredFormatter):
                return "color"
            if isinstance(formatter, NewLineFormatter):
                return "newline"
        if not node.propagate:
            # Ancestors' handlers never see this logger's records.
            break
        node = node.parent
    return None
# The root logger is initialized when the module is imported. # The root logger is initialized when the module is imported.
# This is thread-safe as the module is only imported once, # This is thread-safe as the module is only imported once,
# guaranteed by the Python GIL. # guaranteed by the Python GIL.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment