Unverified commit 1c0aabde, authored by Sage and committed by GitHub
Browse files

[Bugfix] Suppress spurious CPU KV cache warning in `launch render` (#37911)


Signed-off-by: Sage Ahrac <sagiahrak@gmail.com>
parent 14acf429
...@@ -5,6 +5,8 @@ import argparse ...@@ -5,6 +5,8 @@ import argparse
import uvloop import uvloop
from vllm import envs
from vllm.config import VllmConfig
from vllm.engine.arg_utils import AsyncEngineArgs from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.entrypoints.cli.types import CLISubcommand from vllm.entrypoints.cli.types import CLISubcommand
from vllm.entrypoints.openai.api_server import ( from vllm.entrypoints.openai.api_server import (
...@@ -108,8 +110,6 @@ def cmd_init() -> list[CLISubcommand]: ...@@ -108,8 +110,6 @@ def cmd_init() -> list[CLISubcommand]:
async def run_launch_fastapi(args: argparse.Namespace) -> None: async def run_launch_fastapi(args: argparse.Namespace) -> None:
"""Run the online serving layer with FastAPI (no GPU inference).""" """Run the online serving layer with FastAPI (no GPU inference)."""
from vllm.config import VllmConfig
# 1. Socket binding # 1. Socket binding
listen_address, sock = setup_server(args) listen_address, sock = setup_server(args)
...@@ -121,6 +121,10 @@ async def run_launch_fastapi(args: argparse.Namespace) -> None: ...@@ -121,6 +121,10 @@ async def run_launch_fastapi(args: argparse.Namespace) -> None:
# Clear quantization so VllmConfig skips quant dtype/capability validation. # Clear quantization so VllmConfig skips quant dtype/capability validation.
model_config.quantization = None model_config.quantization = None
# Render servers never allocate KV cache; suppress the spurious CPU KV
# cache space warning from CpuPlatform.check_and_update_config.
envs.VLLM_CPU_KVCACHE_SPACE = 0
vllm_config = VllmConfig(model_config=model_config) vllm_config = VllmConfig(model_config=model_config)
shutdown_task = await build_and_serve_renderer( shutdown_task = await build_and_serve_renderer(
vllm_config, listen_address, sock, args vllm_config, listen_address, sock, args
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment