"vscode:/vscode.git/clone" did not exist on "08bf7840780980c7568c573c70a6a8db94fd45ff"
Unverified commit 31f58be9, authored by liusiqian-tal and committed by GitHub
Browse files

[Frontend] Make TIMEOUT_KEEP_ALIVE configurable through env var (#18472)


Signed-off-by: liusiqian <liusiqian@tal.com>
parent ebb2f383
...@@ -8,6 +8,7 @@ import uvicorn ...@@ -8,6 +8,7 @@ import uvicorn
from fastapi.responses import JSONResponse, Response from fastapi.responses import JSONResponse, Response
import vllm.entrypoints.api_server import vllm.entrypoints.api_server
import vllm.envs as envs
from vllm.engine.arg_utils import AsyncEngineArgs from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.engine.async_llm_engine import AsyncLLMEngine from vllm.engine.async_llm_engine import AsyncLLMEngine
from vllm.utils import FlexibleArgumentParser from vllm.utils import FlexibleArgumentParser
...@@ -46,9 +47,8 @@ if __name__ == "__main__": ...@@ -46,9 +47,8 @@ if __name__ == "__main__":
engine_args = AsyncEngineArgs.from_cli_args(args) engine_args = AsyncEngineArgs.from_cli_args(args)
engine = AsyncLLMEngineWithStats.from_engine_args(engine_args) engine = AsyncLLMEngineWithStats.from_engine_args(engine_args)
vllm.entrypoints.api_server.engine = engine vllm.entrypoints.api_server.engine = engine
uvicorn.run( uvicorn.run(app,
app,
host=args.host, host=args.host,
port=args.port, port=args.port,
log_level="debug", log_level="debug",
timeout_keep_alive=vllm.entrypoints.api_server.TIMEOUT_KEEP_ALIVE) timeout_keep_alive=envs.VLLM_HTTP_TIMEOUT_KEEP_ALIVE)
...@@ -17,6 +17,7 @@ from typing import Any, Optional ...@@ -17,6 +17,7 @@ from typing import Any, Optional
from fastapi import FastAPI, Request from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse, Response, StreamingResponse from fastapi.responses import JSONResponse, Response, StreamingResponse
import vllm.envs as envs
from vllm.engine.arg_utils import AsyncEngineArgs from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.engine.async_llm_engine import AsyncLLMEngine from vllm.engine.async_llm_engine import AsyncLLMEngine
from vllm.entrypoints.launcher import serve_http from vllm.entrypoints.launcher import serve_http
...@@ -29,7 +30,6 @@ from vllm.version import __version__ as VLLM_VERSION ...@@ -29,7 +30,6 @@ from vllm.version import __version__ as VLLM_VERSION
logger = init_logger("vllm.entrypoints.api_server") logger = init_logger("vllm.entrypoints.api_server")
TIMEOUT_KEEP_ALIVE = 5 # seconds.
app = FastAPI() app = FastAPI()
engine = None engine = None
...@@ -134,7 +134,7 @@ async def run_server(args: Namespace, ...@@ -134,7 +134,7 @@ async def run_server(args: Namespace,
host=args.host, host=args.host,
port=args.port, port=args.port,
log_level=args.log_level, log_level=args.log_level,
timeout_keep_alive=TIMEOUT_KEEP_ALIVE, timeout_keep_alive=envs.VLLM_HTTP_TIMEOUT_KEEP_ALIVE,
ssl_keyfile=args.ssl_keyfile, ssl_keyfile=args.ssl_keyfile,
ssl_certfile=args.ssl_certfile, ssl_certfile=args.ssl_certfile,
ssl_ca_certs=args.ssl_ca_certs, ssl_ca_certs=args.ssl_ca_certs,
......
...@@ -103,8 +103,6 @@ from vllm.utils import (Device, FlexibleArgumentParser, get_open_zmq_ipc_path, ...@@ -103,8 +103,6 @@ from vllm.utils import (Device, FlexibleArgumentParser, get_open_zmq_ipc_path,
from vllm.v1.metrics.prometheus import get_prometheus_registry from vllm.v1.metrics.prometheus import get_prometheus_registry
from vllm.version import __version__ as VLLM_VERSION from vllm.version import __version__ as VLLM_VERSION
TIMEOUT_KEEP_ALIVE = 5 # seconds
prometheus_multiproc_dir: tempfile.TemporaryDirectory prometheus_multiproc_dir: tempfile.TemporaryDirectory
# Cannot use __name__ (https://github.com/vllm-project/vllm/pull/4765) # Cannot use __name__ (https://github.com/vllm-project/vllm/pull/4765)
...@@ -1360,7 +1358,7 @@ async def run_server_worker(listen_address, ...@@ -1360,7 +1358,7 @@ async def run_server_worker(listen_address,
# NOTE: When the 'disable_uvicorn_access_log' value is True, # NOTE: When the 'disable_uvicorn_access_log' value is True,
# no access log will be output. # no access log will be output.
access_log=not args.disable_uvicorn_access_log, access_log=not args.disable_uvicorn_access_log,
timeout_keep_alive=TIMEOUT_KEEP_ALIVE, timeout_keep_alive=envs.VLLM_HTTP_TIMEOUT_KEEP_ALIVE,
ssl_keyfile=args.ssl_keyfile, ssl_keyfile=args.ssl_keyfile,
ssl_certfile=args.ssl_certfile, ssl_certfile=args.ssl_certfile,
ssl_ca_certs=args.ssl_ca_certs, ssl_ca_certs=args.ssl_ca_certs,
......
...@@ -71,6 +71,7 @@ if TYPE_CHECKING: ...@@ -71,6 +71,7 @@ if TYPE_CHECKING:
VERBOSE: bool = False VERBOSE: bool = False
VLLM_ALLOW_LONG_MAX_MODEL_LEN: bool = False VLLM_ALLOW_LONG_MAX_MODEL_LEN: bool = False
VLLM_RPC_TIMEOUT: int = 10000 # ms VLLM_RPC_TIMEOUT: int = 10000 # ms
VLLM_HTTP_TIMEOUT_KEEP_ALIVE: int = 5 # seconds
VLLM_PLUGINS: Optional[list[str]] = None VLLM_PLUGINS: Optional[list[str]] = None
VLLM_LORA_RESOLVER_CACHE_DIR: Optional[str] = None VLLM_LORA_RESOLVER_CACHE_DIR: Optional[str] = None
VLLM_TORCH_PROFILER_DIR: Optional[str] = None VLLM_TORCH_PROFILER_DIR: Optional[str] = None
...@@ -557,6 +558,10 @@ environment_variables: dict[str, Callable[[], Any]] = { ...@@ -557,6 +558,10 @@ environment_variables: dict[str, Callable[[], Any]] = {
"VLLM_RPC_TIMEOUT": "VLLM_RPC_TIMEOUT":
lambda: int(os.getenv("VLLM_RPC_TIMEOUT", "10000")), lambda: int(os.getenv("VLLM_RPC_TIMEOUT", "10000")),
# Timeout in seconds for keeping HTTP connections alive in API server
"VLLM_HTTP_TIMEOUT_KEEP_ALIVE":
lambda: int(os.environ.get("VLLM_HTTP_TIMEOUT_KEEP_ALIVE", "5")),
# a list of plugin names to load, separated by commas. # a list of plugin names to load, separated by commas.
# if this is not set, it means all plugins will be loaded # if this is not set, it means all plugins will be loaded
# if this is set to an empty string, no plugins will be loaded # if this is set to an empty string, no plugins will be loaded
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment