Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
31f58be9
Unverified
Commit
31f58be9
authored
Jun 10, 2025
by
liusiqian-tal
Committed by
GitHub
Jun 09, 2025
Browse files
[Frontend] Make TIMEOUT_KEEP_ALIVE configurable through env var (#18472)
Signed-off-by:
liusiqian
<
liusiqian@tal.com
>
parent
ebb2f383
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
14 additions
and
11 deletions
+14
-11
tests/async_engine/api_server_async_engine.py
tests/async_engine/api_server_async_engine.py
+6
-6
vllm/entrypoints/api_server.py
vllm/entrypoints/api_server.py
+2
-2
vllm/entrypoints/openai/api_server.py
vllm/entrypoints/openai/api_server.py
+1
-3
vllm/envs.py
vllm/envs.py
+5
-0
No files found.
tests/async_engine/api_server_async_engine.py
View file @
31f58be9
...
@@ -8,6 +8,7 @@ import uvicorn
...
@@ -8,6 +8,7 @@ import uvicorn
from
fastapi.responses
import
JSONResponse
,
Response
from
fastapi.responses
import
JSONResponse
,
Response
import
vllm.entrypoints.api_server
import
vllm.entrypoints.api_server
import
vllm.envs
as
envs
from
vllm.engine.arg_utils
import
AsyncEngineArgs
from
vllm.engine.arg_utils
import
AsyncEngineArgs
from
vllm.engine.async_llm_engine
import
AsyncLLMEngine
from
vllm.engine.async_llm_engine
import
AsyncLLMEngine
from
vllm.utils
import
FlexibleArgumentParser
from
vllm.utils
import
FlexibleArgumentParser
...
@@ -46,9 +47,8 @@ if __name__ == "__main__":
...
@@ -46,9 +47,8 @@ if __name__ == "__main__":
engine_args
=
AsyncEngineArgs
.
from_cli_args
(
args
)
engine_args
=
AsyncEngineArgs
.
from_cli_args
(
args
)
engine
=
AsyncLLMEngineWithStats
.
from_engine_args
(
engine_args
)
engine
=
AsyncLLMEngineWithStats
.
from_engine_args
(
engine_args
)
vllm
.
entrypoints
.
api_server
.
engine
=
engine
vllm
.
entrypoints
.
api_server
.
engine
=
engine
uvicorn
.
run
(
uvicorn
.
run
(
app
,
app
,
host
=
args
.
host
,
host
=
args
.
host
,
port
=
args
.
port
,
port
=
args
.
port
,
log_level
=
"debug"
,
log_level
=
"debug"
,
timeout_keep_alive
=
envs
.
VLLM_HTTP_TIMEOUT_KEEP_ALIVE
)
timeout_keep_alive
=
vllm
.
entrypoints
.
api_server
.
TIMEOUT_KEEP_ALIVE
)
vllm/entrypoints/api_server.py
View file @
31f58be9
...
@@ -17,6 +17,7 @@ from typing import Any, Optional
...
@@ -17,6 +17,7 @@ from typing import Any, Optional
from
fastapi
import
FastAPI
,
Request
from
fastapi
import
FastAPI
,
Request
from
fastapi.responses
import
JSONResponse
,
Response
,
StreamingResponse
from
fastapi.responses
import
JSONResponse
,
Response
,
StreamingResponse
import
vllm.envs
as
envs
from
vllm.engine.arg_utils
import
AsyncEngineArgs
from
vllm.engine.arg_utils
import
AsyncEngineArgs
from
vllm.engine.async_llm_engine
import
AsyncLLMEngine
from
vllm.engine.async_llm_engine
import
AsyncLLMEngine
from
vllm.entrypoints.launcher
import
serve_http
from
vllm.entrypoints.launcher
import
serve_http
...
@@ -29,7 +30,6 @@ from vllm.version import __version__ as VLLM_VERSION
...
@@ -29,7 +30,6 @@ from vllm.version import __version__ as VLLM_VERSION
logger
=
init_logger
(
"vllm.entrypoints.api_server"
)
logger
=
init_logger
(
"vllm.entrypoints.api_server"
)
TIMEOUT_KEEP_ALIVE
=
5
# seconds.
app
=
FastAPI
()
app
=
FastAPI
()
engine
=
None
engine
=
None
...
@@ -134,7 +134,7 @@ async def run_server(args: Namespace,
...
@@ -134,7 +134,7 @@ async def run_server(args: Namespace,
host
=
args
.
host
,
host
=
args
.
host
,
port
=
args
.
port
,
port
=
args
.
port
,
log_level
=
args
.
log_level
,
log_level
=
args
.
log_level
,
timeout_keep_alive
=
TIMEOUT_KEEP_ALIVE
,
timeout_keep_alive
=
envs
.
VLLM_HTTP_
TIMEOUT_KEEP_ALIVE
,
ssl_keyfile
=
args
.
ssl_keyfile
,
ssl_keyfile
=
args
.
ssl_keyfile
,
ssl_certfile
=
args
.
ssl_certfile
,
ssl_certfile
=
args
.
ssl_certfile
,
ssl_ca_certs
=
args
.
ssl_ca_certs
,
ssl_ca_certs
=
args
.
ssl_ca_certs
,
...
...
vllm/entrypoints/openai/api_server.py
View file @
31f58be9
...
@@ -103,8 +103,6 @@ from vllm.utils import (Device, FlexibleArgumentParser, get_open_zmq_ipc_path,
...
@@ -103,8 +103,6 @@ from vllm.utils import (Device, FlexibleArgumentParser, get_open_zmq_ipc_path,
from
vllm.v1.metrics.prometheus
import
get_prometheus_registry
from
vllm.v1.metrics.prometheus
import
get_prometheus_registry
from
vllm.version
import
__version__
as
VLLM_VERSION
from
vllm.version
import
__version__
as
VLLM_VERSION
TIMEOUT_KEEP_ALIVE
=
5
# seconds
prometheus_multiproc_dir
:
tempfile
.
TemporaryDirectory
prometheus_multiproc_dir
:
tempfile
.
TemporaryDirectory
# Cannot use __name__ (https://github.com/vllm-project/vllm/pull/4765)
# Cannot use __name__ (https://github.com/vllm-project/vllm/pull/4765)
...
@@ -1360,7 +1358,7 @@ async def run_server_worker(listen_address,
...
@@ -1360,7 +1358,7 @@ async def run_server_worker(listen_address,
# NOTE: When the 'disable_uvicorn_access_log' value is True,
# NOTE: When the 'disable_uvicorn_access_log' value is True,
# no access log will be output.
# no access log will be output.
access_log
=
not
args
.
disable_uvicorn_access_log
,
access_log
=
not
args
.
disable_uvicorn_access_log
,
timeout_keep_alive
=
TIMEOUT_KEEP_ALIVE
,
timeout_keep_alive
=
envs
.
VLLM_HTTP_
TIMEOUT_KEEP_ALIVE
,
ssl_keyfile
=
args
.
ssl_keyfile
,
ssl_keyfile
=
args
.
ssl_keyfile
,
ssl_certfile
=
args
.
ssl_certfile
,
ssl_certfile
=
args
.
ssl_certfile
,
ssl_ca_certs
=
args
.
ssl_ca_certs
,
ssl_ca_certs
=
args
.
ssl_ca_certs
,
...
...
vllm/envs.py
View file @
31f58be9
...
@@ -71,6 +71,7 @@ if TYPE_CHECKING:
...
@@ -71,6 +71,7 @@ if TYPE_CHECKING:
VERBOSE
:
bool
=
False
VERBOSE
:
bool
=
False
VLLM_ALLOW_LONG_MAX_MODEL_LEN
:
bool
=
False
VLLM_ALLOW_LONG_MAX_MODEL_LEN
:
bool
=
False
VLLM_RPC_TIMEOUT
:
int
=
10000
# ms
VLLM_RPC_TIMEOUT
:
int
=
10000
# ms
VLLM_HTTP_TIMEOUT_KEEP_ALIVE
:
int
=
5
# seconds
VLLM_PLUGINS
:
Optional
[
list
[
str
]]
=
None
VLLM_PLUGINS
:
Optional
[
list
[
str
]]
=
None
VLLM_LORA_RESOLVER_CACHE_DIR
:
Optional
[
str
]
=
None
VLLM_LORA_RESOLVER_CACHE_DIR
:
Optional
[
str
]
=
None
VLLM_TORCH_PROFILER_DIR
:
Optional
[
str
]
=
None
VLLM_TORCH_PROFILER_DIR
:
Optional
[
str
]
=
None
...
@@ -557,6 +558,10 @@ environment_variables: dict[str, Callable[[], Any]] = {
...
@@ -557,6 +558,10 @@ environment_variables: dict[str, Callable[[], Any]] = {
"VLLM_RPC_TIMEOUT"
:
"VLLM_RPC_TIMEOUT"
:
lambda
:
int
(
os
.
getenv
(
"VLLM_RPC_TIMEOUT"
,
"10000"
)),
lambda
:
int
(
os
.
getenv
(
"VLLM_RPC_TIMEOUT"
,
"10000"
)),
# Timeout in seconds for keeping HTTP connections alive in API server
"VLLM_HTTP_TIMEOUT_KEEP_ALIVE"
:
lambda
:
int
(
os
.
environ
.
get
(
"VLLM_HTTP_TIMEOUT_KEEP_ALIVE"
,
"5"
)),
# a list of plugin names to load, separated by commas.
# a list of plugin names to load, separated by commas.
# if this is not set, it means all plugins will be loaded
# if this is not set, it means all plugins will be loaded
# if this is set to an empty string, no plugins will be loaded
# if this is set to an empty string, no plugins will be loaded
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment