Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
f39ab2d4
Unverified
Commit
f39ab2d4
authored
Jun 23, 2025
by
jinqinn
Committed by
GitHub
Jun 22, 2025
Browse files
[Misc] Configurable timeout for execute_model RPC calls via env var (#19544)
Signed-off-by: jinqinn <goodqinjin@163.com>
parent
4a0f7888
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing 2 changed files with 12 additions and 11 deletions
+12
-11
vllm/envs.py
vllm/envs.py
+6
-0
vllm/v1/executor/multiproc_executor.py
vllm/v1/executor/multiproc_executor.py
+6
-11
No files found.
vllm/envs.py
View file @
f39ab2d4
...
...
@@ -130,6 +130,7 @@ if TYPE_CHECKING:
VLLM_TOOL_PARSE_REGEX_TIMEOUT_SECONDS
:
int
=
1
VLLM_SLEEP_WHEN_IDLE
:
bool
=
False
VLLM_MQ_MAX_CHUNK_BYTES_MB
:
int
=
16
VLLM_EXECUTE_MODEL_TIMEOUT_SECONDS
:
int
=
300
VLLM_KV_CACHE_LAYOUT
:
Optional
[
str
]
=
None
VLLM_COMPUTE_NANS_IN_LOGITS
:
bool
=
False
...
...
@@ -897,6 +898,11 @@ environment_variables: dict[str, Callable[[], Any]] = {
"VLLM_MQ_MAX_CHUNK_BYTES_MB"
:
lambda
:
int
(
os
.
getenv
(
"VLLM_MQ_MAX_CHUNK_BYTES_MB"
,
"16"
)),
# Timeout in seconds for execute_model RPC calls in multiprocessing
# executor (only applies when TP > 1).
"VLLM_EXECUTE_MODEL_TIMEOUT_SECONDS":
lambda: int(os.getenv("VLLM_EXECUTE_MODEL_TIMEOUT_SECONDS", "300")),
# KV Cache layout used throughout vllm.
# Some common values are:
# - NHD
...
...
vllm/v1/executor/multiproc_executor.py
View file @
f39ab2d4
...
...
@@ -37,11 +37,6 @@ from vllm.worker.worker_base import WorkerWrapperBase
logger
=
init_logger
(
__name__
)
POLLING_TIMEOUT_MS
=
5000
POLLING_TIMEOUT_S
=
POLLING_TIMEOUT_MS
//
1000
EXECUTE_MODEL_TIMEOUT_S
=
300
class
MultiprocExecutor
(
Executor
):
...
...
@@ -160,12 +155,12 @@ class MultiprocExecutor(Executor):
self
,
scheduler_output
,
)
->
Union
[
ModelRunnerOutput
,
Future
[
ModelRunnerOutput
]]:
(output, ) = self.collective_rpc("execute_model",
    args=(scheduler_output, ),
    unique_reply_rank=self.output_rank,
    non_block=self.max_concurrent_batches > 1,
    timeout=EXECUTE_MODEL_TIMEOUT_S)
(output, ) = self.collective_rpc(
    "execute_model",
    args=(scheduler_output, ),
    unique_reply_rank=self.output_rank,
    non_block=self.max_concurrent_batches > 1,
    timeout=envs.VLLM_EXECUTE_MODEL_TIMEOUT_SECONDS)
return
output
def
collective_rpc
(
self
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment