Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
12575cfa
Unverified
Commit
12575cfa
authored
Jun 19, 2025
by
Chauncey
Committed by
GitHub
Jun 18, 2025
Browse files
[Bugfix] fix RAY_CGRAPH_get_timeout is not set successfully (#19725)
Signed-off-by:
chaunceyjiang
<
chaunceyjiang@gmail.com
>
parent
8b6e1d63
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
10 additions
and
10 deletions
+10
-10
vllm/executor/ray_distributed_executor.py
vllm/executor/ray_distributed_executor.py
+10
-10
No files found.
vllm/executor/ray_distributed_executor.py
View file @
12575cfa
...
...
@@ -557,8 +557,17 @@ class RayDistributedExecutor(DistributedExecutorBase):
def
_compiled_ray_dag
(
self
,
enable_asyncio
:
bool
):
assert
self
.
parallel_config
.
use_ray
self
.
_check_ray_cgraph_installation
()
# Enlarge the default value of "RAY_CGRAPH_get_timeout" to 300 seconds
# (it is 10 seconds by default). This is a Ray environment variable to
# control the timeout of getting result from a compiled graph execution,
# i.e., the distributed execution that includes model forward runs and
# intermediate tensor communications, in the case of vllm.
# Note: we should set this env var before importing
# ray.dag, otherwise it will not take effect.
os
.
environ
.
setdefault
(
"RAY_CGRAPH_get_timeout"
,
"300"
)
# noqa: SIM112
from
ray.dag
import
InputNode
,
MultiOutputNode
logger
.
info
(
"RAY_CGRAPH_get_timeout is set to %s"
,
os
.
environ
[
"RAY_CGRAPH_get_timeout"
])
# noqa: SIM112
logger
.
info
(
"VLLM_USE_RAY_COMPILED_DAG_CHANNEL_TYPE = %s"
,
envs
.
VLLM_USE_RAY_COMPILED_DAG_CHANNEL_TYPE
)
logger
.
info
(
"VLLM_USE_RAY_COMPILED_DAG_OVERLAP_COMM = %s"
,
...
...
@@ -570,15 +579,6 @@ class RayDistributedExecutor(DistributedExecutorBase):
"Invalid value for VLLM_USE_RAY_COMPILED_DAG_CHANNEL_TYPE: "
f
"
{
channel_type
}
. Valid values are: 'auto', 'nccl', or 'shm'."
)
# Enlarge the default value of "RAY_CGRAPH_get_timeout" to 300 seconds
# (it is 10 seconds by default). This is a Ray environment variable to
# control the timeout of getting result from a compiled graph execution,
# i.e., the distributed execution that includes model forward runs and
# intermediate tensor communications, in the case of vllm.
os
.
environ
.
setdefault
(
"RAY_CGRAPH_get_timeout"
,
"300"
)
# noqa: SIM112
logger
.
info
(
"RAY_CGRAPH_get_timeout is set to %s"
,
os
.
environ
[
"RAY_CGRAPH_get_timeout"
])
# noqa: SIM112
with
InputNode
()
as
input_data
:
# Example DAG: PP=2, TP=4
#
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment