Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
4695397d
"...ssh:/git@developer.sourcefind.cn:2222/OpenDAS/dynamo.git" did not exist on "537c9ee5bb7aca2088c0bff8495bc6a69e6c5ea0"
Unverified
Commit
4695397d
authored
Apr 15, 2024
by
Ricky Xu
Committed by
GitHub
Apr 15, 2024
Browse files
[Bugfix] Fix ray workers profiling with nsight (#4095)
parent
d619ae2d
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
19 additions
and
0 deletions
+19
-0
vllm/executor/ray_gpu_executor.py
vllm/executor/ray_gpu_executor.py
+19
-0
No files found.
vllm/executor/ray_gpu_executor.py
View file @
4695397d
...
@@ -48,6 +48,21 @@ class RayGPUExecutor(ExecutorBase):
...
@@ -48,6 +48,21 @@ class RayGPUExecutor(ExecutorBase):
if
USE_RAY_COMPILED_DAG
:
if
USE_RAY_COMPILED_DAG
:
self
.
forward_dag
=
self
.
_compiled_ray_dag
()
self
.
forward_dag
=
self
.
_compiled_ray_dag
()
def
_configure_ray_workers_use_nsight
(
self
,
ray_remote_kwargs
)
->
Dict
[
str
,
Any
]:
# If nsight profiling is enabled, we need to set the profiling
# configuration for the ray workers as runtime env.
runtime_env
=
ray_remote_kwargs
.
setdefault
(
"runtime_env"
,
{})
runtime_env
.
update
({
"nsight"
:
{
"t"
:
"cuda,cudnn,cublas"
,
"o"
:
"'worker_process_%p'"
,
"cuda-graph-trace"
:
"node"
,
}
})
return
ray_remote_kwargs
def
_init_workers_ray
(
self
,
placement_group
:
"PlacementGroup"
,
def
_init_workers_ray
(
self
,
placement_group
:
"PlacementGroup"
,
**
ray_remote_kwargs
):
**
ray_remote_kwargs
):
if
self
.
parallel_config
.
tensor_parallel_size
==
1
:
if
self
.
parallel_config
.
tensor_parallel_size
==
1
:
...
@@ -63,6 +78,10 @@ class RayGPUExecutor(ExecutorBase):
...
@@ -63,6 +78,10 @@ class RayGPUExecutor(ExecutorBase):
# The remaining workers are the actual ray actors.
# The remaining workers are the actual ray actors.
self
.
workers
:
List
[
RayWorkerVllm
]
=
[]
self
.
workers
:
List
[
RayWorkerVllm
]
=
[]
if
self
.
parallel_config
.
ray_workers_use_nsight
:
ray_remote_kwargs
=
self
.
_configure_ray_workers_use_nsight
(
ray_remote_kwargs
)
# Create the workers.
# Create the workers.
driver_ip
=
get_ip
()
driver_ip
=
get_ip
()
for
bundle_id
,
bundle
in
enumerate
(
placement_group
.
bundle_specs
):
for
bundle_id
,
bundle
in
enumerate
(
placement_group
.
bundle_specs
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment