Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
660dea12
Unverified
Commit
660dea12
authored
Aug 02, 2024
by
youkaichao
Committed by
GitHub
Aug 02, 2024
Browse files
[cuda][misc] remove error_on_invalid_device_count_status (#7069)
parent
cf2a1a4d
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
3 additions
and
32 deletions
+3
-32
vllm/executor/multiproc_gpu_executor.py
vllm/executor/multiproc_gpu_executor.py
+0
-3
vllm/executor/ray_gpu_executor.py
vllm/executor/ray_gpu_executor.py
+3
-6
vllm/utils.py
vllm/utils.py
+0
-23
No files found.
vllm/executor/multiproc_gpu_executor.py
View file @
660dea12
...
@@ -17,7 +17,6 @@ from vllm.logger import init_logger
...
@@ -17,7 +17,6 @@ from vllm.logger import init_logger
from
vllm.sequence
import
ExecuteModelRequest
,
SamplerOutput
from
vllm.sequence
import
ExecuteModelRequest
,
SamplerOutput
from
vllm.triton_utils
import
maybe_set_triton_cache_manager
from
vllm.triton_utils
import
maybe_set_triton_cache_manager
from
vllm.utils
import
(
_run_task_with_lock
,
cuda_device_count_stateless
,
from
vllm.utils
import
(
_run_task_with_lock
,
cuda_device_count_stateless
,
error_on_invalid_device_count_status
,
get_distributed_init_method
,
get_open_port
,
get_distributed_init_method
,
get_open_port
,
get_vllm_instance_id
,
make_async
,
get_vllm_instance_id
,
make_async
,
update_environment_variables
)
update_environment_variables
)
...
@@ -79,8 +78,6 @@ class MultiprocessingGPUExecutor(DistributedGPUExecutor):
...
@@ -79,8 +78,6 @@ class MultiprocessingGPUExecutor(DistributedGPUExecutor):
f
"please ensure that world_size (
{
world_size
}
) "
f
"please ensure that world_size (
{
world_size
}
) "
f
"is less than than max local gpu count (
{
cuda_device_count
}
)"
)
f
"is less than than max local gpu count (
{
cuda_device_count
}
)"
)
error_on_invalid_device_count_status
()
# Multiprocessing-based executor does not support multi-node setting.
# Multiprocessing-based executor does not support multi-node setting.
# Since it only works for single node, we can use the loopback address
# Since it only works for single node, we can use the loopback address
# 127.0.0.1 for communication.
# 127.0.0.1 for communication.
...
...
vllm/executor/ray_gpu_executor.py
View file @
660dea12
...
@@ -10,10 +10,9 @@ from vllm.executor.distributed_gpu_executor import ( # yapf: disable
...
@@ -10,10 +10,9 @@ from vllm.executor.distributed_gpu_executor import ( # yapf: disable
from
vllm.executor.ray_utils
import
RayWorkerWrapper
,
ray
from
vllm.executor.ray_utils
import
RayWorkerWrapper
,
ray
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.sequence
import
ExecuteModelRequest
,
SamplerOutput
from
vllm.sequence
import
ExecuteModelRequest
,
SamplerOutput
from
vllm.utils
import
(
_run_task_with_lock
,
from
vllm.utils
import
(
_run_task_with_lock
,
get_distributed_init_method
,
error_on_invalid_device_count_status
,
get_ip
,
get_open_port
,
get_vllm_instance_id
,
get_distributed_init_method
,
get_ip
,
get_open_port
,
make_async
)
get_vllm_instance_id
,
make_async
)
if
ray
is
not
None
:
if
ray
is
not
None
:
from
ray.util.scheduling_strategies
import
PlacementGroupSchedulingStrategy
from
ray.util.scheduling_strategies
import
PlacementGroupSchedulingStrategy
...
@@ -216,8 +215,6 @@ class RayGPUExecutor(DistributedGPUExecutor):
...
@@ -216,8 +215,6 @@ class RayGPUExecutor(DistributedGPUExecutor):
distributed_init_method
=
get_distributed_init_method
(
distributed_init_method
=
get_distributed_init_method
(
driver_ip
,
get_open_port
())
driver_ip
,
get_open_port
())
error_on_invalid_device_count_status
()
# Initialize the actual workers inside worker wrapper.
# Initialize the actual workers inside worker wrapper.
init_worker_all_kwargs
=
[
init_worker_all_kwargs
=
[
self
.
_get_worker_kwargs
(
self
.
_get_worker_kwargs
(
...
...
vllm/utils.py
View file @
660dea12
import
argparse
import
argparse
import
asyncio
import
asyncio
import
contextlib
import
datetime
import
datetime
import
enum
import
enum
import
gc
import
gc
...
@@ -923,28 +922,6 @@ def cuda_device_count_stateless() -> int:
...
@@ -923,28 +922,6 @@ def cuda_device_count_stateless() -> int:
return
_cuda_device_count_stateless
(
envs
.
CUDA_VISIBLE_DEVICES
)
return
_cuda_device_count_stateless
(
envs
.
CUDA_VISIBLE_DEVICES
)
def
error_on_invalid_device_count_status
():
cache_entries
=
0
with
contextlib
.
suppress
(
Exception
):
# future pytorch will fix the issue, device_count will not be cached
# at that time, `.cache_info().currsize` will error out
cache_entries
=
torch
.
cuda
.
device_count
.
cache_info
(
# type: ignore
).
currsize
if
cache_entries
!=
0
:
# the function is already called, and the result is cached
remembered
=
torch
.
cuda
.
device_count
()
current
=
cuda_device_count_stateless
()
if
remembered
>
current
:
raise
RuntimeError
(
"The number of CUDA devices has changed since the first "
"call to torch.cuda.device_count(). This is not allowed "
"and may result in undefined behavior. Please check out "
"https://github.com/vllm-project/vllm/issues/6056 to "
"find the first call to torch.cuda.device_count() "
"and defer it until the engine is up. Or you can set "
"CUDA_VISIBLE_DEVICES to the GPUs you want to use."
)
# NVML utils
# NVML utils
# Note that NVML is not affected by `CUDA_VISIBLE_DEVICES`,
# Note that NVML is not affected by `CUDA_VISIBLE_DEVICES`,
# all the related functions work on real physical device ids.
# all the related functions work on real physical device ids.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment