sglang · Commit a995a773 (unverified)

[fix] remove `cuda_device_count_stateless` (#5060)

Authored by JieXin Liang on Apr 04, 2025; committed by GitHub on Apr 04, 2025.
Parent: 31035dda
Showing 3 changed files, with 5 additions and 46 deletions:

  python/sglang/srt/distributed/device_communicators/custom_all_reduce.py   (+2, -2)
  python/sglang/srt/distributed/device_communicators/custom_all_reduce_utils.py   (+2, -2)
  python/sglang/srt/utils.py   (+1, -42)
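Every hunk below makes the same substitution: the cached cuda_device_count_stateless() helper is dropped in favor of calling torch.cuda.device_count() directly. The justification is in the removed helper's own comment, which notes it can go once pytorch/pytorch#122815 ships, since from then on the plain call reflects CUDA_VISIBLE_DEVICES at call time instead of a stale cached count. A minimal sketch of the replacement call (assuming a CUDA-enabled PyTorch build; it prints 0 on CPU-only machines):

    import torch

    # Direct, uncached query; with a sufficiently recent torch this honors
    # the current CUDA_VISIBLE_DEVICES value rather than a memoized count.
    print(torch.cuda.device_count())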
python/sglang/srt/distributed/device_communicators/custom_all_reduce.py (+2, -2)

@@ -18,7 +18,7 @@ from sglang.srt.distributed.device_communicators.custom_all_reduce_utils import
     gpu_p2p_access_check,
 )
 from sglang.srt.distributed.parallel_state import in_the_same_node_as
-from sglang.srt.utils import cuda_device_count_stateless, is_cuda, is_hip
+from sglang.srt.utils import is_cuda, is_hip

 logger = logging.getLogger(__name__)

@@ -217,7 +217,7 @@ class CustomAllreduce:
         if cuda_visible_devices:
             device_ids = list(map(int, cuda_visible_devices.split(",")))
         else:
-            device_ids = list(range(cuda_device_count_stateless()))
+            device_ids = list(range(torch.cuda.device_count()))
         physical_device_id = device_ids[device.index]
         tensor = torch.tensor([physical_device_id], dtype=torch.int, device="cpu")
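The second hunk above maps a logical device index to a physical GPU id. A self-contained sketch of that mapping (hypothetical standalone helper; in the commit this logic lives inside CustomAllreduce):

    import os
    import torch

    def physical_device_id(device: torch.device) -> int:
        # Translate a logical index (position among the visible devices)
        # into the physical GPU id, mirroring the hunk above.
        cuda_visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES", "")
        if cuda_visible_devices:
            device_ids = list(map(int, cuda_visible_devices.split(",")))
        else:
            device_ids = list(range(torch.cuda.device_count()))
        return device_ids[device.index]

For example, with CUDA_VISIBLE_DEVICES=2,3, physical_device_id(torch.device("cuda", 1)) returns 3.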
python/sglang/srt/distributed/device_communicators/custom_all_reduce_utils.py (+2, -2)

@@ -11,11 +11,11 @@ import tempfile
 from itertools import product
 from typing import Dict, List, Optional, Sequence

+import torch
 import torch.distributed as dist
 import torch.multiprocessing as mp

 from sglang.srt.distributed.device_communicators.cuda_wrapper import CudaRTLibrary
-from sglang.srt.utils import cuda_device_count_stateless

 logger = logging.getLogger(__name__)

@@ -218,7 +218,7 @@ def gpu_p2p_access_check(src: int, tgt: int) -> bool:
     is_distributed = dist.is_initialized()

-    num_dev = cuda_device_count_stateless()
+    num_dev = torch.cuda.device_count()
     cuda_visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES", None)
     if cuda_visible_devices is None:
         cuda_visible_devices = ",".join(str(i) for i in range(num_dev))
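In the second hunk above, gpu_p2p_access_check normalizes CUDA_VISIBLE_DEVICES: when the variable is unset, it synthesizes the explicit list "0,1,...,N-1", and N now comes from the plain device count. A sketch of just that normalization step (assuming a CUDA-enabled PyTorch build):

    import os
    import torch

    # If CUDA_VISIBLE_DEVICES is unset, build the explicit device list from
    # the plain count, as gpu_p2p_access_check does after this commit.
    num_dev = torch.cuda.device_count()
    cuda_visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES")
    if cuda_visible_devices is None:
        cuda_visible_devices = ",".join(str(i) for i in range(num_dev))
    print(cuda_visible_devices)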
python/sglang/srt/utils.py (+1, -42)

@@ -263,7 +263,7 @@ def get_available_gpu_memory(device, gpu_id, distributed=False, empty_cache=True
     When distributed is True, the available memory is the minimum available memory of all GPUs.
     """
     if device == "cuda":
-        num_gpus = cuda_device_count_stateless()
+        num_gpus = torch.cuda.device_count()
         assert gpu_id < num_gpus

         if torch.cuda.current_device() != gpu_id:

@@ -1416,47 +1416,6 @@ def disable_request_logging() -> bool:
     return get_bool_env_var("SGLANG_DISABLE_REQUEST_LOGGING")


-@lru_cache(maxsize=8)
-def _cuda_device_count_stateless(cuda_visible_devices: Optional[str] = None) -> int:
-    # Note: cuda_visible_devices is not used, but we keep it as an argument for
-    # LRU Cache purposes.
-
-    # Code below is based on
-    # https://github.com/pytorch/pytorch/blob/
-    # c1cd946818442aca8c7f812b16d187ce1586c3bc/
-    # torch/cuda/__init__.py#L831C1-L831C17
-
-    import torch.version
-
-    if not torch.cuda._is_compiled():
-        return 0
-    if is_hip():
-        # ROCm uses amdsmi instead of nvml for stateless device count
-        # This requires a sufficiently modern version of Torch 2.4.0
-        raw_count = (
-            torch.cuda._device_count_amdsmi()
-            if (hasattr(torch.cuda, "_device_count_amdsmi"))
-            else -1
-        )
-    else:
-        raw_count = torch.cuda._device_count_nvml()
-    r = torch._C._cuda_getDeviceCount() if raw_count < 0 else raw_count
-    return r
-
-
-# Adapted from https://github.com/vllm-project/vllm/blob/a6221a144af772fd1a68fe7e627935dc53e81738/vllm/utils.py
-def cuda_device_count_stateless() -> int:
-    """Get number of CUDA devices, caching based on the value of
-    CUDA_VISIBLE_DEVICES at the time of call.
-
-    This should be used instead of torch.cuda.device_count()
-    unless CUDA_VISIBLE_DEVICES has already been set to the desired
-    value."""
-
-    # This can be removed and simply replaced with torch.cuda.get_device_count
-    # after https://github.com/pytorch/pytorch/pull/122815 is released.
-    return _cuda_device_count_stateless(os.environ.get("CUDA_VISIBLE_DEVICES", None))


 def dataclass_to_string_truncated(
     data, max_length=2048, skip_names: Optional[Set[str]] = None
 ):
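For context on the deletion: the removed helper memoized the NVML/amdsmi-based count with lru_cache, keyed on the CUDA_VISIBLE_DEVICES string, so a changed environment produced a fresh count while repeated calls stayed cheap. A GPU-free toy illustrating only that cache-keying idea (the counting body is a stand-in, not the removed NVML/amdsmi query):

    import os
    from functools import lru_cache
    from typing import Optional

    @lru_cache(maxsize=8)
    def _count_for(cuda_visible_devices: Optional[str] = None) -> int:
        # Stand-in for the removed device query; the argument exists purely
        # so the cache is keyed on the env value, as in the deleted code.
        return len(cuda_visible_devices.split(",")) if cuda_visible_devices else 0

    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
    print(_count_for(os.environ.get("CUDA_VISIBLE_DEVICES")))  # 2
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    print(_count_for(os.environ.get("CUDA_VISIBLE_DEVICES")))  # 1 (new cache key)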