Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
5ce9daea
Unverified
Commit
5ce9daea
authored
Dec 17, 2024
by
Hui Liu
Committed by
GitHub
Dec 17, 2024
Browse files
ROCm support for sglang.check_env (#2426)
parent
ce094a5d
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
140 additions
and
48 deletions
+140
-48
python/sglang/check_env.py
python/sglang/check_env.py
+140
-48
No files found.
python/sglang/check_env.py
View file @
5ce9daea
...
@@ -9,6 +9,13 @@ from collections import OrderedDict, defaultdict
...
@@ -9,6 +9,13 @@ from collections import OrderedDict, defaultdict
import
torch
import
torch
from
sglang.srt.utils
import
is_hip
def is_cuda_v2():
    """Return True when this torch build ships with CUDA support."""
    cuda_version = torch.version.cuda
    return cuda_version is not None
# List of packages to check versions
# List of packages to check versions
PACKAGE_LIST
=
[
PACKAGE_LIST
=
[
"sglang"
,
"sglang"
,
...
@@ -63,13 +70,22 @@ def get_cuda_info():
...
@@ -63,13 +70,22 @@ def get_cuda_info():
"""
"""
Get CUDA-related information if available.
Get CUDA-related information if available.
"""
"""
cuda_info
=
{
"CUDA available"
:
torch
.
cuda
.
is_available
()}
if
is_cuda_v2
():
cuda_info
=
{
"CUDA available"
:
torch
.
cuda
.
is_available
()}
if
cuda_info
[
"CUDA available"
]:
cuda_info
.
update
(
_get_gpu_info
())
cuda_info
.
update
(
_get_cuda_version_info
())
return
cuda_info
elif
is_hip
():
cuda_info
=
{
"ROCM available"
:
torch
.
cuda
.
is_available
()}
if
cuda_info
[
"
CUDA
available"
]:
if
cuda_info
[
"
ROCM
available"
]:
cuda_info
.
update
(
_get_gpu_info
())
cuda_info
.
update
(
_get_gpu_info
())
cuda_info
.
update
(
_get_cuda_version_info
())
cuda_info
.
update
(
_get_cuda_version_info
())
return
cuda_info
return
cuda_info
def
_get_gpu_info
():
def
_get_gpu_info
():
...
@@ -103,34 +119,72 @@ def _get_cuda_version_info():
...
@@ -103,34 +119,72 @@ def _get_cuda_version_info():
"""
"""
Get CUDA version information.
Get CUDA version information.
"""
"""
from
torch.utils.cpp_extension
import
CUDA_HOME
if
is_cuda_v2
():
from
torch.utils.cpp_extension
import
CUDA_HOME
cuda_info
=
{
"CUDA_HOME"
:
CUDA_HOME
}
cuda_info
=
{
"CUDA_HOME"
:
CUDA_HOME
}
if
CUDA_HOME
and
os
.
path
.
isdir
(
CUDA_HOME
):
if
CUDA_HOME
and
os
.
path
.
isdir
(
CUDA_HOME
):
cuda_info
.
update
(
_get_nvcc_info
())
cuda_info
.
update
(
_get_nvcc_info
())
cuda_info
.
update
(
_get_cuda_driver_version
())
cuda_info
.
update
(
_get_cuda_driver_version
())
return
cuda_info
return
cuda_info
elif
is_hip
():
from
torch.utils.cpp_extension
import
ROCM_HOME
as
ROCM_HOME
cuda_info
=
{
"ROCM_HOME"
:
ROCM_HOME
}
if
ROCM_HOME
and
os
.
path
.
isdir
(
ROCM_HOME
):
cuda_info
.
update
(
_get_nvcc_info
())
cuda_info
.
update
(
_get_cuda_driver_version
())
return
cuda_info
else
:
cuda_info
=
{
"CUDA_HOME"
:
""
}
return
cuda_info
def _get_nvcc_info():
    """
    Get NVCC (or HIPCC on ROCm) compiler version information.
    """
    if is_cuda_v2():
        from torch.utils.cpp_extension import CUDA_HOME

        try:
            compiler = os.path.join(CUDA_HOME, "bin/nvcc")
            raw = (
                subprocess.check_output(f'"{compiler}" -V', shell=True)
                .decode("utf-8")
                .strip()
            )
            # Keep only the version sentence between these two markers.
            start = raw.rfind("Cuda compilation tools")
            end = raw.rfind("Build")
            return {"NVCC": raw[start:end].strip()}
        except subprocess.SubprocessError:
            return {"NVCC": "Not Available"}
    elif is_hip():
        from torch.utils.cpp_extension import ROCM_HOME

        try:
            compiler = os.path.join(ROCM_HOME, "bin/hipcc")
            raw = (
                subprocess.check_output(f'"{compiler}" --version', shell=True)
                .decode("utf-8")
                .strip()
            )
            start = raw.rfind("HIP version")
            end = raw.rfind("AMD clang")
            return {"HIPCC": raw[start:end].strip()}
        except subprocess.SubprocessError:
            return {"HIPCC": "Not Available"}
    else:
        return {"NVCC": "Not Available"}
...
@@ -139,20 +193,40 @@ def _get_cuda_driver_version():
...
@@ -139,20 +193,40 @@ def _get_cuda_driver_version():
def _get_cuda_driver_version():
    """
    Get CUDA (or ROCm) driver version via the vendor smi tool.

    Returns:
        dict: A single driver-version entry, or a "Not Available" marker
        when the tool cannot be run or produces no usable output.
    """
    if is_cuda_v2():
        try:
            output = subprocess.check_output(
                [
                    "nvidia-smi",
                    "--query-gpu=driver_version",
                    "--format=csv,noheader,nounits",
                ]
            )
            versions = set(output.decode().strip().split("\n"))
            if len(versions) == 1:
                return {"CUDA Driver Version": versions.pop()}
            # Heterogeneous multi-GPU hosts can report several versions.
            return {"CUDA Driver Versions": ", ".join(sorted(versions))}
        except subprocess.SubprocessError:
            return {"CUDA Driver Version": "Not Available"}
    elif is_hip():
        try:
            output = subprocess.check_output(
                ["rocm-smi", "--showdriverversion", "--csv"]
            )
            versions = set(output.decode().strip().split("\n"))
            versions.discard("name, value")  # drop the CSV header row
            # Fix: the original called versions.pop() unconditionally; on
            # header-only output the set is empty and pop() raises
            # KeyError, which is not a SubprocessError and escaped the
            # handler below.
            if not versions:
                return {"ROCM Driver Version": "Not Available"}
            ver = versions.pop()
            ver = ver.replace('"Driver version", ', "").replace('"', "")
            return {"ROCM Driver Version": ver}
        except subprocess.SubprocessError:
            return {"ROCM Driver Version": "Not Available"}
    else:
        return {"CUDA Driver Version": "Not Available"}
...
@@ -160,16 +234,31 @@ def get_gpu_topology():
...
@@ -160,16 +234,31 @@ def get_gpu_topology():
"""
"""
Get GPU topology information.
Get GPU topology information.
"""
"""
try
:
if
is_cuda_v2
():
result
=
subprocess
.
run
(
try
:
[
"nvidia-smi"
,
"topo"
,
"-m"
],
result
=
subprocess
.
run
(
stdout
=
subprocess
.
PIPE
,
[
"nvidia-smi"
,
"topo"
,
"-m"
],
stderr
=
subprocess
.
PIPE
,
stdout
=
subprocess
.
PIPE
,
text
=
True
,
stderr
=
subprocess
.
PIPE
,
check
=
True
,
text
=
True
,
)
check
=
True
,
return
"
\n
"
+
result
.
stdout
if
result
.
returncode
==
0
else
None
)
except
subprocess
.
SubprocessError
:
return
"
\n
"
+
result
.
stdout
if
result
.
returncode
==
0
else
None
except
subprocess
.
SubprocessError
:
return
None
elif
is_hip
():
try
:
result
=
subprocess
.
run
(
[
"rocm-smi"
,
"--showtopotype"
],
stdout
=
subprocess
.
PIPE
,
stderr
=
subprocess
.
PIPE
,
text
=
True
,
check
=
True
,
)
return
"
\n
"
+
result
.
stdout
if
result
.
returncode
==
0
else
None
except
subprocess
.
SubprocessError
:
return
None
else
:
return
None
return
None
...
@@ -196,7 +285,10 @@ def check_env():
...
@@ -196,7 +285,10 @@ def check_env():
gpu_topo
=
get_gpu_topology
()
gpu_topo
=
get_gpu_topology
()
if
gpu_topo
:
if
gpu_topo
:
env_info
[
"NVIDIA Topology"
]
=
gpu_topo
if
is_cuda_v2
():
env_info
[
"NVIDIA Topology"
]
=
gpu_topo
elif
is_hip
():
env_info
[
"AMD Topology"
]
=
gpu_topo
hypervisor_vendor
=
get_hypervisor_vendor
()
hypervisor_vendor
=
get_hypervisor_vendor
()
if
hypervisor_vendor
:
if
hypervisor_vendor
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment