Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
5ce9daea
Unverified
Commit
5ce9daea
authored
Dec 17, 2024
by
Hui Liu
Committed by
GitHub
Dec 17, 2024
Browse files
ROCm support for sglang.check_env (#2426)
parent
ce094a5d
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
140 additions
and
48 deletions
+140
-48
python/sglang/check_env.py
python/sglang/check_env.py
+140
-48
No files found.
python/sglang/check_env.py
View file @
5ce9daea
...
@@ -9,6 +9,13 @@ from collections import OrderedDict, defaultdict
...
@@ -9,6 +9,13 @@ from collections import OrderedDict, defaultdict
import
torch
import
torch
from
sglang.srt.utils
import
is_hip
def is_cuda_v2():
    """
    Tell whether the installed PyTorch build reports a CUDA version.

    Returns:
        bool: True when ``torch.version.cuda`` is not ``None``, False otherwise.
    """
    cuda_version = torch.version.cuda
    return cuda_version is not None
# List of packages to check versions
# List of packages to check versions
PACKAGE_LIST
=
[
PACKAGE_LIST
=
[
"sglang"
,
"sglang"
,
...
@@ -63,6 +70,7 @@ def get_cuda_info():
...
@@ -63,6 +70,7 @@ def get_cuda_info():
"""
"""
Get CUDA-related information if available.
Get CUDA-related information if available.
"""
"""
if
is_cuda_v2
():
cuda_info
=
{
"CUDA available"
:
torch
.
cuda
.
is_available
()}
cuda_info
=
{
"CUDA available"
:
torch
.
cuda
.
is_available
()}
if
cuda_info
[
"CUDA available"
]:
if
cuda_info
[
"CUDA available"
]:
...
@@ -70,6 +78,14 @@ def get_cuda_info():
...
@@ -70,6 +78,14 @@ def get_cuda_info():
cuda_info
.
update
(
_get_cuda_version_info
())
cuda_info
.
update
(
_get_cuda_version_info
())
return
cuda_info
return
cuda_info
elif
is_hip
():
cuda_info
=
{
"ROCM available"
:
torch
.
cuda
.
is_available
()}
if
cuda_info
[
"ROCM available"
]:
cuda_info
.
update
(
_get_gpu_info
())
cuda_info
.
update
(
_get_cuda_version_info
())
return
cuda_info
def
_get_gpu_info
():
def
_get_gpu_info
():
...
@@ -103,6 +119,7 @@ def _get_cuda_version_info():
...
@@ -103,6 +119,7 @@ def _get_cuda_version_info():
"""
"""
Get CUDA version information.
Get CUDA version information.
"""
"""
if
is_cuda_v2
():
from
torch.utils.cpp_extension
import
CUDA_HOME
from
torch.utils.cpp_extension
import
CUDA_HOME
cuda_info
=
{
"CUDA_HOME"
:
CUDA_HOME
}
cuda_info
=
{
"CUDA_HOME"
:
CUDA_HOME
}
...
@@ -112,26 +129,63 @@ def _get_cuda_version_info():
...
@@ -112,26 +129,63 @@ def _get_cuda_version_info():
cuda_info
.
update
(
_get_cuda_driver_version
())
cuda_info
.
update
(
_get_cuda_driver_version
())
return
cuda_info
return
cuda_info
elif
is_hip
():
from
torch.utils.cpp_extension
import
ROCM_HOME
as
ROCM_HOME
cuda_info
=
{
"ROCM_HOME"
:
ROCM_HOME
}
if
ROCM_HOME
and
os
.
path
.
isdir
(
ROCM_HOME
):
cuda_info
.
update
(
_get_nvcc_info
())
cuda_info
.
update
(
_get_cuda_driver_version
())
return
cuda_info
else
:
cuda_info
=
{
"CUDA_HOME"
:
""
}
return
cuda_info
def _get_nvcc_info():
    """
    Get NVCC (CUDA builds) or HIPCC (ROCm builds) version information.

    Returns:
        dict: ``{"NVCC": <text>}`` when ``is_cuda_v2()`` is true,
        ``{"HIPCC": <text>}`` when ``is_hip()`` is true, and
        ``{"NVCC": "Not Available"}`` otherwise. ``<text>`` is the portion of
        the compiler's version output between the known start and end markers,
        or ``"Not Available"`` when the compiler cannot be located or run.
    """
    if is_cuda_v2():
        from torch.utils.cpp_extension import CUDA_HOME

        version = _query_compiler_version(
            CUDA_HOME, "nvcc", "-V", "Cuda compilation tools", "Build"
        )
        return {"NVCC": version if version is not None else "Not Available"}

    if is_hip():
        from torch.utils.cpp_extension import ROCM_HOME

        version = _query_compiler_version(
            ROCM_HOME, "hipcc", "--version", "HIP version", "AMD clang"
        )
        return {"HIPCC": version if version is not None else "Not Available"}

    return {"NVCC": "Not Available"}


def _query_compiler_version(home, binary, flag, start_marker, end_marker):
    """
    Run ``<home>/bin/<binary> <flag>`` and extract the version text.

    Args:
        home: Toolkit root directory; may be ``None`` or empty.
        binary: Compiler executable name inside ``<home>/bin``.
        flag: Command-line flag that makes the compiler print its version.
        start_marker: Text that begins the interesting part of the output.
        end_marker: Text that ends the interesting part of the output.

    Returns:
        The stripped text from the last ``start_marker`` up to the following
        ``end_marker``, or ``None`` if the compiler could not be run.
    """
    # A missing toolkit root would make os.path.join raise TypeError; treat it
    # as "compiler not available" instead of crashing the environment report.
    if not home:
        return None

    compiler = os.path.join(home, "bin", binary)
    try:
        # Argument list (no shell) so the path needs no quoting.
        raw = subprocess.check_output([compiler, flag])
    except (subprocess.SubprocessError, OSError):
        # OSError covers a missing or non-executable binary, which is only
        # reported as CalledProcessError when going through a shell.
        return None

    output = raw.decode("utf-8", errors="replace").strip()

    # rfind returns -1 when a marker is absent; fall back to the string
    # boundaries rather than slicing with -1 and returning a wrong fragment.
    start = output.rfind(start_marker)
    if start == -1:
        start = 0
    end = output.rfind(end_marker, start)
    if end == -1:
        end = len(output)
    return output[start:end].strip()
def
_get_cuda_driver_version
():
def
_get_cuda_driver_version
():
...
@@ -139,6 +193,7 @@ def _get_cuda_driver_version():
...
@@ -139,6 +193,7 @@ def _get_cuda_driver_version():
Get CUDA driver version.
Get CUDA driver version.
"""
"""
versions
=
set
()
versions
=
set
()
if
is_cuda_v2
():
try
:
try
:
output
=
subprocess
.
check_output
(
output
=
subprocess
.
check_output
(
[
[
...
@@ -154,12 +209,32 @@ def _get_cuda_driver_version():
...
@@ -154,12 +209,32 @@ def _get_cuda_driver_version():
return
{
"CUDA Driver Versions"
:
", "
.
join
(
sorted
(
versions
))}
return
{
"CUDA Driver Versions"
:
", "
.
join
(
sorted
(
versions
))}
except
subprocess
.
SubprocessError
:
except
subprocess
.
SubprocessError
:
return
{
"CUDA Driver Version"
:
"Not Available"
}
return
{
"CUDA Driver Version"
:
"Not Available"
}
elif
is_hip
():
try
:
output
=
subprocess
.
check_output
(
[
"rocm-smi"
,
"--showdriverversion"
,
"--csv"
,
]
)
versions
=
set
(
output
.
decode
().
strip
().
split
(
"
\n
"
))
versions
.
discard
(
"name, value"
)
ver
=
versions
.
pop
()
ver
=
ver
.
replace
(
'"Driver version", '
,
""
).
replace
(
'"'
,
""
)
return
{
"ROCM Driver Version"
:
ver
}
except
subprocess
.
SubprocessError
:
return
{
"ROCM Driver Version"
:
"Not Available"
}
else
:
return
{
"CUDA Driver Version"
:
"Not Available"
}
def
get_gpu_topology
():
def
get_gpu_topology
():
"""
"""
Get GPU topology information.
Get GPU topology information.
"""
"""
if
is_cuda_v2
():
try
:
try
:
result
=
subprocess
.
run
(
result
=
subprocess
.
run
(
[
"nvidia-smi"
,
"topo"
,
"-m"
],
[
"nvidia-smi"
,
"topo"
,
"-m"
],
...
@@ -171,6 +246,20 @@ def get_gpu_topology():
...
@@ -171,6 +246,20 @@ def get_gpu_topology():
return
"
\n
"
+
result
.
stdout
if
result
.
returncode
==
0
else
None
return
"
\n
"
+
result
.
stdout
if
result
.
returncode
==
0
else
None
except
subprocess
.
SubprocessError
:
except
subprocess
.
SubprocessError
:
return
None
return
None
elif
is_hip
():
try
:
result
=
subprocess
.
run
(
[
"rocm-smi"
,
"--showtopotype"
],
stdout
=
subprocess
.
PIPE
,
stderr
=
subprocess
.
PIPE
,
text
=
True
,
check
=
True
,
)
return
"
\n
"
+
result
.
stdout
if
result
.
returncode
==
0
else
None
except
subprocess
.
SubprocessError
:
return
None
else
:
return
None
def
get_hypervisor_vendor
():
def
get_hypervisor_vendor
():
...
@@ -196,7 +285,10 @@ def check_env():
...
@@ -196,7 +285,10 @@ def check_env():
gpu_topo
=
get_gpu_topology
()
gpu_topo
=
get_gpu_topology
()
if
gpu_topo
:
if
gpu_topo
:
if
is_cuda_v2
():
env_info
[
"NVIDIA Topology"
]
=
gpu_topo
env_info
[
"NVIDIA Topology"
]
=
gpu_topo
elif
is_hip
():
env_info
[
"AMD Topology"
]
=
gpu_topo
hypervisor_vendor
=
get_hypervisor_vendor
()
hypervisor_vendor
=
get_hypervisor_vendor
()
if
hypervisor_vendor
:
if
hypervisor_vendor
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment