zhaoyu6 / sglang / Commits / 5ce9daea
"backend/apps/vscode:/vscode.git/clone" did not exist on "99d10d1189452ad49fcace219e9c90ae65906cd1"
Commit 5ce9daea (Unverified)
Authored Dec 17, 2024 by Hui Liu; committed by GitHub on Dec 17, 2024
ROCm support for sglang.check_env (#2426)
Parent: ce094a5d
Showing 1 changed file with 140 additions and 48 deletions

python/sglang/check_env.py (+140, -48)
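Before the diff, a quick orientation: this change makes sglang.check_env report ROCm details by dispatching on is_cuda_v2() versus is_hip(). Below is a minimal, hedged usage sketch; the import path and print behavior are assumptions based on this file, not anything stated in the commit.

# Hedged usage sketch, not part of the diff. On a ROCm build of PyTorch,
# torch.version.cuda is None, so is_cuda_v2() is False and the new is_hip()
# branches below are taken instead of the CUDA ones.
import torch
from sglang.check_env import check_env

print("torch.version.cuda:", torch.version.cuda)  # None on ROCm wheels
print("torch.version.hip:", getattr(torch.version, "hip", None))  # None on CUDA wheels
check_env()  # prints package versions plus CUDA- or ROCm-specific environment info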
python/sglang/check_env.py (view file @ 5ce9daea)
@@ -9,6 +9,13 @@ from collections import OrderedDict, defaultdict
 import torch

+from sglang.srt.utils import is_hip
+
+
+def is_cuda_v2():
+    return torch.version.cuda is not None
+
+
 # List of packages to check versions
 PACKAGE_LIST = [
     "sglang",
@@ -63,6 +70,7 @@ def get_cuda_info():
     """
     Get CUDA-related information if available.
     """
-    cuda_info = {"CUDA available": torch.cuda.is_available()}
+    if is_cuda_v2():
+        cuda_info = {"CUDA available": torch.cuda.is_available()}

-    if cuda_info["CUDA available"]:
+        if cuda_info["CUDA available"]:
@@ -70,6 +78,14 @@ def get_cuda_info():
-        cuda_info.update(_get_cuda_version_info())
+            cuda_info.update(_get_cuda_version_info())

-    return cuda_info
+        return cuda_info
+    elif is_hip():
+        cuda_info = {"ROCM available": torch.cuda.is_available()}
+
+        if cuda_info["ROCM available"]:
+            cuda_info.update(_get_gpu_info())
+            cuda_info.update(_get_cuda_version_info())
+
+        return cuda_info


 def _get_gpu_info():
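To make the new branch concrete, here is an illustrative, invented example of the dictionary shape get_cuda_info() can now return on a ROCm machine; the keys mirror the code above and the values are placeholders, not real output.

# Illustrative only: invented values showing the dict shape returned by the
# ROCm branch of get_cuda_info() above.
example_rocm_info = {
    "ROCM available": True,
    # ...plus entries merged in from _get_gpu_info() and _get_cuda_version_info(),
    # e.g. device names and a "ROCM_HOME" path such as "/opt/rocm".
}
print(example_rocm_info)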
@@ -103,6 +119,7 @@ def _get_cuda_version_info():
     """
     Get CUDA version information.
     """
-    from torch.utils.cpp_extension import CUDA_HOME
+    if is_cuda_v2():
+        from torch.utils.cpp_extension import CUDA_HOME

-    cuda_info = {"CUDA_HOME": CUDA_HOME}
+        cuda_info = {"CUDA_HOME": CUDA_HOME}
@@ -112,26 +129,63 @@ def _get_cuda_version_info():
-    cuda_info.update(_get_cuda_driver_version())
+        cuda_info.update(_get_cuda_driver_version())

-    return cuda_info
+        return cuda_info
+    elif is_hip():
+        from torch.utils.cpp_extension import ROCM_HOME as ROCM_HOME
+
+        cuda_info = {"ROCM_HOME": ROCM_HOME}
+
+        if ROCM_HOME and os.path.isdir(ROCM_HOME):
+            cuda_info.update(_get_nvcc_info())
+            cuda_info.update(_get_cuda_driver_version())
+
+        return cuda_info
+    else:
+        cuda_info = {"CUDA_HOME": ""}
+        return cuda_info


 def _get_nvcc_info():
     """
     Get NVCC version information.
     """
-    from torch.utils.cpp_extension import CUDA_HOME
+    if is_cuda_v2():
+        from torch.utils.cpp_extension import CUDA_HOME

-    try:
-        nvcc = os.path.join(CUDA_HOME, "bin/nvcc")
-        nvcc_output = (
-            subprocess.check_output(f'"{nvcc}" -V', shell=True).decode("utf-8").strip()
-        )
-        return {
-            "NVCC": nvcc_output[
-                nvcc_output.rfind("Cuda compilation tools") : nvcc_output.rfind("Build")
-            ].strip()
-        }
-    except subprocess.SubprocessError:
-        return {"NVCC": "Not Available"}
+        try:
+            nvcc = os.path.join(CUDA_HOME, "bin/nvcc")
+            nvcc_output = (
+                subprocess.check_output(f'"{nvcc}" -V', shell=True)
+                .decode("utf-8")
+                .strip()
+            )
+            return {
+                "NVCC": nvcc_output[
+                    nvcc_output.rfind("Cuda compilation tools") : nvcc_output.rfind("Build")
+                ].strip()
+            }
+        except subprocess.SubprocessError:
+            return {"NVCC": "Not Available"}
+    elif is_hip():
+        from torch.utils.cpp_extension import ROCM_HOME
+
+        try:
+            hipcc = os.path.join(ROCM_HOME, "bin/hipcc")
+            hipcc_output = (
+                subprocess.check_output(f'"{hipcc}" --version', shell=True)
+                .decode("utf-8")
+                .strip()
+            )
+            return {
+                "HIPCC": hipcc_output[
+                    hipcc_output.rfind("HIP version") : hipcc_output.rfind("AMD clang")
+                ].strip()
+            }
+        except subprocess.SubprocessError:
+            return {"HIPCC": "Not Available"}
+    else:
+        return {"NVCC": "Not Available"}
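The HIPCC entry above is extracted by slicing the hipcc --version banner between its "HIP version" and "AMD clang" markers. A small sketch with an invented banner traces that rfind-based slice (real banner text varies by ROCm release).

# Hedged illustration of the rfind-based slicing in the HIP branch of
# _get_nvcc_info(). The banner below is invented; real `hipcc --version`
# output differs between ROCm releases.
sample = (
    "HIP version: 6.2.41133-dd7f95766\n"
    "AMD clang version 18.0.0 (https://github.com/ROCm/llvm-project ...)"
)
hip_line = sample[sample.rfind("HIP version") : sample.rfind("AMD clang")].strip()
print(hip_line)  # -> HIP version: 6.2.41133-dd7f95766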
@@ -139,6 +193,7 @@ def _get_cuda_driver_version():
     Get CUDA driver version.
     """
     versions = set()
-    try:
-        output = subprocess.check_output(
-            [
+    if is_cuda_v2():
+        try:
+            output = subprocess.check_output(
+                [
@@ -154,12 +209,32 @@ def _get_cuda_driver_version():
-        return {"CUDA Driver Versions": ", ".join(sorted(versions))}
-    except subprocess.SubprocessError:
-        return {"CUDA Driver Version": "Not Available"}
+            return {"CUDA Driver Versions": ", ".join(sorted(versions))}
+        except subprocess.SubprocessError:
+            return {"CUDA Driver Version": "Not Available"}
+    elif is_hip():
+        try:
+            output = subprocess.check_output(
+                [
+                    "rocm-smi",
+                    "--showdriverversion",
+                    "--csv",
+                ]
+            )
+            versions = set(output.decode().strip().split("\n"))
+            versions.discard("name, value")
+            ver = versions.pop()
+            ver = ver.replace('"Driver version", ', "").replace('"', "")
+            return {"ROCM Driver Version": ver}
+        except subprocess.SubprocessError:
+            return {"ROCM Driver Version": "Not Available"}
+    else:
+        return {"CUDA Driver Version": "Not Available"}


 def get_gpu_topology():
     """
     Get GPU topology information.
     """
-    try:
-        result = subprocess.run(
-            ["nvidia-smi", "topo", "-m"],
+    if is_cuda_v2():
+        try:
+            result = subprocess.run(
+                ["nvidia-smi", "topo", "-m"],
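The ROCm driver-version branch above parses rocm-smi's CSV output by discarding the "name, value" header row and stripping the quoted label. A short sketch with an invented two-line payload traces that logic.

# Hedged illustration of the `rocm-smi --showdriverversion --csv` parsing above.
# The payload is invented; it only mimics the CSV shape the code expects.
payload = 'name, value\n"Driver version", 6.8.5'
versions = set(payload.strip().split("\n"))
versions.discard("name, value")  # drop the CSV header row
ver = versions.pop().replace('"Driver version", ', "").replace('"', "")
print(ver)  # -> 6.8.5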
@@ -171,6 +246,20 @@ def get_gpu_topology():
-        return "\n" + result.stdout if result.returncode == 0 else None
-    except subprocess.SubprocessError:
-        return None
+            return "\n" + result.stdout if result.returncode == 0 else None
+        except subprocess.SubprocessError:
+            return None
+    elif is_hip():
+        try:
+            result = subprocess.run(
+                ["rocm-smi", "--showtopotype"],
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                text=True,
+                check=True,
+            )
+            return "\n" + result.stdout if result.returncode == 0 else None
+        except subprocess.SubprocessError:
+            return None
+    else:
+        return None


 def get_hypervisor_vendor():
@@ -196,7 +285,10 @@ def check_env():
     gpu_topo = get_gpu_topology()
     if gpu_topo:
-        env_info["NVIDIA Topology"] = gpu_topo
+        if is_cuda_v2():
+            env_info["NVIDIA Topology"] = gpu_topo
+        elif is_hip():
+            env_info["AMD Topology"] = gpu_topo

     hypervisor_vendor = get_hypervisor_vendor()
     if hypervisor_vendor: