change / sglang · Commits

Commit c550ab91 (unverified)
Authored Nov 02, 2025 by Zhihao Lyu
Committed by GitHub, Nov 01, 2025
[Ascend] Add Ascend NPU support for sglang.check_env & rework proposal (#11052)

Co-authored-by: ronnie_zheng <zl19940307@163.com>

Parent: 086f0b79
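Usage stays a single entry point; only the internal dispatch changes. A minimal sketch of how the reworked checker selects a backend, mirroring the new __main__ block in the diff below (GPUEnv, HIPEnv, NPUEnv, and is_cuda_v2 are defined in python/sglang/check_env.py; is_hip and is_npu come from sglang.srt.utils):

# Sketch: platform dispatch after the rework (mirrors the new __main__ block below).
# Running `python3 -m sglang.check_env` is assumed to keep working unchanged.
from sglang.check_env import GPUEnv, HIPEnv, NPUEnv, is_cuda_v2
from sglang.srt.utils import is_hip, is_npu

if is_cuda_v2():
    env = GPUEnv()   # NVIDIA CUDA host
elif is_hip():
    env = HIPEnv()   # AMD ROCm host
elif is_npu():
    env = NPUEnv()   # Ascend NPU host
env.check_env()      # prints Python, PyTorch, packages, toolkit/driver versions, topology, ulimit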
Changes: 1 changed file with 286 additions and 166 deletions

python/sglang/check_env.py  (+286, -166)
@@ -5,11 +5,12 @@ import os
 import resource
 import subprocess
 import sys
+from abc import abstractmethod
 from collections import OrderedDict, defaultdict

 import torch

-from sglang.srt.utils import is_hip
+from sglang.srt.utils import is_hip, is_npu


 def is_cuda_v2():
@@ -51,12 +52,29 @@ PACKAGE_LIST = [
 ]


-def get_package_versions(packages):
-    """
-    Get versions of specified packages.
-    """
-    versions = {}
-    for package in packages:
-        package_name = package.split("==")[0].split(">=")[0].split("<=")[0]
-        try:
-            version = importlib.metadata.version(package_name)
+class BaseEnv:
+    """Base class for environment check"""
+
+    def __init__(self):
+        self.package_list = PACKAGE_LIST
+
+    @abstractmethod
+    def get_info(self) -> dict:
+        """
+        Get CUDA-related information if available.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def get_topology(self) -> dict:
+        raise NotImplementedError
+
+    def get_package_versions(self) -> dict:
+        """
+        Get versions of specified packages.
+        """
+        versions = {}
+        for package in self.package_list:
+            package_name = package.split("==")[0].split(">=")[0].split("<=")[0]
+            try:
+                version = importlib.metadata.version(package_name)
@@ -65,32 +83,9 @@ def get_package_versions(packages):
-            versions[package_name] = "Module Not Found"
-    return versions
-
-
-def get_cuda_info():
-    """
-    Get CUDA-related information if available.
-    """
-    if is_cuda_v2():
-        cuda_info = {"CUDA available": torch.cuda.is_available()}
-        if cuda_info["CUDA available"]:
-            cuda_info.update(_get_gpu_info())
-            cuda_info.update(_get_cuda_version_info())
-        return cuda_info
-    elif is_hip():
-        cuda_info = {"ROCM available": torch.cuda.is_available()}
-        if cuda_info["ROCM available"]:
-            cuda_info.update(_get_gpu_info())
-            cuda_info.update(_get_cuda_version_info())
-        return cuda_info
-
-
-def _get_gpu_info():
-    """
-    Get information about available GPUs.
-    """
-    devices = defaultdict(list)
-    capabilities = defaultdict(list)
+                versions[package_name] = "Module Not Found"
+        return versions
+
+    def get_device_info(self):
+        """
+        Get information about available GPU devices.
+        """
+        devices = defaultdict(list)
+        capabilities = defaultdict(list)
@@ -114,41 +109,67 @@ def _get_gpu_info():
     return gpu_info

+    def get_hypervisor_vendor(self) -> dict:
+        try:
+            output = subprocess.check_output(["lscpu"], text=True)
+            for line in output.split("\n"):
+                if "Hypervisor vendor:" in line:
+                    return {"Hypervisor vendor:": line.split(":")[1].strip()}
+            return {}
+        except:
+            return {}

-def _get_cuda_version_info():
-    """
-    Get CUDA version information.
-    """
-    if is_cuda_v2():
-        from torch.utils.cpp_extension import CUDA_HOME
-
-        cuda_info = {"CUDA_HOME": CUDA_HOME}
-
-        if CUDA_HOME and os.path.isdir(CUDA_HOME):
-            cuda_info.update(_get_nvcc_info())
-            cuda_info.update(_get_cuda_driver_version())
-        return cuda_info
-    elif is_hip():
-        from torch.utils.cpp_extension import ROCM_HOME as ROCM_HOME
-
-        cuda_info = {"ROCM_HOME": ROCM_HOME}
-
-        if ROCM_HOME and os.path.isdir(ROCM_HOME):
-            cuda_info.update(_get_nvcc_info())
-            cuda_info.update(_get_cuda_driver_version())
-        return cuda_info
-    else:
-        cuda_info = {"CUDA_HOME": ""}
-        return cuda_info
-
-
-def _get_nvcc_info():
-    """
-    Get NVCC version information.
-    """
-    if is_cuda_v2():
-        from torch.utils.cpp_extension import CUDA_HOME
-
-        try:
+    def get_ulimit_soft(self) -> dict:
+        ulimit_soft, _ = resource.getrlimit(resource.RLIMIT_NOFILE)
+        return {"ulimit soft": ulimit_soft}
+
+    def check_env(self):
+        """
+        Check and print environment information.
+        """
+        env_info = OrderedDict()
+        env_info["Python"] = sys.version.replace("\n", "")
+        env_info.update(self.get_info())
+        env_info["PyTorch"] = torch.__version__
+        env_info.update(self.get_package_versions())
+        env_info.update(self.get_topology())
+        env_info.update(self.get_hypervisor_vendor())
+        env_info.update(self.get_ulimit_soft())
+        for k, v in env_info.items():
+            print(f"{k}: {v}")
+
+
+class GPUEnv(BaseEnv):
+    """Environment checker for Nvidia GPU"""
+
+    def get_info(self):
+        cuda_info = {"CUDA available": torch.cuda.is_available()}
+        if cuda_info["CUDA available"]:
+            cuda_info.update(self.get_device_info())
+            cuda_info.update(self._get_cuda_version_info())
+        return cuda_info
+
+    def _get_cuda_version_info(self):
+        """
+        Get CUDA version information.
+        """
+        from torch.utils.cpp_extension import CUDA_HOME
+
+        cuda_info = {"CUDA_HOME": CUDA_HOME}
+
+        if CUDA_HOME and os.path.isdir(CUDA_HOME):
+            cuda_info.update(self._get_nvcc_info())
+            cuda_info.update(self._get_cuda_driver_version())
+        return cuda_info
+
+    def _get_nvcc_info(self):
+        """
+        Get NVCC version information.
+        """
+        from torch.utils.cpp_extension import CUDA_HOME
+
+        try:
@@ -167,33 +188,12 @@ def _get_nvcc_info():
             }
         except subprocess.SubprocessError:
             return {"NVCC": "Not Available"}
-    elif is_hip():
-        from torch.utils.cpp_extension import ROCM_HOME
-
-        try:
-            hipcc = os.path.join(ROCM_HOME, "bin/hipcc")
-            hipcc_output = (
-                subprocess.check_output(f'"{hipcc}" --version', shell=True)
-                .decode("utf-8")
-                .strip()
-            )
-            return {
-                "HIPCC": hipcc_output[
-                    hipcc_output.rfind("HIP version") : hipcc_output.rfind("AMD clang")
-                ].strip()
-            }
-        except subprocess.SubprocessError:
-            return {"HIPCC": "Not Available"}
-    else:
-        return {"NVCC": "Not Available"}

-
-def _get_cuda_driver_version():
-    """
-    Get CUDA driver version.
-    """
-    versions = set()
-    if is_cuda_v2():
+    def _get_cuda_driver_version(self):
+        """
+        Get CUDA driver version.
+        """
+        versions = set()
         try:
             output = subprocess.check_output(
                 [
@@ -209,7 +209,70 @@ def _get_cuda_driver_version():
             return {"CUDA Driver Versions": ", ".join(sorted(versions))}
         except subprocess.SubprocessError:
             return {"CUDA Driver Version": "Not Available"}
-    elif is_hip():
+
+    def get_topology(self):
+        """
+        Get GPU topology information.
+        """
+        try:
+            result = subprocess.run(
+                ["nvidia-smi", "topo", "-m"],
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                text=True,
+                check=True,
+            )
+            return {
+                "NVIDIA Topology": (
+                    "\n" + result.stdout if result.returncode == 0 else None
+                )
+            }
+        except subprocess.SubprocessError:
+            return {}
+
+
+class HIPEnv(BaseEnv):
+    """Environment checker for ROCm/HIP"""
+
+    def get_info(self):
+        cuda_info = {"ROCM available": torch.cuda.is_available()}
+        if cuda_info["ROCM available"]:
+            cuda_info.update(self.get_device_info())
+            cuda_info.update(self._get_cuda_version_info())
+        return cuda_info
+
+    def _get_cuda_version_info(self):
+        from torch.utils.cpp_extension import ROCM_HOME as ROCM_HOME
+
+        cuda_info = {"ROCM_HOME": ROCM_HOME}
+
+        if ROCM_HOME and os.path.isdir(ROCM_HOME):
+            cuda_info.update(self._get_hipcc_info())
+            cuda_info.update(self._get_rocm_driver_version())
+        return cuda_info
+
+    def _get_hipcc_info(self):
+        from torch.utils.cpp_extension import ROCM_HOME
+
+        try:
+            hipcc = os.path.join(ROCM_HOME, "bin/hipcc")
+            hipcc_output = (
+                subprocess.check_output(f'"{hipcc}" --version', shell=True)
+                .decode("utf-8")
+                .strip()
+            )
+            return {
+                "HIPCC": hipcc_output[
+                    hipcc_output.rfind("HIP version") : hipcc_output.rfind("AMD clang")
+                ].strip()
+            }
+        except subprocess.SubprocessError:
+            return {"HIPCC": "Not Available"}
+
+    def _get_rocm_driver_version(self):
         try:
             output = subprocess.check_output(
                 [
@@ -226,27 +289,8 @@ def _get_cuda_driver_version():
             return {"ROCM Driver Version": ver}
         except subprocess.SubprocessError:
             return {"ROCM Driver Version": "Not Available"}
-    else:
-        return {"CUDA Driver Version": "Not Available"}
-
-
-def get_gpu_topology():
-    """
-    Get GPU topology information.
-    """
-    if is_cuda_v2():
-        try:
-            result = subprocess.run(
-                ["nvidia-smi", "topo", "-m"],
-                stdout=subprocess.PIPE,
-                stderr=subprocess.PIPE,
-                text=True,
-                check=True,
-            )
-            return "\n" + result.stdout if result.returncode == 0 else None
-        except subprocess.SubprocessError:
-            return None
-    elif is_hip():
+
+    def get_topology(self):
         try:
             result = subprocess.run(
                 ["rocm-smi", "--showtopotype"],
@@ -255,51 +299,127 @@ def get_gpu_topology():
                 text=True,
                 check=True,
             )
-            return "\n" + result.stdout if result.returncode == 0 else None
+            return {
+                "AMD Topology": "\n" + result.stdout if result.returncode == 0 else None
+            }
         except subprocess.SubprocessError:
-            return None
-    else:
-        return None
+            return {}


-def get_hypervisor_vendor():
-    try:
-        output = subprocess.check_output(["lscpu"], text=True)
-        for line in output.split("\n"):
-            if "Hypervisor vendor:" in line:
-                return line.split(":")[1].strip()
-        return None
-    except:
-        return None
+class NPUEnv(BaseEnv):
+    """Environment checker for Ascend NPU"""

+    def __init__(self):
+        super().__init__()
+        self.package_list = ["torch_npu", "sgl-kernel-npu"] + self.package_list

-def check_env():
-    """
-    Check and print environment information.
-    """
-    env_info = OrderedDict()
+    def get_info(self):
+        cuda_info = {"NPU available": torch.npu.is_available()}
+        if cuda_info["NPU available"]:
+            cuda_info.update(self.get_device_info())
+            cuda_info.update(self._get_cann_version_info())
+        return cuda_info

-    env_info["Python"] = sys.version.replace("\n", "")
-    env_info.update(get_cuda_info())
-    env_info["PyTorch"] = torch.__version__
-    env_info.update(get_package_versions(PACKAGE_LIST))
+    def get_device_info(self):
+        """
+        Get information about available NPUs.
+        Need to override due to torch_npu interface differences.
+        """
+        devices = defaultdict(list)
+        for k in range(torch.npu.device_count()):
+            devices[torch.npu.get_device_name(k)].append(str(k))

-    gpu_topo = get_gpu_topology()
-    if gpu_topo:
-        if is_cuda_v2():
-            env_info["NVIDIA Topology"] = gpu_topo
-        elif is_hip():
-            env_info["AMD Topology"] = gpu_topo
+        npu_info = {}
+        for name, device_ids in devices.items():
+            npu_info[f"NPU {','.join(device_ids)}"] = name
+        return npu_info

-    hypervisor_vendor = get_hypervisor_vendor()
-    if hypervisor_vendor:
-        env_info["Hypervisor vendor"] = hypervisor_vendor
+    def _get_cann_version_info(self):
+        cann_envs = ["ASCEND_TOOLKIT_HOME", "ASCEND_INSTALL_PATH"]
+        for var in cann_envs:
+            path = os.environ.get(var)
+            if path and os.path.exists(path):
+                CANN_HOME = path
+                break
+        else:
+            default_path = "/usr/local/Ascend/ascend-toolkit/latest"
+            CANN_HOME = default_path if os.path.exists(default_path) else None

-    ulimit_soft, _ = resource.getrlimit(resource.RLIMIT_NOFILE)
-    env_info["ulimit soft"] = ulimit_soft
+        if CANN_HOME:
+            npu_info = {"CANN_HOME": CANN_HOME}
+            npu_info.update(self._get_cann_info(CANN_HOME))
+            npu_info.update(self._get_ascend_driver_version())
+            return npu_info
+        else:
+            return {"CANN_HOME": "Not found"}

-    for k, v in env_info.items():
-        print(f"{k}: {v}")
+    def _get_cann_info(self, CANN_HOME: str):
+        cann_info = {}
+        cann_version_file = os.path.join(CANN_HOME, "version.cfg")
+        if os.path.exists(cann_version_file):
+            with open(cann_version_file, "r", encoding="utf-8") as f:
+                f.readline()  # discard first line comment in version.cfg
+                cann_info["CANN"] = f.readline().split("[")[1].split("]")[0]
+        else:
+            cann_info["CANN"] = "Not Available"
+
+        try:
+            bisheng = os.path.join(CANN_HOME, "compiler/ccec_compiler/bin/bisheng")
+            bisheng_output = (
+                subprocess.check_output([bisheng, "--version"]).decode("utf-8").strip()
+            )
+            cann_info["BiSheng"] = bisheng_output.split("\n")[0].strip()
+        except subprocess.SubprocessError:
+            cann_info["BiSheng"] = "Not Available"
+        return cann_info
+
+    def _get_ascend_driver_version(self):
+        try:
+            output = subprocess.check_output(
+                [
+                    "npu-smi",
+                    "info",
+                    "-t",
+                    "board",
+                    "-i",
+                    "0",
+                ]
+            )
+            for line in output.decode().strip().split("\n"):
+                if "Software Version" in line:
+                    version = line.split(":")[-1].strip()
+                    break
+            else:
+                version = "Not Available"
+            return {"Ascend Driver Version": version}
+        except subprocess.SubprocessError:
+            return {"Ascend Driver Version": "Not Available"}
+
+    def get_topology(self):
+        try:
+            result = subprocess.run(
+                ["npu-smi", "info", "-t", "topo"],
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                text=True,
+                check=True,
+            )
+            return {
+                "Ascend Topology": (
+                    "\n" + result.stdout if result.returncode == 0 else None
+                )
+            }
+        except subprocess.SubprocessError:
+            return {}


 if __name__ == "__main__":
-    check_env()
+    if is_cuda_v2():
+        env = GPUEnv()
+    elif is_hip():
+        env = HIPEnv()
+    elif is_npu():
+        env = NPUEnv()
+    env.check_env()
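For reference, a usage sketch of the new NPU checker (assumptions: an Ascend host with torch_npu installed and npu-smi on the PATH; method names as defined in the diff above), calling the individual probes without printing the full report:

# Usage sketch, assuming an Ascend NPU host with torch_npu and npu-smi available.
from sglang.check_env import NPUEnv

env = NPUEnv()
print(env.get_info())              # NPU availability, device names, CANN/BiSheng/driver versions
print(env.get_topology())          # {"Ascend Topology": ...} taken from `npu-smi info -t topo`
print(env.get_package_versions())  # package list with torch_npu and sgl-kernel-npu prepended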