Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tsoc
superbenchmark
Commits
9ca5e7a9
Unverified
Commit
9ca5e7a9
authored
Apr 20, 2026
by
one
Committed by
GitHub
Apr 20, 2026
Browse files
Support GPU system info collection via hy-smi (#4)
* Support GPU system info collection via hy-smi * Fix typos in docs
parent
6d08a565
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
64 additions
and
5 deletions
+64
-5
docs/user-tutorial/system-config.md
docs/user-tutorial/system-config.md
+2
-2
superbench/tools/system_info.py
superbench/tools/system_info.py
+62
-3
No files found.
docs/user-tutorial/system-config.md
View file @
9ca5e7a9
...
...
@@ -22,7 +22,7 @@ This tool is to collect the system information automatically on the tested GPU n
2.
Start to collect the sys info using
`sb node info --output-dir ${output-dir}`
command using root privilege.
3.
After the command finished, you can find the output system info json file
`sys
-
info.json`
of local node under
\$
{output_dir}.
3.
After the command finishes, you can find the output system info JSON file `sys_info.json` of the local node under \${output_dir}.
### Usage on multiple remote machines
...
...
@@ -38,7 +38,7 @@ This tool is to collect the system information automatically on the tested GPU n
sb run --get-info -f host.ini --output-dir ${output-dir} -C superbench.enable=none
```
4.
After the command finished, you can find the output system info json file
`sys
-
info.json`
of each node under
\$
{output_dir}/nodes/${node_name}.
4.
After the command finishes, you can find the output system info JSON file `sys_info.json` of each node under \${output_dir}/nodes/${node_name}.
## Parameter and Details
...
...
superbench/tools/system_info.py
View file @
9ca5e7a9
...
...
@@ -286,11 +286,69 @@ def get_gpu_amd(self):
return
gpu_dict
def _merge_hygon_gpu_json_info(self, gpu_info, command):
    """Merge Hygon GPU info from json command output.

    Runs ``command`` through ``self._run_cmd``, parses the output as JSON and
    copies the fields of every per-card entry into ``gpu_info`` in place.
    Fields already present for a card are overwritten by the new values.

    Args:
        gpu_info (dict): GPU info keyed by card id, updated in place.
        command (str): Command to get GPU info in json format.

    Raises:
        json.JSONDecodeError: If the command output is not valid JSON.
    """
    command_info = json.loads(self._run_cmd(command))
    for card, card_info in command_info.items():
        # Only top-level keys starting with 'card' are per-GPU entries.
        if not card.startswith('card'):
            continue
        # Register the card even when it contributes no fields.
        merged = gpu_info.setdefault(card, {})
        # A non-object payload (string, null, ...) has no fields to merge;
        # skip it instead of raising and losing the remaining cards.
        if not isinstance(card_info, dict):
            continue
        # Entries with an empty key are dropped.
        merged.update({key: value for key, value in card_info.items() if key})
def get_gpu_hygon(self):
    """Get hygon gpu info.

    Queries ``hy-smi`` in JSON mode once per option and merges the per-card
    fields, then stores the raw output of ``hy-smi --showtopo``. Every query
    is best-effort: a failure is logged and the remaining queries still run.

    Returns:
        dict: ``accelerator_vendor`` (always ``'hygon'``), ``rocm_info``
            (per-card info keyed by card id), ``gpu_count`` (number of cards
            in ``rocm_info``) and, if the topology query succeeded, ``topo``.
    """
    # Start from an empty result; every field below comes from hy-smi.
    gpu_dict = {
        'accelerator_vendor': 'hygon',
        'rocm_info': {},
    }

    hygon_json_info_options = [
        '--showid',
        '--showproductname',
        '--showserial',
        '--showvbios',
        '--showfwinfo',
        '--showbus',
        '--showtoponuma',
        '--showreplaycount',
        '--showmeminfo vram',
        '--showmemavailable',
        '--showmemvendor',
        '--showmemuse',
        '--showmemeccinfo',
        '--showmemoverdrive',
        '--showclocks',
        '--showperflevel',
        '--showoverdrive',
        '--showpower',
        '--showmaxpower',
        '--showvoltage',
        '--showtemp',
        '--showuse',
        '--showbw',
    ]
    for option in hygon_json_info_options:
        command = 'hy-smi --json {}'.format(option)
        try:
            self._merge_hygon_gpu_json_info(gpu_dict['rocm_info'], command)
        except Exception:
            # One failing option must not prevent the others from running.
            logger.exception('Error: get hygon gpu info failed with command: %s', command)

    try:
        gpu_dict['topo'] = self._run_cmd('hy-smi --showtopo')
    except Exception:
        logger.exception('Error: get hygon gpu topology info failed')

    gpu_dict['gpu_count'] = len(gpu_dict['rocm_info'])
    return gpu_dict
def
get_gpu
(
self
):
...
...
@@ -430,6 +488,7 @@ def get_nic(self):
nic_list
.
append
(
nic_info
)
except
Exception
:
logger
.
exception
(
'Error: get nic info failed'
)
return
nic_list
def
get_network
(
self
):
"""Get network info, including nic info, ib info and ofed version.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment