Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
superbenchmark
Commits
800b962a
Unverified
Commit
800b962a
authored
Apr 20, 2026
by
one
Committed by
GitHub
Apr 20, 2026
Browse files
Update mem-bw to use BandwidthTest (#5)
* Update mem-bw to use BandwidthTest * Update config and format code
parent
9ca5e7a9
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
215 additions
and
2 deletions
+215
-2
superbench/benchmarks/micro_benchmarks/__init__.py
superbench/benchmarks/micro_benchmarks/__init__.py
+2
-0
superbench/benchmarks/micro_benchmarks/dtk_memory_bw_performance.py
.../benchmarks/micro_benchmarks/dtk_memory_bw_performance.py
+92
-0
superbench/benchmarks/micro_benchmarks/rocm_memory_bw_performance.py
...benchmarks/micro_benchmarks/rocm_memory_bw_performance.py
+0
-1
superbench/config/hygon_bw1000.yaml
superbench/config/hygon_bw1000.yaml
+1
-1
tests/benchmarks/micro_benchmarks/test_dtk_memory_bw_performance.py
...hmarks/micro_benchmarks/test_dtk_memory_bw_performance.py
+86
-0
tests/data/dtk_memory_d2d_bw.log
tests/data/dtk_memory_d2d_bw.log
+11
-0
tests/data/dtk_memory_d2h_bw.log
tests/data/dtk_memory_d2h_bw.log
+11
-0
tests/data/dtk_memory_h2d_bw.log
tests/data/dtk_memory_h2d_bw.log
+12
-0
No files found.
superbench/benchmarks/micro_benchmarks/__init__.py
View file @
800b962a
...
@@ -14,6 +14,7 @@ from superbench.benchmarks.micro_benchmarks.blaslt_function_base import BlasLtBa
...
@@ -14,6 +14,7 @@ from superbench.benchmarks.micro_benchmarks.blaslt_function_base import BlasLtBa
from
superbench.benchmarks.micro_benchmarks.cublaslt_function
import
CublasLtBenchmark
from
superbench.benchmarks.micro_benchmarks.cublaslt_function
import
CublasLtBenchmark
from
superbench.benchmarks.micro_benchmarks.rocm_hipblaslt_function
import
RocmHipBlasLtBenchmark
from
superbench.benchmarks.micro_benchmarks.rocm_hipblaslt_function
import
RocmHipBlasLtBenchmark
from
superbench.benchmarks.micro_benchmarks.dtk_hipblaslt_function
import
DtkHipBlasLtBenchmark
from
superbench.benchmarks.micro_benchmarks.dtk_hipblaslt_function
import
DtkHipBlasLtBenchmark
from
superbench.benchmarks.micro_benchmarks.dtk_memory_bw_performance
import
DtkMemBwBenchmark
from
superbench.benchmarks.micro_benchmarks.dtk_gemm_flops_performance
import
DtkGemmFlopsBenchmark
from
superbench.benchmarks.micro_benchmarks.dtk_gemm_flops_performance
import
DtkGemmFlopsBenchmark
from
superbench.benchmarks.micro_benchmarks.dtk_hpcg_performance
import
DtkHpcgBenchmark
from
superbench.benchmarks.micro_benchmarks.dtk_hpcg_performance
import
DtkHpcgBenchmark
from
superbench.benchmarks.micro_benchmarks.cuda_gemm_flops_performance
import
CudaGemmFlopsBenchmark
from
superbench.benchmarks.micro_benchmarks.cuda_gemm_flops_performance
import
CudaGemmFlopsBenchmark
...
@@ -61,6 +62,7 @@ __all__ = [
...
@@ -61,6 +62,7 @@ __all__ = [
'DtkGemmFlopsBenchmark'
,
'DtkGemmFlopsBenchmark'
,
'RocmHipBlasLtBenchmark'
,
'RocmHipBlasLtBenchmark'
,
'DtkHipBlasLtBenchmark'
,
'DtkHipBlasLtBenchmark'
,
'DtkMemBwBenchmark'
,
'GPCNetBenchmark'
,
'GPCNetBenchmark'
,
'GemmFlopsBenchmark'
,
'GemmFlopsBenchmark'
,
'GpuBurnBenchmark'
,
'GpuBurnBenchmark'
,
...
...
superbench/benchmarks/micro_benchmarks/dtk_memory_bw_performance.py
0 → 100644
View file @
800b962a
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""Module of the DTK memory performance benchmarks."""
import
os
import
re
from
superbench.common.utils
import
logger
from
superbench.benchmarks
import
BenchmarkRegistry
,
Platform
from
superbench.benchmarks.micro_benchmarks
import
MemBwBenchmark
class DtkMemBwBenchmark(MemBwBenchmark):
    """The DTK memory performance benchmark class.

    Builds one ``BandwidthTest`` command per requested memory copy type and
    extracts a single bandwidth figure from the table each command prints.
    """

    # A signed decimal number with optional fraction and exponent.
    _NUMBER = r'[-+]?\d+(?:\.\d+)?(?:[eE][-+]?\d+)?'
    # One table row: "<size> <unit>" followed by six numeric columns. In the sample
    # logs the columns are: Avg Time, Avg BW, Min Time, Peak BW, Max Time, Min BW.
    # Compiled once here rather than on every _process_raw_result() call.
    _ROW_PATTERN = re.compile(
        r'^\s*\d+(?:\.\d+)?\s*(?:B|KB|MB|GB)\s+'
        r'({number})\s+({number})\s+({number})\s+({number})\s+({number})\s+({number})\s*$'.format(number=_NUMBER),
        re.IGNORECASE,
    )

    def __init__(self, name, parameters=''):
        """Constructor.

        Args:
            name (str): benchmark name.
            parameters (str): benchmark parameters.
        """
        super().__init__(name, parameters)

        self._bin_name = 'BandwidthTest'
        # Values passed to the --type and --mode options of the binary.
        self._type_map = {'htod': 0, 'dtoh': 1, 'dtod': 2}
        self._mode_map = {'pinned': 0, 'unpinned': 1}

    def _preprocess(self):
        """Preprocess/preparation operations before the benchmarking.

        Appends one command to self._commands for every entry of
        self._args.mem_type, in the same order.

        Return:
            True if _preprocess() succeed.
        """
        if not super()._preprocess():
            return False

        # SuperBench runs one process per visible GPU. Select index 0 inside that visibility mask.
        for mem_type in self._args.mem_type:
            command = os.path.join(self._args.bin_dir, self._bin_name)
            command += ' --type {} --index 0'.format(self._type_map[mem_type])
            # The --mode option is only passed for copies other than 'dtod'.
            if mem_type != 'dtod':
                command += ' --mode {}'.format(self._mode_map[self._args.memory])
            self._commands.append(command)

        return True

    def _process_raw_result(self, cmd_idx, raw_output):
        """Function to parse raw results and save the summarized results.

        self._result.add_raw_data() and self._result.add_result() need to be called to save the results.

        The reported value is the largest second numeric column (average bandwidth
        in the sample logs) over all table rows found in the output.

        Args:
            cmd_idx (int): the index of command corresponding with the raw_output.
            raw_output (str): raw output string of the micro-benchmark.

        Return:
            True if the raw output string is valid and result can be extracted.
        """
        mem_type = self._args.mem_type[cmd_idx]
        self._result.add_raw_data('raw_output_' + mem_type, raw_output, self._args.log_raw_data)

        metric = None
        mem_bw = -1    # sentinel: stays -1 when no table row matched
        valid = True
        try:
            metric = self._metrics[self._mem_types.index(mem_type)]
            for line in raw_output.splitlines():
                match = self._ROW_PATTERN.match(line)
                if match:
                    mem_bw = max(mem_bw, float(match.group(2)))
        except Exception:
            # Any parsing/lookup failure marks the output as invalid. Exception (not
            # BaseException) is caught so KeyboardInterrupt/SystemExit still propagate.
            valid = False

        # Checked after the try block instead of returning from a finally clause,
        # which would silently discard an in-flight exception.
        if not valid or mem_bw == -1:
            logger.error(
                'The result format is invalid - round: {}, benchmark: {}, raw output: {}.'.format(
                    self._curr_run_index, self._name, raw_output
                )
            )
            return False

        self._result.add_result(metric, mem_bw)
        return True
# Make DtkMemBwBenchmark the implementation selected for 'mem-bw' on the DTK platform.
BenchmarkRegistry.register_benchmark('mem-bw', DtkMemBwBenchmark, platform=Platform.DTK)
superbench/benchmarks/micro_benchmarks/rocm_memory_bw_performance.py
View file @
800b962a
...
@@ -91,4 +91,3 @@ class RocmMemBwBenchmark(MemBwBenchmark):
...
@@ -91,4 +91,3 @@ class RocmMemBwBenchmark(MemBwBenchmark):
BenchmarkRegistry
.
register_benchmark
(
'mem-bw'
,
RocmMemBwBenchmark
,
platform
=
Platform
.
ROCM
)
BenchmarkRegistry
.
register_benchmark
(
'mem-bw'
,
RocmMemBwBenchmark
,
platform
=
Platform
.
ROCM
)
BenchmarkRegistry
.
register_benchmark
(
'mem-bw'
,
RocmMemBwBenchmark
,
platform
=
Platform
.
DTK
)
superbench/config/hygon_bw1000.yaml
View file @
800b962a
...
@@ -282,7 +282,7 @@ superbench:
...
@@ -282,7 +282,7 @@ superbench:
modes
:
modes
:
-
name
:
local
-
name
:
local
proc_num
:
8
proc_num
:
8
prefix
:
HIP_VISIBLE_DEVICES={proc_rank}
numactl -N $(({proc_rank}/4))
prefix
:
HIP_VISIBLE_DEVICES={proc_rank}
parallel
:
no
parallel
:
no
ib-loopback
:
ib-loopback
:
enable
:
false
enable
:
false
...
...
tests/benchmarks/micro_benchmarks/test_dtk_memory_bw_performance.py
0 → 100644
View file @
800b962a
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""Tests for DTK mem-bw benchmark."""
import
numbers
import
unittest
from
tests.helper
import
decorator
from
tests.helper.testcase
import
BenchmarkTestCase
from
superbench.benchmarks
import
BenchmarkRegistry
,
BenchmarkType
,
ReturnCode
,
Platform
class DtkMemBwTest(BenchmarkTestCase, unittest.TestCase):
    """Test class for DTK mem-bw benchmark."""

    @classmethod
    def setUpClass(cls):
        """Set up the class fixture: mock environment plus a mock 'bin/BandwidthTest' file."""
        super().setUpClass()
        cls.createMockEnvs(cls)
        cls.createMockFiles(cls, ['bin/BandwidthTest'])

    @decorator.load_data('tests/data/dtk_memory_h2d_bw.log')
    @decorator.load_data('tests/data/dtk_memory_d2h_bw.log')
    @decorator.load_data('tests/data/dtk_memory_d2d_bw.log')
    def test_dtk_memory_bw_performance(self, raw_output_h2d, raw_output_d2h, raw_output_d2d):
        """Check command generation and result parsing with the default parameters."""
        benchmark_name = 'mem-bw'
        benchmark_class, _ = BenchmarkRegistry._BenchmarkRegistry__select_benchmark(benchmark_name, Platform.DTK)
        assert benchmark_class

        benchmark = benchmark_class(benchmark_name)
        preprocessed = benchmark._preprocess()
        assert preprocessed is True
        assert benchmark.return_code == ReturnCode.SUCCESS

        # Check basic information.
        assert benchmark
        assert benchmark.name == 'mem-bw'
        assert benchmark.type == BenchmarkType.MICRO

        # Check command list.
        expected_command = [
            'BandwidthTest --type 0 --index 0 --mode 0',
            'BandwidthTest --type 1 --index 0 --mode 0',
            'BandwidthTest --type 2 --index 0',
        ]
        for idx, expected in enumerate(expected_command):
            # Drop the bin directory prefix so only the binary name and its options are compared.
            options = benchmark._commands[idx].split(benchmark._bin_name)[1]
            assert benchmark._bin_name + options == expected

        # Check results and metrics.
        metric_names = ['h2d_bw', 'd2h_bw', 'd2d_bw']
        raw_outputs = [raw_output_h2d, raw_output_d2h, raw_output_d2d]
        for idx, (metric, output) in enumerate(zip(metric_names, raw_outputs)):
            assert benchmark._process_raw_result(idx, output)
            assert metric in benchmark.result
            values = benchmark.result[metric]
            assert len(values) == 1
            assert isinstance(values[0], numbers.Number)

        expected_bandwidth = {'h2d_bw': 30.274, 'd2h_bw': 32.058, 'd2d_bw': 1431.655}
        for metric, bandwidth in expected_bandwidth.items():
            assert benchmark.result[metric][0] == bandwidth

    def test_dtk_memory_bw_performance_unpinned_command(self):
        """Check command generation when '--memory unpinned' is requested."""
        benchmark_name = 'mem-bw'
        benchmark_class, _ = BenchmarkRegistry._BenchmarkRegistry__select_benchmark(benchmark_name, Platform.DTK)
        assert benchmark_class

        benchmark = benchmark_class(benchmark_name, parameters='--memory unpinned')
        preprocessed = benchmark._preprocess()
        assert preprocessed is True
        assert benchmark.return_code == ReturnCode.SUCCESS

        expected_command = [
            'BandwidthTest --type 0 --index 0 --mode 1',
            'BandwidthTest --type 1 --index 0 --mode 1',
            'BandwidthTest --type 2 --index 0',
        ]
        for idx, expected in enumerate(expected_command):
            # Drop the bin directory prefix so only the binary name and its options are compared.
            options = benchmark._commands[idx].split(benchmark._bin_name)[1]
            assert benchmark._bin_name + options == expected
tests/data/dtk_memory_d2d_bw.log
0 → 100644
View file @
800b962a
Using event timing to calculate bandwidth
Use the following device for testing
Device: deviceId 0, PciBusID 159 Name=BW Mem=64.0GB #CUs=80 Freq=1500Mhz
===================== HIP Bandwidth Test Type: D2D =====================
Data Size Avg Time(us) Avg BW(GB/s) Min Time(us) Peak BW(GB/s) Max Time(us) Min BW(GB/s)
16 B 6.100 0.003 6.000 0.003 6.200 0.003
1 KB 6.400 0.160 6.100 0.168 6.900 0.148
1 MB 8.100 129.454 8.000 131.072 8.400 124.830
512 MB 380.000 1412.913 370.000 1451.025 390.000 1376.602
1 GB 750.000 1431.655 740.000 1451.002 760.000 1412.817
tests/data/dtk_memory_d2h_bw.log
0 → 100644
View file @
800b962a
Using event timing to calculate bandwidth
Use the following device for testing
Device: deviceId 0, PciBusID 159 Name=BW Mem=64.0GB #CUs=80 Freq=1500Mhz
===================== HIP Bandwidth Test Type: D2H with host pinned =====================
Data Size Avg Time(us) Avg BW(GB/s) Min Time(us) Peak BW(GB/s) Max Time(us) Min BW(GB/s)
16 B 10.120 0.002 10.080 0.002 10.240 0.002
1 KB 10.500 0.098 10.240 0.100 10.880 0.094
1 MB 42.000 24.967 41.500 25.267 42.900 24.444
512 MB 16800.000 31.958 16790.000 31.977 16820.000 31.920
1 GB 33500.000 32.058 33490.000 32.068 33520.000 32.039
tests/data/dtk_memory_h2d_bw.log
0 → 100644
View file @
800b962a
Using event timing to calculate bandwidth
Use the following device for testing
Device: deviceId 0, PciBusID 159 Name=BW Mem=64.0GB #CUs=80 Freq=1500Mhz
===================== HIP Bandwidth Test Type: H2D with host pinned =====================
Data Size Avg Time(us) Avg BW(GB/s) Min Time(us) Peak BW(GB/s) Max Time(us) Min BW(GB/s)
16 B 11.876 0.001 11.200 0.001 13.120 0.001
1 KB 11.591 0.088 11.040 0.093 12.960 0.079
1 MB 46.044 22.773 45.440 23.076 47.040 22.291
512 MB 17745.799 30.253 17742.546 30.259 17749.426 30.247
1 GB 35467.245 30.274 35446.537 30.292 35485.413 30.259
4 GB 142138.429 30.217 142122.604 30.220 142188.065 30.206
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment