Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tsoc
superbenchmark
Commits
511807b7
Unverified
Commit
511807b7
authored
Apr 21, 2026
by
one
Committed by
GitHub
Apr 21, 2026
Browse files
Config: Update config files (#7)
- Add BW150 config - Update BW1000 config - Merge summary rules
parent
0993db75
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
687 additions
and
98 deletions
+687
-98
superbench/config/hygon_bw1000.yaml
superbench/config/hygon_bw1000.yaml
+44
-53
superbench/config/hygon_bw1000_summary.yaml
superbench/config/hygon_bw1000_summary.yaml
+0
-45
superbench/config/hygon_bw150.yaml
superbench/config/hygon_bw150.yaml
+340
-0
superbench/config/hygon_bw_summary.yaml
superbench/config/hygon_bw_summary.yaml
+303
-0
No files found.
superbench/config/hygon_bw1000.yaml
View file @
511807b7
...
@@ -6,7 +6,7 @@ superbench:
...
@@ -6,7 +6,7 @@ superbench:
enable
:
false
enable
:
false
var
:
var
:
default_local_mode
:
&default_local_mode
default_local_mode
:
&default_local_mode
enable
:
fals
e
enable
:
tru
e
modes
:
modes
:
-
name
:
local
-
name
:
local
proc_num
:
8
proc_num
:
8
...
@@ -44,23 +44,23 @@ superbench:
...
@@ -44,23 +44,23 @@ superbench:
n
:
8192
n
:
8192
k
:
8192
k
:
8192
hipblaslt-gemm
:
hipblaslt-gemm
:
enable
:
fals
e
enable
:
tru
e
modes
:
modes
:
-
name
:
local
-
name
:
local
proc_num
:
8
proc_num
:
8
prefix
:
HIP_VISIBLE_DEVICES={proc_rank}
prefix
:
HIP_VISIBLE_DEVICES={proc_rank}
parallel
:
yes
parallel
:
yes
parameters
:
parameters
:
in_types
:
[
"
fp32"
,
"
fp16"
,
"
bf16"
]
in_types
:
[
"
fp32"
,
"
fp16"
,
"
bf16"
]
tolerant_fail
:
yes
tolerant_fail
:
yes
num_warmup
:
100
num_warmup
:
100
num_steps
:
1000
num_steps
:
1000
shapes
:
shapes
:
-
4096,4096,4096
-
4096,4096,4096
-
8192,8192,8192
-
8192,8192,8192
-
16384,16384,16384
-
16384,16384,16384
gpu-stream
:
gpu-stream
:
enable
:
fals
e
enable
:
tru
e
modes
:
modes
:
-
name
:
local
-
name
:
local
proc_num
:
8
proc_num
:
8
...
@@ -71,7 +71,7 @@ superbench:
...
@@ -71,7 +71,7 @@ superbench:
num_loops
:
100
num_loops
:
100
precision
:
double
precision
:
double
rccl-bw:allreduce-r16:
rccl-bw:allreduce-r16:
enable
:
tru
e
enable
:
fals
e
modes
:
modes
:
-
name
:
mpi
-
name
:
mpi
proc_num
:
8
proc_num
:
8
...
@@ -92,57 +92,37 @@ superbench:
...
@@ -92,57 +92,37 @@ superbench:
maxbytes
:
16G
maxbytes
:
16G
ngpus
:
1
ngpus
:
1
operation
:
allreduce
operation
:
allreduce
rccl-bw:allreduce-r8-pcie:
rccl-bw:allreduce-r8:
enable
:
true
enable
:
true
modes
:
modes
:
-
name
:
mpi
-
name
:
mpi
proc_num
:
8
proc_num
:
8
node_num
:
1
node_num
:
1
bind_to
:
none
bind_to
:
none
mca
:
pml
:
ucx
btl
:
^openib
btl_tcp_if_exclude
:
lo,docker0
coll_hcoll_enable
:
0
env
:
env
:
ROCM_PATH
:
/opt/dtk
HSA_FORCE_FINE_GRAIN_PCIE
:
1
NCCL_SOCKET_IFNAME
:
p14p2
NCCL_NET_GDR_LEVEL
:
PHB
NCCL_NET_GDR_READ
:
1
NCCL_BUFFSIZE
:
4194304
NCCL_BUFFSIZE
:
4194304
NCCL_SIMPLE_CHANNELS
:
32
RCCL_P2P_XHCL_CHANNEL_NUM
:
31
RCCL_COLL_XHCL_CHANNEL_NUM
:
28
parameters
:
parameters
:
maxbytes
:
16G
maxbytes
:
16G
ngpus
:
1
ngpus
:
1
operation
:
allreduce
operation
:
allreduce
rccl-bw:allreduce-r8:
rccl-bw:allreduce-r8-pcie:
enable
:
fals
e
enable
:
tru
e
modes
:
modes
:
-
name
:
mpi
-
name
:
mpi
proc_num
:
8
proc_num
:
8
node_num
:
1
node_num
:
1
bind_to
:
none
bind_to
:
none
mca
:
pml
:
ucx
btl
:
^openib
btl_tcp_if_exclude
:
lo,docker0
coll_hcoll_enable
:
0
env
:
env
:
ROCM_PATH
:
/opt/dtk
HSA_FORCE_FINE_GRAIN_PCIE
:
1
NCCL_SOCKET_IFNAME
:
p14p2
NCCL_NET_GDR_LEVEL
:
PHB
NCCL_NET_GDR_READ
:
1
NCCL_BUFFSIZE
:
4194304
NCCL_BUFFSIZE
:
4194304
NCCL_SIMPLE_CHANNELS
:
32
RCCL_P2P_XHCL_CHANNEL_NUM
:
31
RCCL_COLL_XHCL_CHANNEL_NUM
:
28
parameters
:
parameters
:
maxbytes
:
16G
maxbytes
:
16G
ngpus
:
1
ngpus
:
1
operation
:
allreduce
operation
:
allreduce
rccl-bw:alltoall-r16:
rccl-bw:alltoall-r16:
enable
:
tru
e
enable
:
fals
e
modes
:
modes
:
-
name
:
mpi
-
name
:
mpi
proc_num
:
8
proc_num
:
8
...
@@ -163,6 +143,17 @@ superbench:
...
@@ -163,6 +143,17 @@ superbench:
maxbytes
:
16G
maxbytes
:
16G
ngpus
:
1
ngpus
:
1
operation
:
alltoall
operation
:
alltoall
rccl-bw:alltoall-r8:
enable
:
true
modes
:
-
name
:
mpi
proc_num
:
8
node_num
:
1
bind_to
:
none
parameters
:
maxbytes
:
16G
ngpus
:
1
operation
:
alltoall
gpu-hpcg:r32:
gpu-hpcg:r32:
enable
:
false
enable
:
false
modes
:
modes
:
...
@@ -182,12 +173,12 @@ superbench:
...
@@ -182,12 +173,12 @@ superbench:
nx
:
560
nx
:
560
ny
:
280
ny
:
280
nz
:
280
nz
:
280
rt
:
6
0
rt
:
1
0
npx
:
4
npx
:
4
npy
:
4
npy
:
4
npz
:
2
npz
:
2
gpu-hpcg:r16:
gpu-hpcg:r16:
enable
:
tru
e
enable
:
fals
e
modes
:
modes
:
-
name
:
mpi
-
name
:
mpi
proc_num
:
8
proc_num
:
8
...
@@ -205,7 +196,7 @@ superbench:
...
@@ -205,7 +196,7 @@ superbench:
nx
:
560
nx
:
560
ny
:
280
ny
:
280
nz
:
280
nz
:
280
rt
:
0
rt
:
1
0
npx
:
4
npx
:
4
npy
:
2
npy
:
2
npz
:
2
npz
:
2
...
@@ -220,7 +211,7 @@ superbench:
...
@@ -220,7 +211,7 @@ superbench:
nx
:
560
nx
:
560
ny
:
280
ny
:
280
nz
:
280
nz
:
280
rt
:
6
0
rt
:
1
0
npx
:
2
npx
:
2
npy
:
2
npy
:
2
npz
:
2
npz
:
2
...
@@ -235,7 +226,7 @@ superbench:
...
@@ -235,7 +226,7 @@ superbench:
nx
:
560
nx
:
560
ny
:
280
ny
:
280
nz
:
280
nz
:
280
rt
:
6
0
rt
:
1
0
npx
:
2
npx
:
2
npy
:
2
npy
:
2
npz
:
1
npz
:
1
...
@@ -250,7 +241,7 @@ superbench:
...
@@ -250,7 +241,7 @@ superbench:
nx
:
560
nx
:
560
ny
:
280
ny
:
280
nz
:
280
nz
:
280
rt
:
6
0
rt
:
1
0
npx
:
2
npx
:
2
npy
:
1
npy
:
1
npz
:
1
npz
:
1
...
@@ -265,7 +256,7 @@ superbench:
...
@@ -265,7 +256,7 @@ superbench:
nx
:
560
nx
:
560
ny
:
280
ny
:
280
nz
:
280
nz
:
280
rt
:
6
0
rt
:
1
0
npx
:
1
npx
:
1
npy
:
1
npy
:
1
npz
:
1
npz
:
1
...
@@ -281,7 +272,7 @@ superbench:
...
@@ -281,7 +272,7 @@ superbench:
-
latency_matrix
-
latency_matrix
-
max_bandwidth
-
max_bandwidth
mem-bw
:
mem-bw
:
enable
:
fals
e
enable
:
tru
e
modes
:
modes
:
-
name
:
local
-
name
:
local
proc_num
:
8
proc_num
:
8
...
@@ -290,10 +281,10 @@ superbench:
...
@@ -290,10 +281,10 @@ superbench:
ib-loopback
:
ib-loopback
:
enable
:
false
enable
:
false
modes
:
modes
:
-
name
:
local
-
name
:
local
proc_num
:
16
proc_num
:
16
prefix
:
PROC_RANK={proc_rank} IB_DEVICES=0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7 numactl -N $(({proc_rank}/8)) -m $(({proc_rank}/8))
prefix
:
PROC_RANK={proc_rank} IB_DEVICES=0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7 numactl -N $(({proc_rank}/8)) -m $(({proc_rank}/8))
parallel
:
no
parallel
:
no
parameters
:
parameters
:
msg_size
:
8388608
msg_size
:
8388608
disk-benchmark
:
disk-benchmark
:
...
@@ -305,7 +296,7 @@ superbench:
...
@@ -305,7 +296,7 @@ superbench:
parameters
:
parameters
:
block_devices
:
[]
block_devices
:
[]
gpu-copy-bw:correctness:
gpu-copy-bw:correctness:
enable
:
fals
e
enable
:
tru
e
modes
:
modes
:
-
name
:
local
-
name
:
local
parallel
:
no
parallel
:
no
...
@@ -317,7 +308,7 @@ superbench:
...
@@ -317,7 +308,7 @@ superbench:
num_loops
:
1
num_loops
:
1
check_data
:
true
check_data
:
true
gpu-copy-bw:perf:
gpu-copy-bw:perf:
enable
:
fals
e
enable
:
tru
e
modes
:
modes
:
-
name
:
local
-
name
:
local
parallel
:
no
parallel
:
no
...
...
superbench/config/hygon_bw1000_summary.yaml
deleted
100644 → 0
View file @
0993db75
# SuperBench summary rules for hygon_bw1000.yaml
#
# Usage:
# sb result summary \
# --data-file <output-dir>/results-summary.jsonl \
# --rule-file superbench/config/hygon_bw1000_summary.yaml \
# --output-file-format md \
# --output-dir <summary-output-dir>
#
# Notes:
# - This file focuses on RCCL benchmarks defined in
# superbench/config/hygon_bw1000.yaml.
# - Unmatched benchmark sections are allowed. If a benchmark was not run,
# the corresponding category in the summary will be empty.
version
:
v0.12
superbench
:
rules
:
rccl_bw_allreduce_r16_bandwidth
:
statistics
:
mean
categories
:
RCCL rccl-bw:allreduce-r16
metrics
:
-
rccl-bw:allreduce-r16/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_busbw
-
rccl-bw:allreduce-r16/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_algbw
rccl_bw_allreduce_r8_pcie_bandwidth
:
statistics
:
mean
categories
:
RCCL rccl-bw:allreduce-r8-pcie
metrics
:
-
rccl-bw:allreduce-r8-pcie/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_busbw
-
rccl-bw:allreduce-r8-pcie/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_algbw
rccl_bw_allreduce_r8_bandwidth
:
statistics
:
mean
categories
:
RCCL rccl-bw:allreduce-r8
metrics
:
-
rccl-bw:allreduce-r8/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_busbw
-
rccl-bw:allreduce-r8/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_algbw
rccl_bw_alltoall_r16_bandwidth
:
statistics
:
mean
categories
:
RCCL rccl-bw:alltoall-r16
metrics
:
-
rccl-bw:alltoall-r16/alltoall_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_busbw
-
rccl-bw:alltoall-r16/alltoall_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_algbw
superbench/config/hygon_bw150.yaml
0 → 100644
View file @
511807b7
# SuperBench Config
version
:
v0.12
superbench
:
enable
:
null
monitor
:
enable
:
false
var
:
default_local_mode
:
&default_local_mode
enable
:
true
modes
:
-
name
:
local
proc_num
:
8
prefix
:
HIP_VISIBLE_DEVICES={proc_rank}
parallel
:
yes
default_pytorch_mode
:
&default_pytorch_mode
enable
:
true
modes
:
-
name
:
torch.distributed
proc_num
:
8
node_num
:
1
frameworks
:
-
pytorch
common_model_config
:
&common_model_config
model_ddp_parameter
:
&model_ddp_param
duration
:
0
num_warmup
:
128
num_steps
:
512
sample_count
:
8192
batch_size
:
128
precision
:
[float32, float16]
model_action
:
[train]
pin_memory
:
yes
num_workers
:
0
benchmarks
:
kernel-launch
:
<<
:
*default_local_mode
gemm-flops
:
<<
:
*default_local_mode
parameters
:
m
:
7680
n
:
8192
k
:
8192
hipblaslt-gemm
:
enable
:
true
modes
:
-
name
:
local
proc_num
:
8
prefix
:
HIP_VISIBLE_DEVICES={proc_rank}
parallel
:
yes
parameters
:
in_types
:
["fp32", "fp16", "bf16"]
tolerant_fail
:
yes
num_warmup
:
100
num_steps
:
1000
shapes
:
-
4096,4096,4096
-
8192,8192,8192
-
16384,16384,16384
gpu-stream
:
enable
:
true
modes
:
-
name
:
local
proc_num
:
8
prefix
:
HIP_VISIBLE_DEVICES={proc_rank}
parallel
:
yes
parameters
:
array_size
:
268435456
num_loops
:
100
precision
:
double
rccl-bw:allreduce-r8:
enable
:
true
modes
:
-
name
:
mpi
proc_num
:
8
node_num
:
1
bind_to
:
none
parameters
:
maxbytes
:
16G
ngpus
:
1
operation
:
allreduce
rccl-bw:allreduce-r4:
enable
:
true
modes
:
-
name
:
mpi
proc_num
:
4
node_num
:
1
bind_to
:
none
env
:
NCCL_BUFFSIZE
:
4194304
parameters
:
maxbytes
:
16G
ngpus
:
1
operation
:
allreduce
rccl-bw:allreduce-r4-graph:
enable
:
true
modes
:
-
name
:
mpi
proc_num
:
4
node_num
:
1
bind_to
:
none
env
:
NCCL_BUFFSIZE
:
4194304
NCCL_RINGS
:
"0 1 2 3|0 3 2 1|0 1 3 2|0 2 3 1|0 2 1 3|0 3 1 2"
parameters
:
maxbytes
:
16G
ngpus
:
1
operation
:
allreduce
rccl-bw:allreduce-r4-pcie:
enable
:
true
modes
:
-
name
:
mpi
proc_num
:
4
node_num
:
1
bind_to
:
none
env
:
NCCL_BUFFSIZE
:
4194304
NCCL_SIMPLE_CHANNELS
:
20
RCCL_P2P_XHCL_CHANNEL_NUM
:
16
RCCL_COLL_XHCL_CHANNEL_NUM
:
16
parameters
:
maxbytes
:
16G
ngpus
:
1
operation
:
allreduce
rccl-bw:allreduce-r4-graph-pcie:
enable
:
true
modes
:
-
name
:
mpi
proc_num
:
4
node_num
:
1
bind_to
:
none
env
:
NCCL_BUFFSIZE
:
4194304
NCCL_SIMPLE_CHANNELS
:
20
RCCL_P2P_XHCL_CHANNEL_NUM
:
16
RCCL_COLL_XHCL_CHANNEL_NUM
:
16
NCCL_RINGS
:
"0 1 2 3|0 3 2 1|0 1 3 2|0 2 3 1|0 2 1 3|0 3 1 2"
parameters
:
maxbytes
:
16G
ngpus
:
1
operation
:
allreduce
rccl-bw:alltoall-r8:
enable
:
true
modes
:
-
name
:
mpi
proc_num
:
8
node_num
:
1
bind_to
:
none
parameters
:
maxbytes
:
16G
ngpus
:
1
operation
:
alltoall
rccl-bw:alltoall-r4:
enable
:
true
modes
:
-
name
:
mpi
proc_num
:
4
node_num
:
1
bind_to
:
none
parameters
:
maxbytes
:
16G
ngpus
:
1
operation
:
alltoall
gpu-hpcg:r32:
enable
:
false
modes
:
-
name
:
mpi
proc_num
:
8
node_num
:
4
host_list
:
[node01, node02, node03, node04]
bind_to
:
none
mca
:
pml
:
ob1
btl
:
^openib
btl_tcp_if_include
:
p14p2
coll_hcoll_enable
:
0
parameters
:
nx
:
560
ny
:
280
nz
:
280
rt
:
10
npx
:
4
npy
:
4
npz
:
2
gpu-hpcg:r16:
enable
:
false
modes
:
-
name
:
mpi
proc_num
:
8
node_num
:
2
host_list
:
[node01, node02]
bind_to
:
none
mca
:
pml
:
ob1
btl
:
^openib
btl_tcp_if_include
:
p14p2
coll_hcoll_enable
:
0
parameters
:
nx
:
560
ny
:
280
nz
:
280
rt
:
10
npx
:
4
npy
:
2
npz
:
2
gpu-hpcg:r8:
enable
:
false
modes
:
-
name
:
mpi
proc_num
:
8
node_num
:
1
bind_to
:
none
parameters
:
nx
:
560
ny
:
280
nz
:
280
rt
:
10
npx
:
2
npy
:
2
npz
:
2
gpu-hpcg:r4:
enable
:
false
modes
:
-
name
:
mpi
proc_num
:
4
node_num
:
1
bind_to
:
none
parameters
:
nx
:
560
ny
:
280
nz
:
280
rt
:
10
npx
:
2
npy
:
2
npz
:
1
gpu-hpcg:r2:
enable
:
false
modes
:
-
name
:
mpi
proc_num
:
2
node_num
:
1
bind_to
:
none
parameters
:
nx
:
560
ny
:
280
nz
:
280
rt
:
10
npx
:
2
npy
:
1
npz
:
1
gpu-hpcg:r1:
enable
:
false
modes
:
-
name
:
mpi
proc_num
:
1
node_num
:
1
bind_to
:
none
parameters
:
nx
:
560
ny
:
280
nz
:
280
rt
:
10
npx
:
1
npy
:
1
npz
:
1
cpu-memory-bw-latency
:
enable
:
false
modes
:
-
name
:
local
proc_num
:
1
parallel
:
no
parameters
:
tests
:
-
bandwidth_matrix
-
latency_matrix
-
max_bandwidth
mem-bw
:
enable
:
true
modes
:
-
name
:
local
proc_num
:
8
prefix
:
HIP_VISIBLE_DEVICES={proc_rank}
parallel
:
no
ib-loopback
:
enable
:
false
modes
:
-
name
:
local
proc_num
:
16
prefix
:
PROC_RANK={proc_rank} IB_DEVICES=0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7 numactl -N $(({proc_rank}/8)) -m $(({proc_rank}/8))
parallel
:
no
parameters
:
msg_size
:
8388608
disk-benchmark
:
enable
:
false
modes
:
-
name
:
local
proc_num
:
1
parallel
:
no
parameters
:
block_devices
:
[]
gpu-copy-bw:correctness:
enable
:
true
modes
:
-
name
:
local
parallel
:
no
parameters
:
mem_type
:
[htod, dtoh, dtod, one_to_all, all_to_one, all_to_all]
copy_type
:
[sm, dma]
size
:
4096
num_warm_up
:
0
num_loops
:
1
check_data
:
true
gpu-copy-bw:perf:
enable
:
true
modes
:
-
name
:
local
parallel
:
no
parameters
:
mem_type
:
[htod, dtoh, dtod, one_to_all, all_to_one, all_to_all]
copy_type
:
[sm, dma]
# dist-inference:
# modes:
# - name: mpi
# proc_num: 8
# node_num: 1
# mca:
# pml: ob1
# btl: ^openib
# btl_tcp_if_exclude: lo,docker0
# coll_hcoll_enable: 0
# frameworks:
# - pytorch
# parameters:
# num_layers: 50
# num_warmup: 20
# num_steps: 100
# use_cuda_graph: true
# precision: float16
# hidden_size: 128
# input_size: 128
# batch_size: 1024
superbench/config/hygon_bw_summary.yaml
0 → 100644
View file @
511807b7
# SuperBench summary rules for Hygon BW configurations
#
# Usage:
# sb result summary \
# --data-file <output-dir>/results-summary.jsonl \
# --rule-file superbench/config/hygon_bw_summary.yaml \
# --output-file-format md \
# --output-dir <summary-output-dir>
#
# Notes:
# - This file covers common RCCL and GPU HPCG benchmark metrics used by
# Hygon BW configuration files.
# - Unmatched benchmark sections are allowed. If a benchmark was not run,
# the corresponding category in the summary will be empty.
version
:
v0.12
superbench
:
rules
:
rccl_bw_allreduce_r16_bandwidth
:
statistics
:
mean
categories
:
RCCL rccl-bw:allreduce-r16
metrics
:
-
rccl-bw:allreduce-r16/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_busbw
-
rccl-bw:allreduce-r16/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_algbw
rccl_bw_allreduce_r8_bandwidth
:
statistics
:
mean
categories
:
RCCL rccl-bw:allreduce-r8
metrics
:
-
rccl-bw:allreduce-r8/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_busbw
-
rccl-bw:allreduce-r8/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_algbw
rccl_bw_allreduce_r8_pcie_bandwidth
:
statistics
:
mean
categories
:
RCCL rccl-bw:allreduce-r8-pcie
metrics
:
-
rccl-bw:allreduce-r8-pcie/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_busbw
-
rccl-bw:allreduce-r8-pcie/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_algbw
rccl_bw_allreduce_r4_bandwidth
:
statistics
:
mean
categories
:
RCCL rccl-bw:allreduce-r4
metrics
:
-
rccl-bw:allreduce-r4/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_busbw
-
rccl-bw:allreduce-r4/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_algbw
rccl_bw_allreduce_r4_graph_bandwidth
:
statistics
:
mean
categories
:
RCCL rccl-bw:allreduce-r4-graph
metrics
:
-
rccl-bw:allreduce-r4-graph/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_busbw
-
rccl-bw:allreduce-r4-graph/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_algbw
rccl_bw_allreduce_r4_pcie_bandwidth
:
statistics
:
mean
categories
:
RCCL rccl-bw:allreduce-r4-pcie
metrics
:
-
rccl-bw:allreduce-r4-pcie/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_busbw
-
rccl-bw:allreduce-r4-pcie/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_algbw
rccl_bw_allreduce_r4_graph_pcie_bandwidth
:
statistics
:
mean
categories
:
RCCL rccl-bw:allreduce-r4-graph-pcie
metrics
:
-
rccl-bw:allreduce-r4-graph-pcie/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_busbw
-
rccl-bw:allreduce-r4-graph-pcie/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_algbw
rccl_bw_alltoall_r8_bandwidth
:
statistics
:
mean
categories
:
RCCL rccl-bw:alltoall-r8
metrics
:
-
rccl-bw:alltoall-r8/alltoall_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_busbw
-
rccl-bw:alltoall-r8/alltoall_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_algbw
rccl_bw_alltoall_r16_bandwidth
:
statistics
:
mean
categories
:
RCCL rccl-bw:alltoall-r16
metrics
:
-
rccl-bw:alltoall-r16/alltoall_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_busbw
-
rccl-bw:alltoall-r16/alltoall_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_algbw
rccl_bw_alltoall_r4_bandwidth
:
statistics
:
mean
categories
:
RCCL rccl-bw:alltoall-r4
metrics
:
-
rccl-bw:alltoall-r4/alltoall_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_busbw
-
rccl-bw:alltoall-r4/alltoall_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_algbw
gpu_hpcg_r1
:
statistics
:
mean
categories
:
HPCG gpu-hpcg:r1
metrics
:
-
gpu-hpcg:r1/is_valid
-
gpu-hpcg:r1/final_gflops
-
gpu-hpcg:r1/final_bandwidth
-
gpu-hpcg:r1/final_gflops_per_process
-
gpu-hpcg:r1/final_bandwidth_per_process
-
gpu-hpcg:r1/ddot_gflops
-
gpu-hpcg:r1/ddot_bandwidth
-
gpu-hpcg:r1/ddot_gflops_per_process
-
gpu-hpcg:r1/ddot_bandwidth_per_process
-
gpu-hpcg:r1/waxpby_gflops
-
gpu-hpcg:r1/waxpby_bandwidth
-
gpu-hpcg:r1/waxpby_gflops_per_process
-
gpu-hpcg:r1/waxpby_bandwidth_per_process
-
gpu-hpcg:r1/spmv_gflops
-
gpu-hpcg:r1/spmv_bandwidth
-
gpu-hpcg:r1/spmv_gflops_per_process
-
gpu-hpcg:r1/spmv_bandwidth_per_process
-
gpu-hpcg:r1/mg_gflops
-
gpu-hpcg:r1/mg_bandwidth
-
gpu-hpcg:r1/mg_gflops_per_process
-
gpu-hpcg:r1/mg_bandwidth_per_process
-
gpu-hpcg:r1/total_gflops
-
gpu-hpcg:r1/total_bandwidth
-
gpu-hpcg:r1/total_gflops_per_process
-
gpu-hpcg:r1/total_bandwidth_per_process
-
gpu-hpcg:r1/local_domain_x
-
gpu-hpcg:r1/local_domain_y
-
gpu-hpcg:r1/local_domain_z
-
gpu-hpcg:r1/process_domain_x
-
gpu-hpcg:r1/process_domain_y
-
gpu-hpcg:r1/process_domain_z
gpu_hpcg_r2
:
statistics
:
mean
categories
:
HPCG gpu-hpcg:r2
metrics
:
-
gpu-hpcg:r2/is_valid
-
gpu-hpcg:r2/final_gflops
-
gpu-hpcg:r2/final_bandwidth
-
gpu-hpcg:r2/final_gflops_per_process
-
gpu-hpcg:r2/final_bandwidth_per_process
-
gpu-hpcg:r2/ddot_gflops
-
gpu-hpcg:r2/ddot_bandwidth
-
gpu-hpcg:r2/ddot_gflops_per_process
-
gpu-hpcg:r2/ddot_bandwidth_per_process
-
gpu-hpcg:r2/waxpby_gflops
-
gpu-hpcg:r2/waxpby_bandwidth
-
gpu-hpcg:r2/waxpby_gflops_per_process
-
gpu-hpcg:r2/waxpby_bandwidth_per_process
-
gpu-hpcg:r2/spmv_gflops
-
gpu-hpcg:r2/spmv_bandwidth
-
gpu-hpcg:r2/spmv_gflops_per_process
-
gpu-hpcg:r2/spmv_bandwidth_per_process
-
gpu-hpcg:r2/mg_gflops
-
gpu-hpcg:r2/mg_bandwidth
-
gpu-hpcg:r2/mg_gflops_per_process
-
gpu-hpcg:r2/mg_bandwidth_per_process
-
gpu-hpcg:r2/total_gflops
-
gpu-hpcg:r2/total_bandwidth
-
gpu-hpcg:r2/total_gflops_per_process
-
gpu-hpcg:r2/total_bandwidth_per_process
-
gpu-hpcg:r2/local_domain_x
-
gpu-hpcg:r2/local_domain_y
-
gpu-hpcg:r2/local_domain_z
-
gpu-hpcg:r2/process_domain_x
-
gpu-hpcg:r2/process_domain_y
-
gpu-hpcg:r2/process_domain_z
gpu_hpcg_r4
:
statistics
:
mean
categories
:
HPCG gpu-hpcg:r4
metrics
:
-
gpu-hpcg:r4/is_valid
-
gpu-hpcg:r4/final_gflops
-
gpu-hpcg:r4/final_bandwidth
-
gpu-hpcg:r4/final_gflops_per_process
-
gpu-hpcg:r4/final_bandwidth_per_process
-
gpu-hpcg:r4/ddot_gflops
-
gpu-hpcg:r4/ddot_bandwidth
-
gpu-hpcg:r4/ddot_gflops_per_process
-
gpu-hpcg:r4/ddot_bandwidth_per_process
-
gpu-hpcg:r4/waxpby_gflops
-
gpu-hpcg:r4/waxpby_bandwidth
-
gpu-hpcg:r4/waxpby_gflops_per_process
-
gpu-hpcg:r4/waxpby_bandwidth_per_process
-
gpu-hpcg:r4/spmv_gflops
-
gpu-hpcg:r4/spmv_bandwidth
-
gpu-hpcg:r4/spmv_gflops_per_process
-
gpu-hpcg:r4/spmv_bandwidth_per_process
-
gpu-hpcg:r4/mg_gflops
-
gpu-hpcg:r4/mg_bandwidth
-
gpu-hpcg:r4/mg_gflops_per_process
-
gpu-hpcg:r4/mg_bandwidth_per_process
-
gpu-hpcg:r4/total_gflops
-
gpu-hpcg:r4/total_bandwidth
-
gpu-hpcg:r4/total_gflops_per_process
-
gpu-hpcg:r4/total_bandwidth_per_process
-
gpu-hpcg:r4/local_domain_x
-
gpu-hpcg:r4/local_domain_y
-
gpu-hpcg:r4/local_domain_z
-
gpu-hpcg:r4/process_domain_x
-
gpu-hpcg:r4/process_domain_y
-
gpu-hpcg:r4/process_domain_z
gpu_hpcg_r8
:
statistics
:
mean
categories
:
HPCG gpu-hpcg:r8
metrics
:
-
gpu-hpcg:r8/is_valid
-
gpu-hpcg:r8/final_gflops
-
gpu-hpcg:r8/final_bandwidth
-
gpu-hpcg:r8/final_gflops_per_process
-
gpu-hpcg:r8/final_bandwidth_per_process
-
gpu-hpcg:r8/ddot_gflops
-
gpu-hpcg:r8/ddot_bandwidth
-
gpu-hpcg:r8/ddot_gflops_per_process
-
gpu-hpcg:r8/ddot_bandwidth_per_process
-
gpu-hpcg:r8/waxpby_gflops
-
gpu-hpcg:r8/waxpby_bandwidth
-
gpu-hpcg:r8/waxpby_gflops_per_process
-
gpu-hpcg:r8/waxpby_bandwidth_per_process
-
gpu-hpcg:r8/spmv_gflops
-
gpu-hpcg:r8/spmv_bandwidth
-
gpu-hpcg:r8/spmv_gflops_per_process
-
gpu-hpcg:r8/spmv_bandwidth_per_process
-
gpu-hpcg:r8/mg_gflops
-
gpu-hpcg:r8/mg_bandwidth
-
gpu-hpcg:r8/mg_gflops_per_process
-
gpu-hpcg:r8/mg_bandwidth_per_process
-
gpu-hpcg:r8/total_gflops
-
gpu-hpcg:r8/total_bandwidth
-
gpu-hpcg:r8/total_gflops_per_process
-
gpu-hpcg:r8/total_bandwidth_per_process
-
gpu-hpcg:r8/local_domain_x
-
gpu-hpcg:r8/local_domain_y
-
gpu-hpcg:r8/local_domain_z
-
gpu-hpcg:r8/process_domain_x
-
gpu-hpcg:r8/process_domain_y
-
gpu-hpcg:r8/process_domain_z
gpu_hpcg_r16
:
statistics
:
mean
categories
:
HPCG gpu-hpcg:r16
metrics
:
-
gpu-hpcg:r16/is_valid
-
gpu-hpcg:r16/final_gflops
-
gpu-hpcg:r16/final_bandwidth
-
gpu-hpcg:r16/final_gflops_per_process
-
gpu-hpcg:r16/final_bandwidth_per_process
-
gpu-hpcg:r16/ddot_gflops
-
gpu-hpcg:r16/ddot_bandwidth
-
gpu-hpcg:r16/ddot_gflops_per_process
-
gpu-hpcg:r16/ddot_bandwidth_per_process
-
gpu-hpcg:r16/waxpby_gflops
-
gpu-hpcg:r16/waxpby_bandwidth
-
gpu-hpcg:r16/waxpby_gflops_per_process
-
gpu-hpcg:r16/waxpby_bandwidth_per_process
-
gpu-hpcg:r16/spmv_gflops
-
gpu-hpcg:r16/spmv_bandwidth
-
gpu-hpcg:r16/spmv_gflops_per_process
-
gpu-hpcg:r16/spmv_bandwidth_per_process
-
gpu-hpcg:r16/mg_gflops
-
gpu-hpcg:r16/mg_bandwidth
-
gpu-hpcg:r16/mg_gflops_per_process
-
gpu-hpcg:r16/mg_bandwidth_per_process
-
gpu-hpcg:r16/total_gflops
-
gpu-hpcg:r16/total_bandwidth
-
gpu-hpcg:r16/total_gflops_per_process
-
gpu-hpcg:r16/total_bandwidth_per_process
-
gpu-hpcg:r16/local_domain_x
-
gpu-hpcg:r16/local_domain_y
-
gpu-hpcg:r16/local_domain_z
-
gpu-hpcg:r16/process_domain_x
-
gpu-hpcg:r16/process_domain_y
-
gpu-hpcg:r16/process_domain_z
gpu_hpcg_r32
:
statistics
:
mean
categories
:
HPCG gpu-hpcg:r32
metrics
:
-
gpu-hpcg:r32/is_valid
-
gpu-hpcg:r32/final_gflops
-
gpu-hpcg:r32/final_bandwidth
-
gpu-hpcg:r32/final_gflops_per_process
-
gpu-hpcg:r32/final_bandwidth_per_process
-
gpu-hpcg:r32/ddot_gflops
-
gpu-hpcg:r32/ddot_bandwidth
-
gpu-hpcg:r32/ddot_gflops_per_process
-
gpu-hpcg:r32/ddot_bandwidth_per_process
-
gpu-hpcg:r32/waxpby_gflops
-
gpu-hpcg:r32/waxpby_bandwidth
-
gpu-hpcg:r32/waxpby_gflops_per_process
-
gpu-hpcg:r32/waxpby_bandwidth_per_process
-
gpu-hpcg:r32/spmv_gflops
-
gpu-hpcg:r32/spmv_bandwidth
-
gpu-hpcg:r32/spmv_gflops_per_process
-
gpu-hpcg:r32/spmv_bandwidth_per_process
-
gpu-hpcg:r32/mg_gflops
-
gpu-hpcg:r32/mg_bandwidth
-
gpu-hpcg:r32/mg_gflops_per_process
-
gpu-hpcg:r32/mg_bandwidth_per_process
-
gpu-hpcg:r32/total_gflops
-
gpu-hpcg:r32/total_bandwidth
-
gpu-hpcg:r32/total_gflops_per_process
-
gpu-hpcg:r32/total_bandwidth_per_process
-
gpu-hpcg:r32/local_domain_x
-
gpu-hpcg:r32/local_domain_y
-
gpu-hpcg:r32/local_domain_z
-
gpu-hpcg:r32/process_domain_x
-
gpu-hpcg:r32/process_domain_y
-
gpu-hpcg:r32/process_domain_z
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment