Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
superbenchmark
Commits
511807b7
Unverified
Commit
511807b7
authored
Apr 21, 2026
by
one
Committed by
GitHub
Apr 21, 2026
Browse files
Config: Update config files (#7)
- Add BW150 config - Update BW1000 config - Merge summary rules
parent
0993db75
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
687 additions
and
98 deletions
+687
-98
superbench/config/hygon_bw1000.yaml
superbench/config/hygon_bw1000.yaml
+44
-53
superbench/config/hygon_bw1000_summary.yaml
superbench/config/hygon_bw1000_summary.yaml
+0
-45
superbench/config/hygon_bw150.yaml
superbench/config/hygon_bw150.yaml
+340
-0
superbench/config/hygon_bw_summary.yaml
superbench/config/hygon_bw_summary.yaml
+303
-0
No files found.
superbench/config/hygon_bw1000.yaml
View file @
511807b7
...
...
@@ -6,7 +6,7 @@ superbench:
enable
:
false
var
:
default_local_mode
:
&default_local_mode
enable
:
false
enable
:
true
modes
:
-
name
:
local
proc_num
:
8
...
...
@@ -44,14 +44,14 @@ superbench:
n
:
8192
k
:
8192
hipblaslt-gemm
:
enable
:
false
enable
:
true
modes
:
-
name
:
local
proc_num
:
8
prefix
:
HIP_VISIBLE_DEVICES={proc_rank}
parallel
:
yes
parameters
:
in_types
:
[
"fp32"
,
"fp16"
,
"bf16"
]
in_types
:
[
"fp32"
,
"fp16"
,
"bf16"
]
tolerant_fail
:
yes
num_warmup
:
100
num_steps
:
1000
...
...
@@ -60,7 +60,7 @@ superbench:
-
8192,8192,8192
-
16384,16384,16384
gpu-stream
:
enable
:
false
enable
:
true
modes
:
-
name
:
local
proc_num
:
8
...
...
@@ -71,7 +71,7 @@ superbench:
num_loops
:
100
precision
:
double
rccl-bw:allreduce-r16:
enable
:
true
enable
:
false
modes
:
-
name
:
mpi
proc_num
:
8
...
...
@@ -92,57 +92,37 @@ superbench:
maxbytes
:
16G
ngpus
:
1
operation
:
allreduce
rccl-bw:allreduce-r8-pcie:
rccl-bw:allreduce-r8:
enable
:
true
modes
:
-
name
:
mpi
proc_num
:
8
node_num
:
1
bind_to
:
none
mca
:
pml
:
ucx
btl
:
^openib
btl_tcp_if_exclude
:
lo,docker0
coll_hcoll_enable
:
0
env
:
ROCM_PATH
:
/opt/dtk
HSA_FORCE_FINE_GRAIN_PCIE
:
1
NCCL_SOCKET_IFNAME
:
p14p2
NCCL_NET_GDR_LEVEL
:
PHB
NCCL_NET_GDR_READ
:
1
NCCL_BUFFSIZE
:
4194304
NCCL_SIMPLE_CHANNELS
:
32
RCCL_P2P_XHCL_CHANNEL_NUM
:
31
RCCL_COLL_XHCL_CHANNEL_NUM
:
28
parameters
:
maxbytes
:
16G
ngpus
:
1
operation
:
allreduce
rccl-bw:allreduce-r8:
enable
:
false
rccl-bw:allreduce-r8-pcie:
enable
:
true
modes
:
-
name
:
mpi
proc_num
:
8
node_num
:
1
bind_to
:
none
mca
:
pml
:
ucx
btl
:
^openib
btl_tcp_if_exclude
:
lo,docker0
coll_hcoll_enable
:
0
env
:
ROCM_PATH
:
/opt/dtk
HSA_FORCE_FINE_GRAIN_PCIE
:
1
NCCL_SOCKET_IFNAME
:
p14p2
NCCL_NET_GDR_LEVEL
:
PHB
NCCL_NET_GDR_READ
:
1
NCCL_BUFFSIZE
:
4194304
NCCL_SIMPLE_CHANNELS
:
32
RCCL_P2P_XHCL_CHANNEL_NUM
:
31
RCCL_COLL_XHCL_CHANNEL_NUM
:
28
parameters
:
maxbytes
:
16G
ngpus
:
1
operation
:
allreduce
rccl-bw:alltoall-r16:
enable
:
true
enable
:
false
modes
:
-
name
:
mpi
proc_num
:
8
...
...
@@ -163,6 +143,17 @@ superbench:
maxbytes
:
16G
ngpus
:
1
operation
:
alltoall
rccl-bw:alltoall-r8:
enable
:
true
modes
:
-
name
:
mpi
proc_num
:
8
node_num
:
1
bind_to
:
none
parameters
:
maxbytes
:
16G
ngpus
:
1
operation
:
alltoall
gpu-hpcg:r32:
enable
:
false
modes
:
...
...
@@ -182,12 +173,12 @@ superbench:
nx
:
560
ny
:
280
nz
:
280
rt
:
60
rt
:
10
npx
:
4
npy
:
4
npz
:
2
gpu-hpcg:r16:
enable
:
true
enable
:
false
modes
:
-
name
:
mpi
proc_num
:
8
...
...
@@ -205,7 +196,7 @@ superbench:
nx
:
560
ny
:
280
nz
:
280
rt
:
0
rt
:
10
npx
:
4
npy
:
2
npz
:
2
...
...
@@ -220,7 +211,7 @@ superbench:
nx
:
560
ny
:
280
nz
:
280
rt
:
60
rt
:
10
npx
:
2
npy
:
2
npz
:
2
...
...
@@ -235,7 +226,7 @@ superbench:
nx
:
560
ny
:
280
nz
:
280
rt
:
60
rt
:
10
npx
:
2
npy
:
2
npz
:
1
...
...
@@ -250,7 +241,7 @@ superbench:
nx
:
560
ny
:
280
nz
:
280
rt
:
60
rt
:
10
npx
:
2
npy
:
1
npz
:
1
...
...
@@ -265,7 +256,7 @@ superbench:
nx
:
560
ny
:
280
nz
:
280
rt
:
60
rt
:
10
npx
:
1
npy
:
1
npz
:
1
...
...
@@ -281,7 +272,7 @@ superbench:
-
latency_matrix
-
max_bandwidth
mem-bw
:
enable
:
false
enable
:
true
modes
:
-
name
:
local
proc_num
:
8
...
...
@@ -305,7 +296,7 @@ superbench:
parameters
:
block_devices
:
[]
gpu-copy-bw:correctness:
enable
:
false
enable
:
true
modes
:
-
name
:
local
parallel
:
no
...
...
@@ -317,7 +308,7 @@ superbench:
num_loops
:
1
check_data
:
true
gpu-copy-bw:perf:
enable
:
false
enable
:
true
modes
:
-
name
:
local
parallel
:
no
...
...
superbench/config/hygon_bw1000_summary.yaml
deleted
100644 → 0
View file @
0993db75
# SuperBench summary rules for hygon_bw1000.yaml
#
# Usage:
# sb result summary \
# --data-file <output-dir>/results-summary.jsonl \
# --rule-file superbench/config/hygon_bw1000_summary.yaml \
# --output-file-format md \
# --output-dir <summary-output-dir>
#
# Notes:
# - This file focuses on RCCL benchmarks defined in
# superbench/config/hygon_bw1000.yaml.
# - Unmatched benchmark sections are allowed. If a benchmark was not run,
# the corresponding category in the summary will be empty.
version
:
v0.12
superbench
:
rules
:
rccl_bw_allreduce_r16_bandwidth
:
statistics
:
mean
categories
:
RCCL rccl-bw:allreduce-r16
metrics
:
-
rccl-bw:allreduce-r16/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_busbw
-
rccl-bw:allreduce-r16/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_algbw
rccl_bw_allreduce_r8_pcie_bandwidth
:
statistics
:
mean
categories
:
RCCL rccl-bw:allreduce-r8-pcie
metrics
:
-
rccl-bw:allreduce-r8-pcie/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_busbw
-
rccl-bw:allreduce-r8-pcie/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_algbw
rccl_bw_allreduce_r8_bandwidth
:
statistics
:
mean
categories
:
RCCL rccl-bw:allreduce-r8
metrics
:
-
rccl-bw:allreduce-r8/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_busbw
-
rccl-bw:allreduce-r8/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_algbw
rccl_bw_alltoall_r16_bandwidth
:
statistics
:
mean
categories
:
RCCL rccl-bw:alltoall-r16
metrics
:
-
rccl-bw:alltoall-r16/alltoall_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_busbw
-
rccl-bw:alltoall-r16/alltoall_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_algbw
superbench/config/hygon_bw150.yaml
0 → 100644
View file @
511807b7
# SuperBench Config
version
:
v0.12
superbench
:
enable
:
null
monitor
:
enable
:
false
var
:
default_local_mode
:
&default_local_mode
enable
:
true
modes
:
-
name
:
local
proc_num
:
8
prefix
:
HIP_VISIBLE_DEVICES={proc_rank}
parallel
:
yes
default_pytorch_mode
:
&default_pytorch_mode
enable
:
true
modes
:
-
name
:
torch.distributed
proc_num
:
8
node_num
:
1
frameworks
:
-
pytorch
common_model_config
:
&common_model_config
model_ddp_parameter
:
&model_ddp_param
duration
:
0
num_warmup
:
128
num_steps
:
512
sample_count
:
8192
batch_size
:
128
precision
:
[
float32
,
float16
]
model_action
:
[
train
]
pin_memory
:
yes
num_workers
:
0
benchmarks
:
kernel-launch
:
<<
:
*default_local_mode
gemm-flops
:
<<
:
*default_local_mode
parameters
:
m
:
7680
n
:
8192
k
:
8192
hipblaslt-gemm
:
enable
:
true
modes
:
-
name
:
local
proc_num
:
8
prefix
:
HIP_VISIBLE_DEVICES={proc_rank}
parallel
:
yes
parameters
:
in_types
:
[
"fp32"
,
"fp16"
,
"bf16"
]
tolerant_fail
:
yes
num_warmup
:
100
num_steps
:
1000
shapes
:
-
4096,4096,4096
-
8192,8192,8192
-
16384,16384,16384
gpu-stream
:
enable
:
true
modes
:
-
name
:
local
proc_num
:
8
prefix
:
HIP_VISIBLE_DEVICES={proc_rank}
parallel
:
yes
parameters
:
array_size
:
268435456
num_loops
:
100
precision
:
double
rccl-bw:allreduce-r8:
enable
:
true
modes
:
-
name
:
mpi
proc_num
:
8
node_num
:
1
bind_to
:
none
parameters
:
maxbytes
:
16G
ngpus
:
1
operation
:
allreduce
rccl-bw:allreduce-r4:
enable
:
true
modes
:
-
name
:
mpi
proc_num
:
4
node_num
:
1
bind_to
:
none
env
:
NCCL_BUFFSIZE
:
4194304
parameters
:
maxbytes
:
16G
ngpus
:
1
operation
:
allreduce
rccl-bw:allreduce-r4-graph:
enable
:
true
modes
:
-
name
:
mpi
proc_num
:
4
node_num
:
1
bind_to
:
none
env
:
NCCL_BUFFSIZE
:
4194304
NCCL_RINGS
:
"0 1 2 3|0 3 2 1|0 1 3 2|0 2 3 1|0 2 1 3|0 3 1 2"
parameters
:
maxbytes
:
16G
ngpus
:
1
operation
:
allreduce
rccl-bw:allreduce-r4-pcie:
enable
:
true
modes
:
-
name
:
mpi
proc_num
:
4
node_num
:
1
bind_to
:
none
env
:
NCCL_BUFFSIZE
:
4194304
NCCL_SIMPLE_CHANNELS
:
20
RCCL_P2P_XHCL_CHANNEL_NUM
:
16
RCCL_COLL_XHCL_CHANNEL_NUM
:
16
parameters
:
maxbytes
:
16G
ngpus
:
1
operation
:
allreduce
rccl-bw:allreduce-r4-graph-pcie:
enable
:
true
modes
:
-
name
:
mpi
proc_num
:
4
node_num
:
1
bind_to
:
none
env
:
NCCL_BUFFSIZE
:
4194304
NCCL_SIMPLE_CHANNELS
:
20
RCCL_P2P_XHCL_CHANNEL_NUM
:
16
RCCL_COLL_XHCL_CHANNEL_NUM
:
16
NCCL_RINGS
:
"0 1 2 3|0 3 2 1|0 1 3 2|0 2 3 1|0 2 1 3|0 3 1 2"
parameters
:
maxbytes
:
16G
ngpus
:
1
operation
:
allreduce
rccl-bw:alltoall-r8:
enable
:
true
modes
:
-
name
:
mpi
proc_num
:
8
node_num
:
1
bind_to
:
none
parameters
:
maxbytes
:
16G
ngpus
:
1
operation
:
alltoall
rccl-bw:alltoall-r4:
enable
:
true
modes
:
-
name
:
mpi
proc_num
:
4
node_num
:
1
bind_to
:
none
parameters
:
maxbytes
:
16G
ngpus
:
1
operation
:
alltoall
gpu-hpcg:r32:
enable
:
false
modes
:
-
name
:
mpi
proc_num
:
8
node_num
:
4
host_list
:
[
node01
,
node02
,
node03
,
node04
]
bind_to
:
none
mca
:
pml
:
ob1
btl
:
^openib
btl_tcp_if_include
:
p14p2
coll_hcoll_enable
:
0
parameters
:
nx
:
560
ny
:
280
nz
:
280
rt
:
10
npx
:
4
npy
:
4
npz
:
2
gpu-hpcg:r16:
enable
:
false
modes
:
-
name
:
mpi
proc_num
:
8
node_num
:
2
host_list
:
[
node01
,
node02
]
bind_to
:
none
mca
:
pml
:
ob1
btl
:
^openib
btl_tcp_if_include
:
p14p2
coll_hcoll_enable
:
0
parameters
:
nx
:
560
ny
:
280
nz
:
280
rt
:
10
npx
:
4
npy
:
2
npz
:
2
gpu-hpcg:r8:
enable
:
false
modes
:
-
name
:
mpi
proc_num
:
8
node_num
:
1
bind_to
:
none
parameters
:
nx
:
560
ny
:
280
nz
:
280
rt
:
10
npx
:
2
npy
:
2
npz
:
2
gpu-hpcg:r4:
enable
:
false
modes
:
-
name
:
mpi
proc_num
:
4
node_num
:
1
bind_to
:
none
parameters
:
nx
:
560
ny
:
280
nz
:
280
rt
:
10
npx
:
2
npy
:
2
npz
:
1
gpu-hpcg:r2:
enable
:
false
modes
:
-
name
:
mpi
proc_num
:
2
node_num
:
1
bind_to
:
none
parameters
:
nx
:
560
ny
:
280
nz
:
280
rt
:
10
npx
:
2
npy
:
1
npz
:
1
gpu-hpcg:r1:
enable
:
false
modes
:
-
name
:
mpi
proc_num
:
1
node_num
:
1
bind_to
:
none
parameters
:
nx
:
560
ny
:
280
nz
:
280
rt
:
10
npx
:
1
npy
:
1
npz
:
1
cpu-memory-bw-latency
:
enable
:
false
modes
:
-
name
:
local
proc_num
:
1
parallel
:
no
parameters
:
tests
:
-
bandwidth_matrix
-
latency_matrix
-
max_bandwidth
mem-bw
:
enable
:
true
modes
:
-
name
:
local
proc_num
:
8
prefix
:
HIP_VISIBLE_DEVICES={proc_rank}
parallel
:
no
ib-loopback
:
enable
:
false
modes
:
-
name
:
local
proc_num
:
16
prefix
:
PROC_RANK={proc_rank} IB_DEVICES=0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7 numactl -N $(({proc_rank}/8)) -m $(({proc_rank}/8))
parallel
:
no
parameters
:
msg_size
:
8388608
disk-benchmark
:
enable
:
false
modes
:
-
name
:
local
proc_num
:
1
parallel
:
no
parameters
:
block_devices
:
[]
gpu-copy-bw:correctness:
enable
:
true
modes
:
-
name
:
local
parallel
:
no
parameters
:
mem_type
:
[
htod
,
dtoh
,
dtod
,
one_to_all
,
all_to_one
,
all_to_all
]
copy_type
:
[
sm
,
dma
]
size
:
4096
num_warm_up
:
0
num_loops
:
1
check_data
:
true
gpu-copy-bw:perf:
enable
:
true
modes
:
-
name
:
local
parallel
:
no
parameters
:
mem_type
:
[
htod
,
dtoh
,
dtod
,
one_to_all
,
all_to_one
,
all_to_all
]
copy_type
:
[
sm
,
dma
]
# dist-inference:
# modes:
# - name: mpi
# proc_num: 8
# node_num: 1
# mca:
# pml: ob1
# btl: ^openib
# btl_tcp_if_exclude: lo,docker0
# coll_hcoll_enable: 0
# frameworks:
# - pytorch
# parameters:
# num_layers: 50
# num_warmup: 20
# num_steps: 100
# use_cuda_graph: true
# precision: float16
# hidden_size: 128
# input_size: 128
# batch_size: 1024
superbench/config/hygon_bw_summary.yaml
0 → 100644
View file @
511807b7
# SuperBench summary rules for Hygon BW configurations
#
# Usage:
# sb result summary \
# --data-file <output-dir>/results-summary.jsonl \
# --rule-file superbench/config/hygon_bw_summary.yaml \
# --output-file-format md \
# --output-dir <summary-output-dir>
#
# Notes:
# - This file covers common RCCL and GPU HPCG benchmark metrics used by
# Hygon BW configuration files.
# - Unmatched benchmark sections are allowed. If a benchmark was not run,
# the corresponding category in the summary will be empty.
version
:
v0.12
superbench
:
rules
:
rccl_bw_allreduce_r16_bandwidth
:
statistics
:
mean
categories
:
RCCL rccl-bw:allreduce-r16
metrics
:
-
rccl-bw:allreduce-r16/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_busbw
-
rccl-bw:allreduce-r16/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_algbw
rccl_bw_allreduce_r8_bandwidth
:
statistics
:
mean
categories
:
RCCL rccl-bw:allreduce-r8
metrics
:
-
rccl-bw:allreduce-r8/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_busbw
-
rccl-bw:allreduce-r8/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_algbw
rccl_bw_allreduce_r8_pcie_bandwidth
:
statistics
:
mean
categories
:
RCCL rccl-bw:allreduce-r8-pcie
metrics
:
-
rccl-bw:allreduce-r8-pcie/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_busbw
-
rccl-bw:allreduce-r8-pcie/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_algbw
rccl_bw_allreduce_r4_bandwidth
:
statistics
:
mean
categories
:
RCCL rccl-bw:allreduce-r4
metrics
:
-
rccl-bw:allreduce-r4/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_busbw
-
rccl-bw:allreduce-r4/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_algbw
rccl_bw_allreduce_r4_graph_bandwidth
:
statistics
:
mean
categories
:
RCCL rccl-bw:allreduce-r4-graph
metrics
:
-
rccl-bw:allreduce-r4-graph/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_busbw
-
rccl-bw:allreduce-r4-graph/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_algbw
rccl_bw_allreduce_r4_pcie_bandwidth
:
statistics
:
mean
categories
:
RCCL rccl-bw:allreduce-r4-pcie
metrics
:
-
rccl-bw:allreduce-r4-pcie/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_busbw
-
rccl-bw:allreduce-r4-pcie/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_algbw
rccl_bw_allreduce_r4_graph_pcie_bandwidth
:
statistics
:
mean
categories
:
RCCL rccl-bw:allreduce-r4-graph-pcie
metrics
:
-
rccl-bw:allreduce-r4-graph-pcie/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_busbw
-
rccl-bw:allreduce-r4-graph-pcie/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_algbw
rccl_bw_alltoall_r8_bandwidth
:
statistics
:
mean
categories
:
RCCL rccl-bw:alltoall-r8
metrics
:
-
rccl-bw:alltoall-r8/alltoall_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_busbw
-
rccl-bw:alltoall-r8/alltoall_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_algbw
rccl_bw_alltoall_r16_bandwidth
:
statistics
:
mean
categories
:
RCCL rccl-bw:alltoall-r16
metrics
:
-
rccl-bw:alltoall-r16/alltoall_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_busbw
-
rccl-bw:alltoall-r16/alltoall_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_algbw
rccl_bw_alltoall_r4_bandwidth
:
statistics
:
mean
categories
:
RCCL rccl-bw:alltoall-r4
metrics
:
-
rccl-bw:alltoall-r4/alltoall_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_busbw
-
rccl-bw:alltoall-r4/alltoall_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_algbw
gpu_hpcg_r1
:
statistics
:
mean
categories
:
HPCG gpu-hpcg:r1
metrics
:
-
gpu-hpcg:r1/is_valid
-
gpu-hpcg:r1/final_gflops
-
gpu-hpcg:r1/final_bandwidth
-
gpu-hpcg:r1/final_gflops_per_process
-
gpu-hpcg:r1/final_bandwidth_per_process
-
gpu-hpcg:r1/ddot_gflops
-
gpu-hpcg:r1/ddot_bandwidth
-
gpu-hpcg:r1/ddot_gflops_per_process
-
gpu-hpcg:r1/ddot_bandwidth_per_process
-
gpu-hpcg:r1/waxpby_gflops
-
gpu-hpcg:r1/waxpby_bandwidth
-
gpu-hpcg:r1/waxpby_gflops_per_process
-
gpu-hpcg:r1/waxpby_bandwidth_per_process
-
gpu-hpcg:r1/spmv_gflops
-
gpu-hpcg:r1/spmv_bandwidth
-
gpu-hpcg:r1/spmv_gflops_per_process
-
gpu-hpcg:r1/spmv_bandwidth_per_process
-
gpu-hpcg:r1/mg_gflops
-
gpu-hpcg:r1/mg_bandwidth
-
gpu-hpcg:r1/mg_gflops_per_process
-
gpu-hpcg:r1/mg_bandwidth_per_process
-
gpu-hpcg:r1/total_gflops
-
gpu-hpcg:r1/total_bandwidth
-
gpu-hpcg:r1/total_gflops_per_process
-
gpu-hpcg:r1/total_bandwidth_per_process
-
gpu-hpcg:r1/local_domain_x
-
gpu-hpcg:r1/local_domain_y
-
gpu-hpcg:r1/local_domain_z
-
gpu-hpcg:r1/process_domain_x
-
gpu-hpcg:r1/process_domain_y
-
gpu-hpcg:r1/process_domain_z
gpu_hpcg_r2
:
statistics
:
mean
categories
:
HPCG gpu-hpcg:r2
metrics
:
-
gpu-hpcg:r2/is_valid
-
gpu-hpcg:r2/final_gflops
-
gpu-hpcg:r2/final_bandwidth
-
gpu-hpcg:r2/final_gflops_per_process
-
gpu-hpcg:r2/final_bandwidth_per_process
-
gpu-hpcg:r2/ddot_gflops
-
gpu-hpcg:r2/ddot_bandwidth
-
gpu-hpcg:r2/ddot_gflops_per_process
-
gpu-hpcg:r2/ddot_bandwidth_per_process
-
gpu-hpcg:r2/waxpby_gflops
-
gpu-hpcg:r2/waxpby_bandwidth
-
gpu-hpcg:r2/waxpby_gflops_per_process
-
gpu-hpcg:r2/waxpby_bandwidth_per_process
-
gpu-hpcg:r2/spmv_gflops
-
gpu-hpcg:r2/spmv_bandwidth
-
gpu-hpcg:r2/spmv_gflops_per_process
-
gpu-hpcg:r2/spmv_bandwidth_per_process
-
gpu-hpcg:r2/mg_gflops
-
gpu-hpcg:r2/mg_bandwidth
-
gpu-hpcg:r2/mg_gflops_per_process
-
gpu-hpcg:r2/mg_bandwidth_per_process
-
gpu-hpcg:r2/total_gflops
-
gpu-hpcg:r2/total_bandwidth
-
gpu-hpcg:r2/total_gflops_per_process
-
gpu-hpcg:r2/total_bandwidth_per_process
-
gpu-hpcg:r2/local_domain_x
-
gpu-hpcg:r2/local_domain_y
-
gpu-hpcg:r2/local_domain_z
-
gpu-hpcg:r2/process_domain_x
-
gpu-hpcg:r2/process_domain_y
-
gpu-hpcg:r2/process_domain_z
gpu_hpcg_r4
:
statistics
:
mean
categories
:
HPCG gpu-hpcg:r4
metrics
:
-
gpu-hpcg:r4/is_valid
-
gpu-hpcg:r4/final_gflops
-
gpu-hpcg:r4/final_bandwidth
-
gpu-hpcg:r4/final_gflops_per_process
-
gpu-hpcg:r4/final_bandwidth_per_process
-
gpu-hpcg:r4/ddot_gflops
-
gpu-hpcg:r4/ddot_bandwidth
-
gpu-hpcg:r4/ddot_gflops_per_process
-
gpu-hpcg:r4/ddot_bandwidth_per_process
-
gpu-hpcg:r4/waxpby_gflops
-
gpu-hpcg:r4/waxpby_bandwidth
-
gpu-hpcg:r4/waxpby_gflops_per_process
-
gpu-hpcg:r4/waxpby_bandwidth_per_process
-
gpu-hpcg:r4/spmv_gflops
-
gpu-hpcg:r4/spmv_bandwidth
-
gpu-hpcg:r4/spmv_gflops_per_process
-
gpu-hpcg:r4/spmv_bandwidth_per_process
-
gpu-hpcg:r4/mg_gflops
-
gpu-hpcg:r4/mg_bandwidth
-
gpu-hpcg:r4/mg_gflops_per_process
-
gpu-hpcg:r4/mg_bandwidth_per_process
-
gpu-hpcg:r4/total_gflops
-
gpu-hpcg:r4/total_bandwidth
-
gpu-hpcg:r4/total_gflops_per_process
-
gpu-hpcg:r4/total_bandwidth_per_process
-
gpu-hpcg:r4/local_domain_x
-
gpu-hpcg:r4/local_domain_y
-
gpu-hpcg:r4/local_domain_z
-
gpu-hpcg:r4/process_domain_x
-
gpu-hpcg:r4/process_domain_y
-
gpu-hpcg:r4/process_domain_z
gpu_hpcg_r8
:
statistics
:
mean
categories
:
HPCG gpu-hpcg:r8
metrics
:
-
gpu-hpcg:r8/is_valid
-
gpu-hpcg:r8/final_gflops
-
gpu-hpcg:r8/final_bandwidth
-
gpu-hpcg:r8/final_gflops_per_process
-
gpu-hpcg:r8/final_bandwidth_per_process
-
gpu-hpcg:r8/ddot_gflops
-
gpu-hpcg:r8/ddot_bandwidth
-
gpu-hpcg:r8/ddot_gflops_per_process
-
gpu-hpcg:r8/ddot_bandwidth_per_process
-
gpu-hpcg:r8/waxpby_gflops
-
gpu-hpcg:r8/waxpby_bandwidth
-
gpu-hpcg:r8/waxpby_gflops_per_process
-
gpu-hpcg:r8/waxpby_bandwidth_per_process
-
gpu-hpcg:r8/spmv_gflops
-
gpu-hpcg:r8/spmv_bandwidth
-
gpu-hpcg:r8/spmv_gflops_per_process
-
gpu-hpcg:r8/spmv_bandwidth_per_process
-
gpu-hpcg:r8/mg_gflops
-
gpu-hpcg:r8/mg_bandwidth
-
gpu-hpcg:r8/mg_gflops_per_process
-
gpu-hpcg:r8/mg_bandwidth_per_process
-
gpu-hpcg:r8/total_gflops
-
gpu-hpcg:r8/total_bandwidth
-
gpu-hpcg:r8/total_gflops_per_process
-
gpu-hpcg:r8/total_bandwidth_per_process
-
gpu-hpcg:r8/local_domain_x
-
gpu-hpcg:r8/local_domain_y
-
gpu-hpcg:r8/local_domain_z
-
gpu-hpcg:r8/process_domain_x
-
gpu-hpcg:r8/process_domain_y
-
gpu-hpcg:r8/process_domain_z
gpu_hpcg_r16
:
statistics
:
mean
categories
:
HPCG gpu-hpcg:r16
metrics
:
-
gpu-hpcg:r16/is_valid
-
gpu-hpcg:r16/final_gflops
-
gpu-hpcg:r16/final_bandwidth
-
gpu-hpcg:r16/final_gflops_per_process
-
gpu-hpcg:r16/final_bandwidth_per_process
-
gpu-hpcg:r16/ddot_gflops
-
gpu-hpcg:r16/ddot_bandwidth
-
gpu-hpcg:r16/ddot_gflops_per_process
-
gpu-hpcg:r16/ddot_bandwidth_per_process
-
gpu-hpcg:r16/waxpby_gflops
-
gpu-hpcg:r16/waxpby_bandwidth
-
gpu-hpcg:r16/waxpby_gflops_per_process
-
gpu-hpcg:r16/waxpby_bandwidth_per_process
-
gpu-hpcg:r16/spmv_gflops
-
gpu-hpcg:r16/spmv_bandwidth
-
gpu-hpcg:r16/spmv_gflops_per_process
-
gpu-hpcg:r16/spmv_bandwidth_per_process
-
gpu-hpcg:r16/mg_gflops
-
gpu-hpcg:r16/mg_bandwidth
-
gpu-hpcg:r16/mg_gflops_per_process
-
gpu-hpcg:r16/mg_bandwidth_per_process
-
gpu-hpcg:r16/total_gflops
-
gpu-hpcg:r16/total_bandwidth
-
gpu-hpcg:r16/total_gflops_per_process
-
gpu-hpcg:r16/total_bandwidth_per_process
-
gpu-hpcg:r16/local_domain_x
-
gpu-hpcg:r16/local_domain_y
-
gpu-hpcg:r16/local_domain_z
-
gpu-hpcg:r16/process_domain_x
-
gpu-hpcg:r16/process_domain_y
-
gpu-hpcg:r16/process_domain_z
gpu_hpcg_r32
:
statistics
:
mean
categories
:
HPCG gpu-hpcg:r32
metrics
:
-
gpu-hpcg:r32/is_valid
-
gpu-hpcg:r32/final_gflops
-
gpu-hpcg:r32/final_bandwidth
-
gpu-hpcg:r32/final_gflops_per_process
-
gpu-hpcg:r32/final_bandwidth_per_process
-
gpu-hpcg:r32/ddot_gflops
-
gpu-hpcg:r32/ddot_bandwidth
-
gpu-hpcg:r32/ddot_gflops_per_process
-
gpu-hpcg:r32/ddot_bandwidth_per_process
-
gpu-hpcg:r32/waxpby_gflops
-
gpu-hpcg:r32/waxpby_bandwidth
-
gpu-hpcg:r32/waxpby_gflops_per_process
-
gpu-hpcg:r32/waxpby_bandwidth_per_process
-
gpu-hpcg:r32/spmv_gflops
-
gpu-hpcg:r32/spmv_bandwidth
-
gpu-hpcg:r32/spmv_gflops_per_process
-
gpu-hpcg:r32/spmv_bandwidth_per_process
-
gpu-hpcg:r32/mg_gflops
-
gpu-hpcg:r32/mg_bandwidth
-
gpu-hpcg:r32/mg_gflops_per_process
-
gpu-hpcg:r32/mg_bandwidth_per_process
-
gpu-hpcg:r32/total_gflops
-
gpu-hpcg:r32/total_bandwidth
-
gpu-hpcg:r32/total_gflops_per_process
-
gpu-hpcg:r32/total_bandwidth_per_process
-
gpu-hpcg:r32/local_domain_x
-
gpu-hpcg:r32/local_domain_y
-
gpu-hpcg:r32/local_domain_z
-
gpu-hpcg:r32/process_domain_x
-
gpu-hpcg:r32/process_domain_y
-
gpu-hpcg:r32/process_domain_z
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment