Unverified Commit 511807b7 authored by one's avatar one Committed by GitHub
Browse files

Config: Update config files (#7)

- Add BW150 config
- Update BW1000 config
- Merge summary rules
parent 0993db75
......@@ -6,7 +6,7 @@ superbench:
enable: false
var:
default_local_mode: &default_local_mode
enable: false
enable: true
modes:
- name: local
proc_num: 8
......@@ -44,23 +44,23 @@ superbench:
n: 8192
k: 8192
hipblaslt-gemm:
enable: false
enable: true
modes:
- name: local
proc_num: 8
prefix: HIP_VISIBLE_DEVICES={proc_rank}
parallel: yes
- name: local
proc_num: 8
prefix: HIP_VISIBLE_DEVICES={proc_rank}
parallel: yes
parameters:
in_types: ["fp32", "fp16", "bf16"]
in_types: [ "fp32", "fp16", "bf16" ]
tolerant_fail: yes
num_warmup: 100
num_steps: 1000
shapes:
- 4096,4096,4096
- 8192,8192,8192
- 16384,16384,16384
- 4096,4096,4096
- 8192,8192,8192
- 16384,16384,16384
gpu-stream:
enable: false
enable: true
modes:
- name: local
proc_num: 8
......@@ -71,7 +71,7 @@ superbench:
num_loops: 100
precision: double
rccl-bw:allreduce-r16:
enable: true
enable: false
modes:
- name: mpi
proc_num: 8
......@@ -92,57 +92,37 @@ superbench:
maxbytes: 16G
ngpus: 1
operation: allreduce
rccl-bw:allreduce-r8-pcie:
rccl-bw:allreduce-r8:
enable: true
modes:
- name: mpi
proc_num: 8
node_num: 1
bind_to: none
mca:
pml: ucx
btl: ^openib
btl_tcp_if_exclude: lo,docker0
coll_hcoll_enable: 0
env:
ROCM_PATH: /opt/dtk
HSA_FORCE_FINE_GRAIN_PCIE: 1
NCCL_SOCKET_IFNAME: p14p2
NCCL_NET_GDR_LEVEL: PHB
NCCL_NET_GDR_READ: 1
NCCL_BUFFSIZE: 4194304
NCCL_SIMPLE_CHANNELS: 32
RCCL_P2P_XHCL_CHANNEL_NUM: 31
RCCL_COLL_XHCL_CHANNEL_NUM: 28
parameters:
maxbytes: 16G
ngpus: 1
operation: allreduce
rccl-bw:allreduce-r8:
enable: false
rccl-bw:allreduce-r8-pcie:
enable: true
modes:
- name: mpi
proc_num: 8
node_num: 1
bind_to: none
mca:
pml: ucx
btl: ^openib
btl_tcp_if_exclude: lo,docker0
coll_hcoll_enable: 0
env:
ROCM_PATH: /opt/dtk
HSA_FORCE_FINE_GRAIN_PCIE: 1
NCCL_SOCKET_IFNAME: p14p2
NCCL_NET_GDR_LEVEL: PHB
NCCL_NET_GDR_READ: 1
NCCL_BUFFSIZE: 4194304
NCCL_SIMPLE_CHANNELS: 32
RCCL_P2P_XHCL_CHANNEL_NUM: 31
RCCL_COLL_XHCL_CHANNEL_NUM: 28
parameters:
maxbytes: 16G
ngpus: 1
operation: allreduce
rccl-bw:alltoall-r16:
enable: true
enable: false
modes:
- name: mpi
proc_num: 8
......@@ -163,6 +143,17 @@ superbench:
maxbytes: 16G
ngpus: 1
operation: alltoall
rccl-bw:alltoall-r8:
enable: true
modes:
- name: mpi
proc_num: 8
node_num: 1
bind_to: none
parameters:
maxbytes: 16G
ngpus: 1
operation: alltoall
gpu-hpcg:r32:
enable: false
modes:
......@@ -182,12 +173,12 @@ superbench:
nx: 560
ny: 280
nz: 280
rt: 60
rt: 10
npx: 4
npy: 4
npz: 2
gpu-hpcg:r16:
enable: true
enable: false
modes:
- name: mpi
proc_num: 8
......@@ -205,7 +196,7 @@ superbench:
nx: 560
ny: 280
nz: 280
rt: 0
rt: 10
npx: 4
npy: 2
npz: 2
......@@ -220,7 +211,7 @@ superbench:
nx: 560
ny: 280
nz: 280
rt: 60
rt: 10
npx: 2
npy: 2
npz: 2
......@@ -235,7 +226,7 @@ superbench:
nx: 560
ny: 280
nz: 280
rt: 60
rt: 10
npx: 2
npy: 2
npz: 1
......@@ -250,7 +241,7 @@ superbench:
nx: 560
ny: 280
nz: 280
rt: 60
rt: 10
npx: 2
npy: 1
npz: 1
......@@ -265,7 +256,7 @@ superbench:
nx: 560
ny: 280
nz: 280
rt: 60
rt: 10
npx: 1
npy: 1
npz: 1
......@@ -281,7 +272,7 @@ superbench:
- latency_matrix
- max_bandwidth
mem-bw:
enable: false
enable: true
modes:
- name: local
proc_num: 8
......@@ -290,10 +281,10 @@ superbench:
ib-loopback:
enable: false
modes:
- name: local
proc_num: 16
prefix: PROC_RANK={proc_rank} IB_DEVICES=0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7 numactl -N $(({proc_rank}/8)) -m $(({proc_rank}/8))
parallel: no
- name: local
proc_num: 16
prefix: PROC_RANK={proc_rank} IB_DEVICES=0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7 numactl -N $(({proc_rank}/8)) -m $(({proc_rank}/8))
parallel: no
parameters:
msg_size: 8388608
disk-benchmark:
......@@ -305,7 +296,7 @@ superbench:
parameters:
block_devices: []
gpu-copy-bw:correctness:
enable: false
enable: true
modes:
- name: local
parallel: no
......@@ -317,7 +308,7 @@ superbench:
num_loops: 1
check_data: true
gpu-copy-bw:perf:
enable: false
enable: true
modes:
- name: local
parallel: no
......
# SuperBench summary rules for hygon_bw1000.yaml
#
# Usage:
#   sb result summary \
#     --data-file <output-dir>/results-summary.jsonl \
#     --rule-file superbench/config/hygon_bw1000_summary.yaml \
#     --output-file-format md \
#     --output-dir <summary-output-dir>
#
# Notes:
# - This file focuses on RCCL benchmarks defined in
#   superbench/config/hygon_bw1000.yaml.
# - Unmatched benchmark sections are allowed. If a benchmark was not run,
#   the corresponding category in the summary will be empty.
---
version: v0.12
superbench:
  rules:
    rccl_bw_allreduce_r16_bandwidth:
      statistics: mean
      categories: RCCL rccl-bw:allreduce-r16
      metrics:
        # Message sizes (bytes) selected for reporting: 8 MiB .. 16 GiB.
        - rccl-bw:allreduce-r16/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_busbw
        - rccl-bw:allreduce-r16/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_algbw
    rccl_bw_allreduce_r8_pcie_bandwidth:
      statistics: mean
      categories: RCCL rccl-bw:allreduce-r8-pcie
      metrics:
        - rccl-bw:allreduce-r8-pcie/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_busbw
        - rccl-bw:allreduce-r8-pcie/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_algbw
    rccl_bw_allreduce_r8_bandwidth:
      statistics: mean
      categories: RCCL rccl-bw:allreduce-r8
      metrics:
        - rccl-bw:allreduce-r8/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_busbw
        - rccl-bw:allreduce-r8/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_algbw
    rccl_bw_alltoall_r16_bandwidth:
      statistics: mean
      categories: RCCL rccl-bw:alltoall-r16
      metrics:
        - rccl-bw:alltoall-r16/alltoall_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_busbw
        - rccl-bw:alltoall-r16/alltoall_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_algbw
# SuperBench Config
---
version: v0.12
superbench:
  enable: null
  monitor:
    enable: false
  var:
    # Shared anchor: one process per GPU, pinned via HIP_VISIBLE_DEVICES.
    default_local_mode: &default_local_mode
      enable: true
      modes:
        - name: local
          proc_num: 8
          prefix: HIP_VISIBLE_DEVICES={proc_rank}
          parallel: true
    default_pytorch_mode: &default_pytorch_mode
      enable: true
      modes:
        - name: torch.distributed
          proc_num: 8
          node_num: 1
      frameworks:
        - pytorch
    # Anchor is currently unreferenced in this file; kept for model benchmarks.
    common_model_config: &common_model_config
      model_ddp_parameter: &model_ddp_param
        duration: 0
        num_warmup: 128
        num_steps: 512
        sample_count: 8192
        batch_size: 128
        precision: [float32, float16]
        model_action: [train]
        pin_memory: true
        num_workers: 0
  benchmarks:
    kernel-launch:
      <<: *default_local_mode
    gemm-flops:
      <<: *default_local_mode
      parameters:
        m: 7680
        n: 8192
        k: 8192
    hipblaslt-gemm:
      enable: true
      modes:
        - name: local
          proc_num: 8
          prefix: HIP_VISIBLE_DEVICES={proc_rank}
          parallel: true
      parameters:
        in_types: ["fp32", "fp16", "bf16"]
        tolerant_fail: true
        num_warmup: 100
        num_steps: 1000
        shapes:
          - 4096,4096,4096
          - 8192,8192,8192
          - 16384,16384,16384
    gpu-stream:
      enable: true
      modes:
        - name: local
          proc_num: 8
          prefix: HIP_VISIBLE_DEVICES={proc_rank}
          parallel: true
      parameters:
        array_size: 268435456
        num_loops: 100
        precision: double
    rccl-bw:allreduce-r8:
      enable: true
      modes:
        - name: mpi
          proc_num: 8
          node_num: 1
          bind_to: none
      parameters:
        maxbytes: 16G
        ngpus: 1
        operation: allreduce
    rccl-bw:allreduce-r4:
      enable: true
      modes:
        - name: mpi
          proc_num: 4
          node_num: 1
          bind_to: none
          env:
            NCCL_BUFFSIZE: 4194304
      parameters:
        maxbytes: 16G
        ngpus: 1
        operation: allreduce
    rccl-bw:allreduce-r4-graph:
      enable: true
      modes:
        - name: mpi
          proc_num: 4
          node_num: 1
          bind_to: none
          env:
            NCCL_BUFFSIZE: 4194304
            NCCL_RINGS: "0 1 2 3|0 3 2 1|0 1 3 2|0 2 3 1|0 2 1 3|0 3 1 2"
      parameters:
        maxbytes: 16G
        ngpus: 1
        operation: allreduce
    rccl-bw:allreduce-r4-pcie:
      enable: true
      modes:
        - name: mpi
          proc_num: 4
          node_num: 1
          bind_to: none
          env:
            NCCL_BUFFSIZE: 4194304
            NCCL_SIMPLE_CHANNELS: 20
            RCCL_P2P_XHCL_CHANNEL_NUM: 16
            RCCL_COLL_XHCL_CHANNEL_NUM: 16
      parameters:
        maxbytes: 16G
        ngpus: 1
        operation: allreduce
    rccl-bw:allreduce-r4-graph-pcie:
      enable: true
      modes:
        - name: mpi
          proc_num: 4
          node_num: 1
          bind_to: none
          env:
            NCCL_BUFFSIZE: 4194304
            NCCL_SIMPLE_CHANNELS: 20
            RCCL_P2P_XHCL_CHANNEL_NUM: 16
            RCCL_COLL_XHCL_CHANNEL_NUM: 16
            NCCL_RINGS: "0 1 2 3|0 3 2 1|0 1 3 2|0 2 3 1|0 2 1 3|0 3 1 2"
      parameters:
        maxbytes: 16G
        ngpus: 1
        operation: allreduce
    rccl-bw:alltoall-r8:
      enable: true
      modes:
        - name: mpi
          proc_num: 8
          node_num: 1
          bind_to: none
      parameters:
        maxbytes: 16G
        ngpus: 1
        operation: alltoall
    rccl-bw:alltoall-r4:
      enable: true
      modes:
        - name: mpi
          proc_num: 4
          node_num: 1
          bind_to: none
      parameters:
        maxbytes: 16G
        ngpus: 1
        operation: alltoall
    gpu-hpcg:r32:
      enable: false
      modes:
        - name: mpi
          proc_num: 8
          node_num: 4
          host_list: [node01, node02, node03, node04]
          bind_to: none
          mca:
            pml: ob1
            btl: ^openib
            btl_tcp_if_include: p14p2
            coll_hcoll_enable: 0
      parameters:
        nx: 560
        ny: 280
        nz: 280
        rt: 10
        npx: 4
        npy: 4
        npz: 2
    gpu-hpcg:r16:
      enable: false
      modes:
        - name: mpi
          proc_num: 8
          node_num: 2
          host_list: [node01, node02]
          bind_to: none
          mca:
            pml: ob1
            btl: ^openib
            btl_tcp_if_include: p14p2
            coll_hcoll_enable: 0
      parameters:
        nx: 560
        ny: 280
        nz: 280
        rt: 10
        npx: 4
        npy: 2
        npz: 2
    gpu-hpcg:r8:
      enable: false
      modes:
        - name: mpi
          proc_num: 8
          node_num: 1
          bind_to: none
      parameters:
        nx: 560
        ny: 280
        nz: 280
        rt: 10
        npx: 2
        npy: 2
        npz: 2
    gpu-hpcg:r4:
      enable: false
      modes:
        - name: mpi
          proc_num: 4
          node_num: 1
          bind_to: none
      parameters:
        nx: 560
        ny: 280
        nz: 280
        rt: 10
        npx: 2
        npy: 2
        npz: 1
    gpu-hpcg:r2:
      enable: false
      modes:
        - name: mpi
          proc_num: 2
          node_num: 1
          bind_to: none
      parameters:
        nx: 560
        ny: 280
        nz: 280
        rt: 10
        npx: 2
        npy: 1
        npz: 1
    gpu-hpcg:r1:
      enable: false
      modes:
        - name: mpi
          proc_num: 1
          node_num: 1
          bind_to: none
      parameters:
        nx: 560
        ny: 280
        nz: 280
        rt: 10
        npx: 1
        npy: 1
        npz: 1
    cpu-memory-bw-latency:
      enable: false
      modes:
        - name: local
          proc_num: 1
          parallel: false
      parameters:
        tests:
          - bandwidth_matrix
          - latency_matrix
          - max_bandwidth
    mem-bw:
      enable: true
      modes:
        - name: local
          proc_num: 8
          prefix: HIP_VISIBLE_DEVICES={proc_rank}
          parallel: false
    ib-loopback:
      enable: false
      modes:
        - name: local
          proc_num: 16
          prefix: PROC_RANK={proc_rank} IB_DEVICES=0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7 numactl -N $(({proc_rank}/8)) -m $(({proc_rank}/8))
          parallel: false
      parameters:
        msg_size: 8388608
    disk-benchmark:
      enable: false
      modes:
        - name: local
          proc_num: 1
          parallel: false
      parameters:
        block_devices: []
    gpu-copy-bw:correctness:
      enable: true
      modes:
        - name: local
          parallel: false
      parameters:
        mem_type: [htod, dtoh, dtod, one_to_all, all_to_one, all_to_all]
        copy_type: [sm, dma]
        size: 4096
        num_warm_up: 0
        num_loops: 1
        check_data: true
    gpu-copy-bw:perf:
      enable: true
      modes:
        - name: local
          parallel: false
      parameters:
        mem_type: [htod, dtoh, dtod, one_to_all, all_to_one, all_to_all]
        copy_type: [sm, dma]
    # dist-inference:
    #   modes:
    #     - name: mpi
    #       proc_num: 8
    #       node_num: 1
    #       mca:
    #         pml: ob1
    #         btl: ^openib
    #         btl_tcp_if_exclude: lo,docker0
    #         coll_hcoll_enable: 0
    #   frameworks:
    #     - pytorch
    #   parameters:
    #     num_layers: 50
    #     num_warmup: 20
    #     num_steps: 100
    #     use_cuda_graph: true
    #     precision: float16
    #     hidden_size: 128
    #     input_size: 128
    #     batch_size: 1024
# SuperBench summary rules for Hygon BW configurations
#
# Usage:
#   sb result summary \
#     --data-file <output-dir>/results-summary.jsonl \
#     --rule-file superbench/config/hygon_bw_summary.yaml \
#     --output-file-format md \
#     --output-dir <summary-output-dir>
#
# Notes:
# - This file covers common RCCL and GPU HPCG benchmark metrics used by
#   Hygon BW configuration files.
# - Unmatched benchmark sections are allowed. If a benchmark was not run,
#   the corresponding category in the summary will be empty.
---
version: v0.12
superbench:
  rules:
    rccl_bw_allreduce_r16_bandwidth:
      statistics: mean
      categories: RCCL rccl-bw:allreduce-r16
      metrics:
        # Message sizes (bytes) selected for reporting: 8 MiB .. 16 GiB.
        - rccl-bw:allreduce-r16/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_busbw
        - rccl-bw:allreduce-r16/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_algbw
    rccl_bw_allreduce_r8_bandwidth:
      statistics: mean
      categories: RCCL rccl-bw:allreduce-r8
      metrics:
        - rccl-bw:allreduce-r8/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_busbw
        - rccl-bw:allreduce-r8/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_algbw
    rccl_bw_allreduce_r8_pcie_bandwidth:
      statistics: mean
      categories: RCCL rccl-bw:allreduce-r8-pcie
      metrics:
        - rccl-bw:allreduce-r8-pcie/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_busbw
        - rccl-bw:allreduce-r8-pcie/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_algbw
    rccl_bw_allreduce_r4_bandwidth:
      statistics: mean
      categories: RCCL rccl-bw:allreduce-r4
      metrics:
        - rccl-bw:allreduce-r4/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_busbw
        - rccl-bw:allreduce-r4/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_algbw
    rccl_bw_allreduce_r4_graph_bandwidth:
      statistics: mean
      categories: RCCL rccl-bw:allreduce-r4-graph
      metrics:
        - rccl-bw:allreduce-r4-graph/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_busbw
        - rccl-bw:allreduce-r4-graph/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_algbw
    rccl_bw_allreduce_r4_pcie_bandwidth:
      statistics: mean
      categories: RCCL rccl-bw:allreduce-r4-pcie
      metrics:
        - rccl-bw:allreduce-r4-pcie/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_busbw
        - rccl-bw:allreduce-r4-pcie/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_algbw
    rccl_bw_allreduce_r4_graph_pcie_bandwidth:
      statistics: mean
      categories: RCCL rccl-bw:allreduce-r4-graph-pcie
      metrics:
        - rccl-bw:allreduce-r4-graph-pcie/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_busbw
        - rccl-bw:allreduce-r4-graph-pcie/allreduce_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_algbw
    rccl_bw_alltoall_r8_bandwidth:
      statistics: mean
      categories: RCCL rccl-bw:alltoall-r8
      metrics:
        - rccl-bw:alltoall-r8/alltoall_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_busbw
        - rccl-bw:alltoall-r8/alltoall_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_algbw
    rccl_bw_alltoall_r16_bandwidth:
      statistics: mean
      categories: RCCL rccl-bw:alltoall-r16
      metrics:
        - rccl-bw:alltoall-r16/alltoall_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_busbw
        - rccl-bw:alltoall-r16/alltoall_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_algbw
    rccl_bw_alltoall_r4_bandwidth:
      statistics: mean
      categories: RCCL rccl-bw:alltoall-r4
      metrics:
        - rccl-bw:alltoall-r4/alltoall_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_busbw
        - rccl-bw:alltoall-r4/alltoall_(8388608|67108864|1073741824|4294967296|8589934592|17179869184)_algbw
    gpu_hpcg_r1:
      statistics: mean
      categories: HPCG gpu-hpcg:r1
      metrics:
        - gpu-hpcg:r1/is_valid
        - gpu-hpcg:r1/final_gflops
        - gpu-hpcg:r1/final_bandwidth
        - gpu-hpcg:r1/final_gflops_per_process
        - gpu-hpcg:r1/final_bandwidth_per_process
        - gpu-hpcg:r1/ddot_gflops
        - gpu-hpcg:r1/ddot_bandwidth
        - gpu-hpcg:r1/ddot_gflops_per_process
        - gpu-hpcg:r1/ddot_bandwidth_per_process
        - gpu-hpcg:r1/waxpby_gflops
        - gpu-hpcg:r1/waxpby_bandwidth
        - gpu-hpcg:r1/waxpby_gflops_per_process
        - gpu-hpcg:r1/waxpby_bandwidth_per_process
        - gpu-hpcg:r1/spmv_gflops
        - gpu-hpcg:r1/spmv_bandwidth
        - gpu-hpcg:r1/spmv_gflops_per_process
        - gpu-hpcg:r1/spmv_bandwidth_per_process
        - gpu-hpcg:r1/mg_gflops
        - gpu-hpcg:r1/mg_bandwidth
        - gpu-hpcg:r1/mg_gflops_per_process
        - gpu-hpcg:r1/mg_bandwidth_per_process
        - gpu-hpcg:r1/total_gflops
        - gpu-hpcg:r1/total_bandwidth
        - gpu-hpcg:r1/total_gflops_per_process
        - gpu-hpcg:r1/total_bandwidth_per_process
        - gpu-hpcg:r1/local_domain_x
        - gpu-hpcg:r1/local_domain_y
        - gpu-hpcg:r1/local_domain_z
        - gpu-hpcg:r1/process_domain_x
        - gpu-hpcg:r1/process_domain_y
        - gpu-hpcg:r1/process_domain_z
    gpu_hpcg_r2:
      statistics: mean
      categories: HPCG gpu-hpcg:r2
      metrics:
        - gpu-hpcg:r2/is_valid
        - gpu-hpcg:r2/final_gflops
        - gpu-hpcg:r2/final_bandwidth
        - gpu-hpcg:r2/final_gflops_per_process
        - gpu-hpcg:r2/final_bandwidth_per_process
        - gpu-hpcg:r2/ddot_gflops
        - gpu-hpcg:r2/ddot_bandwidth
        - gpu-hpcg:r2/ddot_gflops_per_process
        - gpu-hpcg:r2/ddot_bandwidth_per_process
        - gpu-hpcg:r2/waxpby_gflops
        - gpu-hpcg:r2/waxpby_bandwidth
        - gpu-hpcg:r2/waxpby_gflops_per_process
        - gpu-hpcg:r2/waxpby_bandwidth_per_process
        - gpu-hpcg:r2/spmv_gflops
        - gpu-hpcg:r2/spmv_bandwidth
        - gpu-hpcg:r2/spmv_gflops_per_process
        - gpu-hpcg:r2/spmv_bandwidth_per_process
        - gpu-hpcg:r2/mg_gflops
        - gpu-hpcg:r2/mg_bandwidth
        - gpu-hpcg:r2/mg_gflops_per_process
        - gpu-hpcg:r2/mg_bandwidth_per_process
        - gpu-hpcg:r2/total_gflops
        - gpu-hpcg:r2/total_bandwidth
        - gpu-hpcg:r2/total_gflops_per_process
        - gpu-hpcg:r2/total_bandwidth_per_process
        - gpu-hpcg:r2/local_domain_x
        - gpu-hpcg:r2/local_domain_y
        - gpu-hpcg:r2/local_domain_z
        - gpu-hpcg:r2/process_domain_x
        - gpu-hpcg:r2/process_domain_y
        - gpu-hpcg:r2/process_domain_z
    gpu_hpcg_r4:
      statistics: mean
      categories: HPCG gpu-hpcg:r4
      metrics:
        - gpu-hpcg:r4/is_valid
        - gpu-hpcg:r4/final_gflops
        - gpu-hpcg:r4/final_bandwidth
        - gpu-hpcg:r4/final_gflops_per_process
        - gpu-hpcg:r4/final_bandwidth_per_process
        - gpu-hpcg:r4/ddot_gflops
        - gpu-hpcg:r4/ddot_bandwidth
        - gpu-hpcg:r4/ddot_gflops_per_process
        - gpu-hpcg:r4/ddot_bandwidth_per_process
        - gpu-hpcg:r4/waxpby_gflops
        - gpu-hpcg:r4/waxpby_bandwidth
        - gpu-hpcg:r4/waxpby_gflops_per_process
        - gpu-hpcg:r4/waxpby_bandwidth_per_process
        - gpu-hpcg:r4/spmv_gflops
        - gpu-hpcg:r4/spmv_bandwidth
        - gpu-hpcg:r4/spmv_gflops_per_process
        - gpu-hpcg:r4/spmv_bandwidth_per_process
        - gpu-hpcg:r4/mg_gflops
        - gpu-hpcg:r4/mg_bandwidth
        - gpu-hpcg:r4/mg_gflops_per_process
        - gpu-hpcg:r4/mg_bandwidth_per_process
        - gpu-hpcg:r4/total_gflops
        - gpu-hpcg:r4/total_bandwidth
        - gpu-hpcg:r4/total_gflops_per_process
        - gpu-hpcg:r4/total_bandwidth_per_process
        - gpu-hpcg:r4/local_domain_x
        - gpu-hpcg:r4/local_domain_y
        - gpu-hpcg:r4/local_domain_z
        - gpu-hpcg:r4/process_domain_x
        - gpu-hpcg:r4/process_domain_y
        - gpu-hpcg:r4/process_domain_z
    gpu_hpcg_r8:
      statistics: mean
      categories: HPCG gpu-hpcg:r8
      metrics:
        - gpu-hpcg:r8/is_valid
        - gpu-hpcg:r8/final_gflops
        - gpu-hpcg:r8/final_bandwidth
        - gpu-hpcg:r8/final_gflops_per_process
        - gpu-hpcg:r8/final_bandwidth_per_process
        - gpu-hpcg:r8/ddot_gflops
        - gpu-hpcg:r8/ddot_bandwidth
        - gpu-hpcg:r8/ddot_gflops_per_process
        - gpu-hpcg:r8/ddot_bandwidth_per_process
        - gpu-hpcg:r8/waxpby_gflops
        - gpu-hpcg:r8/waxpby_bandwidth
        - gpu-hpcg:r8/waxpby_gflops_per_process
        - gpu-hpcg:r8/waxpby_bandwidth_per_process
        - gpu-hpcg:r8/spmv_gflops
        - gpu-hpcg:r8/spmv_bandwidth
        - gpu-hpcg:r8/spmv_gflops_per_process
        - gpu-hpcg:r8/spmv_bandwidth_per_process
        - gpu-hpcg:r8/mg_gflops
        - gpu-hpcg:r8/mg_bandwidth
        - gpu-hpcg:r8/mg_gflops_per_process
        - gpu-hpcg:r8/mg_bandwidth_per_process
        - gpu-hpcg:r8/total_gflops
        - gpu-hpcg:r8/total_bandwidth
        - gpu-hpcg:r8/total_gflops_per_process
        - gpu-hpcg:r8/total_bandwidth_per_process
        - gpu-hpcg:r8/local_domain_x
        - gpu-hpcg:r8/local_domain_y
        - gpu-hpcg:r8/local_domain_z
        - gpu-hpcg:r8/process_domain_x
        - gpu-hpcg:r8/process_domain_y
        - gpu-hpcg:r8/process_domain_z
    gpu_hpcg_r16:
      statistics: mean
      categories: HPCG gpu-hpcg:r16
      metrics:
        - gpu-hpcg:r16/is_valid
        - gpu-hpcg:r16/final_gflops
        - gpu-hpcg:r16/final_bandwidth
        - gpu-hpcg:r16/final_gflops_per_process
        - gpu-hpcg:r16/final_bandwidth_per_process
        - gpu-hpcg:r16/ddot_gflops
        - gpu-hpcg:r16/ddot_bandwidth
        - gpu-hpcg:r16/ddot_gflops_per_process
        - gpu-hpcg:r16/ddot_bandwidth_per_process
        - gpu-hpcg:r16/waxpby_gflops
        - gpu-hpcg:r16/waxpby_bandwidth
        - gpu-hpcg:r16/waxpby_gflops_per_process
        - gpu-hpcg:r16/waxpby_bandwidth_per_process
        - gpu-hpcg:r16/spmv_gflops
        - gpu-hpcg:r16/spmv_bandwidth
        - gpu-hpcg:r16/spmv_gflops_per_process
        - gpu-hpcg:r16/spmv_bandwidth_per_process
        - gpu-hpcg:r16/mg_gflops
        - gpu-hpcg:r16/mg_bandwidth
        - gpu-hpcg:r16/mg_gflops_per_process
        - gpu-hpcg:r16/mg_bandwidth_per_process
        - gpu-hpcg:r16/total_gflops
        - gpu-hpcg:r16/total_bandwidth
        - gpu-hpcg:r16/total_gflops_per_process
        - gpu-hpcg:r16/total_bandwidth_per_process
        - gpu-hpcg:r16/local_domain_x
        - gpu-hpcg:r16/local_domain_y
        - gpu-hpcg:r16/local_domain_z
        - gpu-hpcg:r16/process_domain_x
        - gpu-hpcg:r16/process_domain_y
        - gpu-hpcg:r16/process_domain_z
    gpu_hpcg_r32:
      statistics: mean
      categories: HPCG gpu-hpcg:r32
      metrics:
        - gpu-hpcg:r32/is_valid
        - gpu-hpcg:r32/final_gflops
        - gpu-hpcg:r32/final_bandwidth
        - gpu-hpcg:r32/final_gflops_per_process
        - gpu-hpcg:r32/final_bandwidth_per_process
        - gpu-hpcg:r32/ddot_gflops
        - gpu-hpcg:r32/ddot_bandwidth
        - gpu-hpcg:r32/ddot_gflops_per_process
        - gpu-hpcg:r32/ddot_bandwidth_per_process
        - gpu-hpcg:r32/waxpby_gflops
        - gpu-hpcg:r32/waxpby_bandwidth
        - gpu-hpcg:r32/waxpby_gflops_per_process
        - gpu-hpcg:r32/waxpby_bandwidth_per_process
        - gpu-hpcg:r32/spmv_gflops
        - gpu-hpcg:r32/spmv_bandwidth
        - gpu-hpcg:r32/spmv_gflops_per_process
        - gpu-hpcg:r32/spmv_bandwidth_per_process
        - gpu-hpcg:r32/mg_gflops
        - gpu-hpcg:r32/mg_bandwidth
        - gpu-hpcg:r32/mg_gflops_per_process
        - gpu-hpcg:r32/mg_bandwidth_per_process
        - gpu-hpcg:r32/total_gflops
        - gpu-hpcg:r32/total_bandwidth
        - gpu-hpcg:r32/total_gflops_per_process
        - gpu-hpcg:r32/total_bandwidth_per_process
        - gpu-hpcg:r32/local_domain_x
        - gpu-hpcg:r32/local_domain_y
        - gpu-hpcg:r32/local_domain_z
        - gpu-hpcg:r32/process_domain_x
        - gpu-hpcg:r32/process_domain_y
        - gpu-hpcg:r32/process_domain_z
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment