Unverified Commit a58f218b authored by Yuting Jiang, committed by GitHub
Browse files

Benchmarks: Update - Update benchmarks in configuration file (#208)

**Description**
Update the benchmarks in the configuration files for single-node validation of SuperBench v0.3.

**Major Revision**
- Fix incorrect nccl-bw parameters for single-node validation in the configs
- Add the new benchmarks to amd_mi100_hpe.yaml, amd_mi100_z53.yaml, and azure_ndv4.yaml
- Fix the wrong GPU-visibility prefix (use HIP_VISIBLE_DEVICES instead of CUDA_VISIBLE_DEVICES)
parent 6fb0fb12
......@@ -40,16 +40,12 @@ superbench:
rccl-bw:
enable: true
modes:
- name: mpi
proc_num: 8
env:
NCCL_SOCKET_IFNAME: ens17f0
NCCL_IB_GDR_LEVEL: 1
- name: local
proc_num: 1
parallel: no
parameters:
maxbytes: 128M
minbytes: 32M
iters: 50
ngpus: 1
maxbytes: 8G
ngpus: 8
operation: allreduce
mem-bw:
<<: *default_local_mode
......@@ -79,7 +75,7 @@ superbench:
modes:
- name: local
proc_num: 32
prefix: CUDA_VISIBLE_DEVICES=$(({proc_rank}%8)) numactl -N $(({proc_rank}%4)) -m $(({proc_rank}%4))
prefix: HIP_VISIBLE_DEVICES=$(({proc_rank}%8)) numactl -N $(({proc_rank}%4)) -m $(({proc_rank}%4))
parallel: no
parameters:
mem_type:
......
......@@ -13,7 +13,7 @@ superbench:
modes:
- name: local
proc_num: 8
prefix: CUDA_VISIBLE_DEVICES={proc_rank}
prefix: HIP_VISIBLE_DEVICES={proc_rank}
parallel: yes
default_pytorch_mode: &default_pytorch_mode
enable: true
......@@ -36,6 +36,52 @@ superbench:
- train
pin_memory: yes
benchmarks:
kernel-launch:
<<: *default_local_mode
rccl-bw:
enable: true
modes:
- name: local
proc_num: 1
parallel: no
parameters:
maxbytes: 8G
ngpus: 8
operation: allreduce
mem-bw:
<<: *default_local_mode
gemm-flops:
<<: *default_local_mode
parameters:
m: 7680
n: 8192
k: 8192
ib-loopback:
enable: true
modes:
- name: local
proc_num: 2
prefix: PROC_RANK={proc_rank} IB_DEVICES=0,1
parallel: no
disk-benchmark:
enable: false
modes:
- name: local
proc_num: 1
parallel: no
parameters:
block_devices: []
gpu-sm-copy-bw:
enable: true
modes:
- name: local
proc_num: 32
prefix: HIP_VISIBLE_DEVICES=$(({proc_rank}%8)) numactl -N $(({proc_rank}%4)) -m $(({proc_rank}%4))
parallel: no
parameters:
mem_type:
- dtoh
- htod
gpt_models:
<<: *default_pytorch_mode
models:
......
......@@ -35,6 +35,32 @@ superbench:
<<: *default_local_mode
gemm-flops:
<<: *default_local_mode
nccl-bw:
enable: true
modes:
- name: local
proc_num: 1
parallel: no
parameters:
ngpus: 8
ib-loopback:
enable: true
modes:
- name: local
proc_num: 4
prefix: PROC_RANK={proc_rank} IB_DEVICES=0,2,4,6 NUMA_NODES=1,0,3,2
parallel: yes
- name: local
proc_num: 4
prefix: PROC_RANK={proc_rank} IB_DEVICES=1,3,5,7 NUMA_NODES=1,0,3,2
parallel: yes
mem-bw:
enable: true
modes:
- name: local
proc_num: 8
prefix: CUDA_VISIBLE_DEVICES={proc_rank} numactl -c $(({proc_rank}/2))
parallel: yes
disk-benchmark:
enable: false
modes:
......
......@@ -32,7 +32,10 @@ superbench:
enable: true
modes:
- name: local
prefix: NCCL_DEBUG=INFO NCCL_IB_DISABLE=1
proc_num: 1
parallel: no
parameters:
ngpus: 8
ib-loopback:
enable: true
modes:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment