Unverified Commit a58f218b authored by Yuting Jiang's avatar Yuting Jiang Committed by GitHub
Browse files

Benchmarks: Update - Update benchmarks in configuration file (#208)

**Description**
Update the benchmarks in the configuration files for single-node validation of SuperBench v0.3.

**Major Revision**
- Fix incorrect nccl-bw parameters for single-node validation in the configs
- Add new benchmarks to amd_mi100_hpe.yaml, amd_mi100_z53.yaml, and azure_ndv4.yaml
- Fix the wrong GPU visibility prefix (replace CUDA_VISIBLE_DEVICES with HIP_VISIBLE_DEVICES)
parent 6fb0fb12
...@@ -40,16 +40,12 @@ superbench: ...@@ -40,16 +40,12 @@ superbench:
rccl-bw: rccl-bw:
enable: true enable: true
modes: modes:
- name: mpi - name: local
proc_num: 8 proc_num: 1
env: parallel: no
NCCL_SOCKET_IFNAME: ens17f0
NCCL_IB_GDR_LEVEL: 1
parameters: parameters:
maxbytes: 128M maxbytes: 8G
minbytes: 32M ngpus: 8
iters: 50
ngpus: 1
operation: allreduce operation: allreduce
mem-bw: mem-bw:
<<: *default_local_mode <<: *default_local_mode
...@@ -79,7 +75,7 @@ superbench: ...@@ -79,7 +75,7 @@ superbench:
modes: modes:
- name: local - name: local
proc_num: 32 proc_num: 32
prefix: CUDA_VISIBLE_DEVICES=$(({proc_rank}%8)) numactl -N $(({proc_rank}%4)) -m $(({proc_rank}%4)) prefix: HIP_VISIBLE_DEVICES=$(({proc_rank}%8)) numactl -N $(({proc_rank}%4)) -m $(({proc_rank}%4))
parallel: no parallel: no
parameters: parameters:
mem_type: mem_type:
......
...@@ -13,7 +13,7 @@ superbench: ...@@ -13,7 +13,7 @@ superbench:
modes: modes:
- name: local - name: local
proc_num: 8 proc_num: 8
prefix: CUDA_VISIBLE_DEVICES={proc_rank} prefix: HIP_VISIBLE_DEVICES={proc_rank}
parallel: yes parallel: yes
default_pytorch_mode: &default_pytorch_mode default_pytorch_mode: &default_pytorch_mode
enable: true enable: true
...@@ -36,6 +36,52 @@ superbench: ...@@ -36,6 +36,52 @@ superbench:
- train - train
pin_memory: yes pin_memory: yes
benchmarks: benchmarks:
kernel-launch:
<<: *default_local_mode
rccl-bw:
enable: true
modes:
- name: local
proc_num: 1
parallel: no
parameters:
maxbytes: 8G
ngpus: 8
operation: allreduce
mem-bw:
<<: *default_local_mode
gemm-flops:
<<: *default_local_mode
parameters:
m: 7680
n: 8192
k: 8192
ib-loopback:
enable: true
modes:
- name: local
proc_num: 2
prefix: PROC_RANK={proc_rank} IB_DEVICES=0,1
parallel: no
disk-benchmark:
enable: false
modes:
- name: local
proc_num: 1
parallel: no
parameters:
block_devices: []
gpu-sm-copy-bw:
enable: true
modes:
- name: local
proc_num: 32
prefix: HIP_VISIBLE_DEVICES=$(({proc_rank}%8)) numactl -N $(({proc_rank}%4)) -m $(({proc_rank}%4))
parallel: no
parameters:
mem_type:
- dtoh
- htod
gpt_models: gpt_models:
<<: *default_pytorch_mode <<: *default_pytorch_mode
models: models:
......
...@@ -35,6 +35,32 @@ superbench: ...@@ -35,6 +35,32 @@ superbench:
<<: *default_local_mode <<: *default_local_mode
gemm-flops: gemm-flops:
<<: *default_local_mode <<: *default_local_mode
nccl-bw:
enable: true
modes:
- name: local
proc_num: 1
parallel: no
parameters:
ngpus: 8
ib-loopback:
enable: true
modes:
- name: local
proc_num: 4
prefix: PROC_RANK={proc_rank} IB_DEVICES=0,2,4,6 NUMA_NODES=1,0,3,2
parallel: yes
- name: local
proc_num: 4
prefix: PROC_RANK={proc_rank} IB_DEVICES=1,3,5,7 NUMA_NODES=1,0,3,2
parallel: yes
mem-bw:
enable: true
modes:
- name: local
proc_num: 8
prefix: CUDA_VISIBLE_DEVICES={proc_rank} numactl -c $(({proc_rank}/2))
parallel: yes
disk-benchmark: disk-benchmark:
enable: false enable: false
modes: modes:
......
...@@ -32,7 +32,10 @@ superbench: ...@@ -32,7 +32,10 @@ superbench:
enable: true enable: true
modes: modes:
- name: local - name: local
prefix: NCCL_DEBUG=INFO NCCL_IB_DISABLE=1 proc_num: 1
parallel: no
parameters:
ngpus: 8
ib-loopback: ib-loopback:
enable: true enable: true
modes: modes:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment