Unverified Commit bd8f105d authored by Yifan Xiong's avatar Yifan Xiong Committed by GitHub
Browse files

Benchmarks - Add config file for NDm A100 v4 (#255)

Add config file for Azure NDm A100 v4 SKU.
parent 8042fa34
# SuperBench Config
#
# Azure NDm A100 v4
# reference: https://docs.microsoft.com/en-us/azure/virtual-machines/ndm-a100-v4-series
version: v0.3
superbench:
enable: null
var:
default_local_mode: &default_local_mode
enable: true
modes:
- name: local
proc_num: 8
prefix: CUDA_VISIBLE_DEVICES={proc_rank}
parallel: yes
default_pytorch_mode: &default_pytorch_mode
enable: true
modes:
- name: torch.distributed
proc_num: 8
node_num: 1
frameworks:
- pytorch
common_model_config: &common_model_config
duration: 0
num_warmup: 64
num_steps: 2048
sample_count: 8192
batch_size: 32
precision:
- float32
- float16
model_action:
- train
pin_memory: yes
benchmarks:
kernel-launch:
<<: *default_local_mode
gemm-flops:
<<: *default_local_mode
nccl-bw:
enable: true
modes:
- name: local
proc_num: 1
parallel: no
parameters:
ngpus: 8
ib-loopback:
enable: true
modes:
- name: local
proc_num: 4
prefix: PROC_RANK={proc_rank} IB_DEVICES=0,2,4,6 NUMA_NODES=1,0,3,2
parallel: yes
- name: local
proc_num: 4
prefix: PROC_RANK={proc_rank} IB_DEVICES=1,3,5,7 NUMA_NODES=1,0,3,2
parallel: yes
mem-bw:
enable: true
modes:
- name: local
proc_num: 8
prefix: CUDA_VISIBLE_DEVICES={proc_rank} numactl -N $(({proc_rank}/2))
parallel: yes
disk-benchmark:
enable: true
modes:
- name: local
proc_num: 1
parallel: no
parameters:
block_devices:
- /dev/nvme0n1
- /dev/nvme1n1
- /dev/nvme2n1
- /dev/nvme3n1
- /dev/nvme4n1
- /dev/nvme5n1
- /dev/nvme6n1
- /dev/nvme7n1
seq_read_runtime: 60
seq_write_runtime: 60
seq_readwrite_runtime: 60
rand_read_runtime: 60
rand_write_runtime: 60
rand_readwrite_runtime: 60
gpu-copy-bw:
enable: true
modes:
- name: local
parallel: no
parameters:
mem_type:
- htod
- dtoh
- dtod
copy_type:
- sm
- dma
cudnn-function:
<<: *default_local_mode
cublas-function:
<<: *default_local_mode
matmul:
<<: *default_local_mode
frameworks:
- pytorch
sharding-matmul:
<<: *default_pytorch_mode
computation-communication-overlap:
<<: *default_pytorch_mode
gpt_models:
<<: *default_pytorch_mode
models:
- gpt2-small
- gpt2-large
parameters:
<<: *common_model_config
batch_size: 8
seq_len: 224
bert_models:
<<: *default_pytorch_mode
models:
- bert-base
- bert-large
parameters:
<<: *common_model_config
seq_len: 224
lstm_models:
<<: *default_pytorch_mode
models:
- lstm
parameters:
<<: *common_model_config
batch_size: 224
input_size: 224
hidden_size: 1000
seq_len: 32
pin_memory: no
resnet_models:
<<: *default_pytorch_mode
models:
- resnet50
- resnet101
- resnet152
parameters:
<<: *common_model_config
batch_size: 192
num_steps: 512
densenet_models:
<<: *default_pytorch_mode
models:
- densenet169
- densenet201
parameters:
<<: *common_model_config
pin_memory: no
vgg_models:
<<: *default_pytorch_mode
models:
- vgg11
- vgg13
- vgg16
- vgg19
parameters:
<<: *common_model_config
pin_memory: no
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment