# SuperBench Config
#
# Layout:
#   superbench.var        - reusable fragments, exposed as YAML anchors.
#   superbench.benchmarks - one entry per benchmark; most pull in a fragment
#                           from `var` through a merge key (`<<`).
#
# Booleans are written as true/false (never yes/no) so the value is a boolean
# under both YAML 1.1 and YAML 1.2 parsers.
version: v0.3
superbench:
  # NOTE(review): null presumably defers to each benchmark's own `enable`
  # flag below - confirm against the SuperBench configuration docs.
  enable: null
  monitor:
    enable: false
    sample_duration: 1
    sample_interval: 10
  var:
    # 8 local processes; each one gets CUDA_VISIBLE_DEVICES set from the
    # {proc_rank} placeholder.
    default_local_mode: &default_local_mode
      enable: true
      modes:
        - name: local
          proc_num: 8
          prefix: CUDA_VISIBLE_DEVICES={proc_rank}
          parallel: true
    # torch.distributed mode: 8 processes on 1 node, pytorch framework.
    default_pytorch_mode: &default_pytorch_mode
      enable: true
      modes:
        - name: torch.distributed
          proc_num: 8
          node_num: 1
      frameworks:
        - pytorch
    # Parameters shared by every *_models benchmark; each benchmark adds its
    # own batch_size next to the merge key.
    common_model_config: &common_model_config
      duration: 0
      num_warmup: 16
      num_steps: 128
      precision:
        - float32
        - float16
      model_action:
        - train
  benchmarks:
    nccl-bw:
      enable: true
      modes:
        - name: local
          proc_num: 1
          parallel: false
      parameters:
        ngpus: 8
    # Two local modes of 4 processes each: the first lists the even-numbered
    # IB devices, the second the odd-numbered ones. NUMA_NODES is the same
    # list in both.
    ib-loopback:
      enable: true
      modes:
        - name: local
          proc_num: 4
          prefix: PROC_RANK={proc_rank} IB_DEVICES=0,2,4,6 NUMA_NODES=1,0,3,2
          parallel: true
        - name: local
          proc_num: 4
          prefix: PROC_RANK={proc_rank} IB_DEVICES=1,3,5,7 NUMA_NODES=1,0,3,2
          parallel: true
    # Disabled in this config.
    disk-benchmark:
      enable: false
      modes:
        - name: local
          proc_num: 1
          parallel: false
      parameters:
        block_devices:
          - /dev/nvme0n1
    # The prefix passes {proc_rank}/2 to `numactl -N` through shell
    # arithmetic, so each pair of consecutive ranks shares one node argument.
    mem-bw:
      enable: true
      modes:
        - name: local
          proc_num: 8
          prefix: CUDA_VISIBLE_DEVICES={proc_rank} numactl -N $(({proc_rank}/2))
          parallel: false
    # NOTE(review): this mode sets no proc_num, unlike the other local modes;
    # presumably the consumer applies a default - confirm.
    gpu-copy-bw:
      enable: true
      modes:
        - name: local
          parallel: false
      parameters:
        mem_type:
          - htod
          - dtoh
          - dtod
        copy_type:
          - sm
          - dma
    kernel-launch:
      <<: *default_local_mode
    gemm-flops:
      <<: *default_local_mode
    cudnn-function:
      <<: *default_local_mode
    cublas-function:
      <<: *default_local_mode
    # Local mode plus an explicit framework list (the local-mode fragment
    # defines none).
    matmul:
      <<: *default_local_mode
      frameworks:
        - pytorch
    sharding-matmul:
      <<: *default_pytorch_mode
    computation-communication-overlap:
      <<: *default_pytorch_mode
    # Model benchmarks: the pytorch mode fragment, a model list, and the
    # common model parameters with a per-family batch_size.
    gpt_models:
      <<: *default_pytorch_mode
      models:
        - gpt2-small
        - gpt2-large
      parameters:
        <<: *common_model_config
        batch_size: 4
    bert_models:
      <<: *default_pytorch_mode
      models:
        - bert-base
        - bert-large
      parameters:
        <<: *common_model_config
        batch_size: 8
    lstm_models:
      <<: *default_pytorch_mode
      models:
        - lstm
      parameters:
        <<: *common_model_config
        batch_size: 128
    resnet_models:
      <<: *default_pytorch_mode
      models:
        - resnet50
        - resnet101
        - resnet152
      parameters:
        <<: *common_model_config
        batch_size: 128
    densenet_models:
      <<: *default_pytorch_mode
      models:
        - densenet169
        - densenet201
      parameters:
        <<: *common_model_config
        batch_size: 128
    vgg_models:
      <<: *default_pytorch_mode
      models:
        - vgg11
        - vgg13
        - vgg16
        - vgg19
      parameters:
        <<: *common_model_config
        batch_size: 128